diff --git a/.gitattributes b/.gitattributes index 9f2732947f..b848ea562b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,7 +1,7 @@ # Tell git what files are txt *.py text diff=python *.pyj text diff=python -*.recipe text diff=python +*.recipe text diff=python linguist-language=python *.coffee text *.js text *.pot text @@ -46,6 +46,7 @@ resources/coffee-script.js linguist-vendored=true resources/csscolorparser.js linguist-vendored=true resources/viewer/hyphen* linguist-vendored=true resources/viewer/jquery* linguist-vendored=true +resources/stylelint-bundle.min.js linguist-vendored=true src/hunspell linguist-vendored=true # Mark generated files diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 93d8705156..7a9be1189b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,8 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-2019] + # windows is disabled because webenging is crashing when compiling rapydscript in CI, try re-enabling after next Qt update + os: [ubuntu-latest, macos-latest] steps: - name: Checkout source code uses: actions/checkout@v3 @@ -67,4 +68,5 @@ jobs: run: | set -xe runuser -u ci -- python setup.py test --under-sanitize + echo "Running test_rs" runuser -u ci -- python setup.py test_rs diff --git a/.gitignore b/.gitignore index 4e35b4d848..04ed5e5a4b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,57 +6,52 @@ .bzrignore .build-cache .cache -compile_commands.json -link_commands.json -src/calibre/plugins -resources/images.qrc -resources/icons.rcc -manual/generated -manual/locale -manual/.doctrees -build -dist -docs -resources/localization -resources/hyphenation -resources/scripts.calibre_msgpack -resources/changelog.json -resources/ebook-convert-complete.calibre_msgpack -resources/builtin_recipes.xml -resources/builtin_recipes.zip -resources/template-functions.json -resources/editor-functions.json -resources/user-manual-translation-stats.json 
-resources/editor.js -resources/viewer.js -resources/viewer.html -resources/content-server/index-generated.html -resources/content-server/locales.zip -resources/mathjax -resources/fonts/liberation -resources/mozilla-ca-certs.pem -resources/user-agent-data.json -icons/icns/*.iconset -setup/installer/windows/calibre/build.log -setup/pyqt_enums -tags -nbproject/ -translations/ +/src/calibre/plugins +/resources/images.qrc +/resources/icons.rcc +/manual/generated +/manual/locale +/manual/.doctrees +/build +/dist +/docs +/resources/localization +/resources/hyphenation +/resources/scripts.calibre_msgpack +/resources/changelog.json +/resources/ebook-convert-complete.calibre_msgpack +/resources/builtin_recipes.xml +/resources/builtin_recipes.zip +/resources/template-functions.json +/resources/editor-functions.json +/resources/user-manual-translation-stats.json +/resources/editor.js +/resources/viewer.js +/resources/viewer.html +/resources/content-server/index-generated.html +/resources/content-server/locales.zip +/resources/mathjax +/resources/fonts/liberation +/resources/mozilla-ca-certs.pem +/resources/user-agent-data.json +/icons/icns/*.iconset +/setup/installer/windows/calibre/build.log +/setup/pyqt_enums +/tags +/nbproject/ +/translations/ *.mdproj *.pidb *.sln *.userprefs -.project -.pydevproject -.settings/ +/.project +/.pydevproject +/.settings/ *.DS_Store -calibre_plugins/ -recipes/*.mobi -recipes/*.epub -recipes/debug +/calibre_plugins/ /.metadata/ -.idea +/.idea /*env*/ -cmake-build-* -bypy/b -bypy/virtual-machines.conf +/cmake-build-* +/bypy/b +/bypy/virtual-machines.conf diff --git a/=template.py b/=template.py deleted file mode 100644 index 1d4fcfab6c..0000000000 --- a/=template.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# License: GPL v3 Copyright: %YEAR%, %USER% <%MAIL%> - -%HERE% diff --git a/COPYRIGHT b/COPYRIGHT index beea784254..b454998864 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,5 +1,5 @@ Files: * -Copyright: 
Copyright (C) 2008-2022 Kovid Goyal +Copyright: Copyright (C) 2008-2024 Kovid Goyal License: GPL-3 The full text of the GPL is distributed as in /usr/share/common-licenses/GPL-3 on Debian systems. diff --git a/Changelog.old.txt b/Changelog.old.txt index 9359642843..f266957839 100644 --- a/Changelog.old.txt +++ b/Changelog.old.txt @@ -1,4 +1,1852 @@ +{{{ 5.43.0 2022-05-27 + +:: new features + +- Kobo driver: Allow using templates to generate collections + +- [1975406] Book details popup: Double clicking on the cover now uses calibre's internal image viewer. Right click on the cover to open it with another program. + +:: bug fixes + +- [1971461] Fix Book details blank when switching from device view to library view + +- [1973591] TXT Input: Fix a regression in 5.39 that caused the option to remove indents also removing blank lines + +- [1972069] E-book viewer: Fix incorrect sorting of highlights from the first internal file of a book + +:: improved recipes +- Outlook Magazine +- India Today +- The New Yorker +- Foreign Affairs + +:: new recipes +- Asahi Shimbun by Albert Aparicio Isarn +- Business Today Magazine by unkn0wn +- Outlook Business Magazine by unkn0wn +- Donga by Minsik Cho +- Le Monde (English) by Darko Miletic +- Hinduism Today by Vishwas Vasuki +- The MIT Press Reader by yodha8 +- Live Science by yodha8 +- Financial Times Print Edition by Kovid Goyal +- Various Catalan language news sources by santboia + +}}} + +{{{ 5.42.0 2022-05-03 + +:: new features + +- E-book viewer: Ignore accents when doing a search + +- [1969926] Book list searching: Ignore punctuation when searching. 
So that, for example, Gravitys will match Gravity's + +- [1970045] Show the text used for marking books in the tooltip + +:: bug fixes + +- [1971150] Edit book: Reduce memory consumption by the checkpoint system when doing operations that involve parsing all book files + +- [1971015] Amazon metadata download: Fix titles starting with [ being ignored + +- [1970497] Edit metadata dialog: Undo not working correctly in identifiers field + +- [1970391] Fix viewing LRF files not working + +- [1969981] PDF Output: Fix an error on some invalid CSS in the input document + +- Linux binary: Workaround for Qt WebEngine not working on systems with glibc > 2.33 + +:: improved recipes +- New Yorker +- OMG! Ubuntu! +- ACM Queue +- CACM +- Science News +- Quanta Magazine +- Outlook Magazine +- Indian Express + +:: new recipes +- Caravan Magazine (Hindi) by Areet Mahadevan +- LWN (Free) by yodha8 +- IEEE Spectrum Magazine by yodha8 +- Financial Times by Kovid Goyal +- Cosmos Magazine by yodha8 +}}} + +{{{ 5.41.0 2022-04-22 + +:: new features + +- [1968810] Allow creating multiple types of temporary marks (pins) by right clicking the mark books button (which can be added to the calibre toolbar via Preferences->Toolbars & menus) + +- Kobo driver: Support updated firmware + +- [1967149] Show a popup message when a Kindle is connected mentioning the Amazon cover bug and how to workaround it + +- Edit book: Table of Contents tool: Allow using the title attribute on headings tags to get the text for table of contents entries + +- When creating a custom column to display real (floating point) numbers allow specifying the number of decimal digits when editing values + +- Amazon metadata download: Add support for amazon.in country website + +:: bug fixes + +- [1969302] Edit book: Fix AltGr+{ not working on some keyboard layouts + +- [1967828] TXT Input: Fix rare failure to convert some large TXT files with non-ASCII text + +- Get books: Update English language Amazon plugins for website 
changes + +:: improved recipes +- The Economic Times India +- Business Standard +- scmp.com +- Wired Magazine Monthly Edition +- Reason Magazine +- The Skeptical Inquirer +- Times of India +- LiveMint +- The Week +- Indian Express +- Hindustan Times + +:: new recipes +- Eenadu by unkn0wn +- Harvard Business Review by unkn0wn +- Hindustan by unkn0wn +- Dainik Bhaskar by unkn0wn +- Free Inquiry by Howard Cornett +- Sportstar by unkn0wn +- Digit Magazine by unkn0wn +- The Diplomat by unkn0wn +}}} + +{{{ 5.40.0 2022-04-01 + +:: new features + +- [1966872] Content server viewer: Allow editing bookmarks + +- [1967028] Read covers from CBC comic files + +- [1966537] Allow filtering authors/tags when creating virtual library based on them + +- [1966851] Add a copy button to the image view popup + +- Template language: Support for nested functions and a string concatenation operator + +:: bug fixes + +- DOCX Output: Fix a comment immediately after a <li> tag breaking the conversion + +- Standalone ToC editor: Fix spurious error message if left open for more than two minutes + +- [1965693] Fix search-as-you-type triggers extra search after manual confirmation + +:: improved recipes +- Courrier International + +:: new recipes +- Reason Magazine by Howard Cornett +- Seminar Magazine by unkn0wn +- Frontline by unkn0wn +}}} + +{{{ 5.39.1 2022-03-18 + +:: new features + +- [1963875] E-book viewer: Allow scrolling of the ToC, highlights, bookmarks, etc. with touch gestures + +- [1963822] Edit metadata dialog: When using the change case operations if some text is selected, only operate on the selected text + +- [1964123] Use atomic writes for the config files ensures no partial data is written in case of crash/powerloss + +:: bug fixes + +- 5.39.1 fixes a couple of regressions that broke case change in the Bulk metadata edit dialog and remembering column widths in the book list on some systems. + +- [1964742] Content server: Fix reading of books with thousands of internal files not working in the Chrome browser + +- [1965182] Catalog generation: Fix a rare crash when generating very large catalogs + +- Edit/Polish book: Fix hardcoded Unicode ligatures not being preserved in AZW3 format books + +- [1963868] Fix automatic searches causing search box to lose focus when search as you type is enabled in Preferences->Searching + +- [1963748] Edit book: Check book: Auto fix package identifier being empty + +- [1963856] Amazon metadata download: Fix getting series info from amazon.jp + +- Edit book: Insert hyperlinks: When sorting anchors on elements without any text content, use the anchor itself + +- Edit book: Make the saved search panel freely resizable + +- Edit book: When dragging to select a region or adjust the selection fix mouse moving outside the image causing the region to no longer be adjusted + +:: improved recipes +- India Legal Magazine +- The Smithsonian +- The Federalist + +:: new recipes +- Swarajya Magazine 
by unkn0wn +- Open Magazine by unkn0wn +}}} + +{{{ 5.38.0 2022-03-04 + +:: new features + +- [1852929] E-book viewer: When displaying estimated time to completion for reading a book, remember the reading rate the next time the book is opened + +- [1961500] Dark theme: Highlight the current cell in the book list with a lighter background and different foreground to make it more obvious + +- [1961639] An option to disable editing composite columns in the main book list when Tabbing through them (Preferences->Look & feel->Edit metadata) + +:: bug fixes + +- Tag editor: Fix regression in previous release that caused double clicking on tags to not work on non Linux platforms + +- [1962365] Copy to library: Fix annotations not being copied + +- [1962213] Edit book: Spell check: Fix words after a comment not being checked + +- [1960554] PDF Output: Fix conversion failing if there are ToC entries pointing to removed content + +- [1961775] E-book viewer: Fix an error when opening books with MathML for the second time if the last read position was at a MathML element + +- Edit book: Fix double clicking to select a word also selecting smart quotes surrounding the word + +- EPUB 3 metadata: Fix non-integer series index being sometimes represented using exponential notation + +:: improved recipes +- Lenta.ru and aif.ru +- Indian Express +- Live Mint +- Mainichi +- Japan Times + +:: new recipes +- Hindustan Times by unkn0wn +- India Legal Magazine by unkn0wn +- RT на русском by Vuizur +}}} + +{{{ 5.37.0 2022-02-18 + +:: new features + +- [1961129] Book details: Add actions to trim the cover to the right-click menu + +- [1960586] Allow removing multiple email addresses at once in Preferences->Sharing by email + +- Book details: Use a better mono-spaced font on Windows by default + +- Add a tweak in Preferences->Tweaks to change the behavior of the Tab key when completing entries + +- [1959928] Edit metadata: In "All in one" mode add an adjustable splitter between the cover and 
formats boxes + +:: bug fixes + +- [1960686] Textile output: Don't fail if input document has invalid padding or margin specifications + +- [1960446] E-book viewer: Fix image display window not remembering its size and settings when run from within calibre + +- E-book viewer: Fix setting to use roman numerals for series not being respected + +- Edit book: When saving a copy do not fail if the original file has no write permissions + +- [1960180] Embed fonts tool: Create <head> when missing + +- Tag editor: Improve performance when very large number of tags present + +:: improved recipes +- Live Mint +- The Hindu +- Reuters +- MMC RTV Slovenija +- Down To Earth +- Publico.PT +}}} + +{{{ 5.36.0 2022-02-04 + +:: new features + +- Edit metadata dialog: Allow controlling which custom columns are present in this dialog via Preferences->Look & feel->Edit metadata + +- Edit metadata dialog: Allow manually sizing the various sections of the dialog in "All on 1 tab" mode + +- Edit book: Spell checking: Update the bundled English and Spanish dictionaries + +- [1958773] BibTeX catalogs: Support tags like custom columns + +:: bug fixes + +- [1959659] Amazon metadata download: Fix paragraphs in the comments being merged + +- [1958979] Amazon.de metadata download: Fix published date and series information not being fetched for some books + +- Email delivery: Fix sending email via Hotmail not working since this week because Microsoft changed the SMTP server name + +- [1959220] Do not remove articles for titles in the Polish language + +- [1959207] E-book viewer: When using Read aloud do not automatically lookup the highlighted word until read aloud is paused or stopped + +- E-book viewer: Fix Ctrl+P shortcut for printing not working + +- [1958882] Show an error when viewing a specific format and the file is missing + +- Edit book: Fix renaming of classes that start/end with non word characters not working + +- [1958730] Edit book: Preview panel: Fix hyphenation at end of line being 
rendered as boxes on macOS + +- [1959893] Fix incorrect selection size displayed in Trim image dialog when image is scaled down to fit + +- [1959782] Edit book: Fix pasting files from another editor instance failing if a file with the same name already exists + +- [1959981] When reviewing metadata if the newly downloaded metadata has no language but there is an existing language, ensure it is preserved + +:: improved recipes +- India Today +- Indian Express +- Live Mint +- Al Jazeera in English +- The Financial Express +- The Straits Times + +:: new recipes +- title by author +}}} + +{{{ 5.35.0 2022-01-21 + +:: new features + +- [1956006] Coloring/icon rules: Allow creating a rule for date columns that matches *today* + +- Kobo driver: Add support for new firmware + +- [1954890] Content server: Show total number of results when searching for books + +:: bug fixes + +- [1958028] E-book viewer: Fix searching for text near the end of a chapter sometimes not working + +- [1954714] E-book viewer: Fix auto hyphenation on macOS not rendering the hyphens correctly + +- Edit book: Reports: Fix thumbnails of SVG images not rendered + +- ODT metadata: Support reading tags from multiple elements + +- [1958115] LRF Input: Fix a regression in calibre 5 that broke parsing of some LRF files + +- [1956097] MOBI output: Don't fail if input document contains invalid % based lengths + +- [1955308] AZW3 Input: Handle AZW3 files with incorrect TAGX Offset INDX header fields + +- [1956932] Comic conversion: Fix conversion of comic images that are stored as grayscale images in JPEG format not working when converting to PDF with image processing turned off + +- [1955967] calibredb catalog: Fix --ids and --search options not working for CSV/XML catalogs + +- [1958490] Tag browser: Fix the find box not using all available width + +- [1956192] E-book viewer: Remove books that do not exist from the recently opened book list + +- Completion popups: Fix display of items containing line breaks + 
+- [1956129] Fix line breaks in custom column descriptions not being rendered in their tooltips + +- [1956088] Fix Preferences->Searching->Clear search histories not taking effect till a restart for some search boxes + +- [1955732] Hierarchical entries in user category may not merge correctly in tag browser + +:: improved recipes +- Foreign Affairs +- MIT Technology Review +- Reuters +- Clarin +- General Knowledge Today +- Popular Science + +:: new recipes +- Dw.de by xav +- Equestria Daily by Timothee Andres +}}} + +{{{ 5.34.0 2021-12-17 + +:: new features + +- Happy holidays to everyone! + +- Driver for the new Nook Glowlight 4 + +- Edit book: Spell check tool: Add an exclude files button to exclude some files from being checked + +- EPUB/MOBI Catalogs: Increase the maximum thumbnail size to 3 inches from 2 inches + +- [1953739] Allow creating a shortcut in Preferences->Shortcuts->Edit metadata to paste metadata ignoring the value of the exclude_fields tweak + +- [1954715] E-book viewer: Displays links marked up as glossary and bibliography links as popups + +- [1954572] Add a tweak in Preferences->Tweaks to provide the sort value for undefined numbers + +:: bug fixes + +- Edit book: Fix pressing F8 to jump to next misspelled word not working after last word in current file + +- [1954889] Fix PDB E-reader output broken in calibre 5 + +- [1954839] Edit book: Reports: Include descendant selectors that use classes when counting class usage + +- [1954726] E-book viewer: Fix an error when opening some books with highlights that span in-line text formatting + +- [1954460] MTP driver: Do not send the calibre device db files to the root folder on the Supernote A5 x as it fails + +- ToC Editor: Workaround an occasional error when closing on Windows if the file being edited is in a DropBox/antivirus prone folder + +- Fix a regression in the previous release that broke creating new keyboard shortcuts + +- Comments editor: When flowing the tool bar onto multiple lines do not 
split up groups of buttons + +- Various compatibility fixes for Python 3.10 used by some Linux distributions + +:: improved recipes +- Pocket +- El Pais +- American Prospect +- Mediapart + +}}} + +{{{ 5.33.2 2021-12-03 + +:: new features + +- Allow changing the icon used for calibre libraries. Right click the library icon in calibre and choose "Change the icon for this library" + +- Comments editor: Use a single line for all three toolbars if they fit + +- Edit book: Allow merging HTML files by drag and drop of the files onto another HTML file + +- Kobo driver: Add support for listing purchased audiobooks + +- Edit metadata all-in-one mode: The cover and formats column now gives as much vertical space as possible to the cover image + +- [1952562] Add books dialog: When a non-book file type is added the next time the dialog is used, preselect the "All files" filter + +- [1952764] calibre-server --manage-users: Allow managing users while the server is running and also add actions to automate changing user account restrictions + +- [1950762] EPUB 3 metadata: If the book contains a "subtitle" append it to the main title when reading metadata + +:: bug fixes + +- [1950673] E-book viewer: Fix an occasional hang on startup at "Loading section" + +- [1952142] Get books: Update the Kobo plugin for website changes + +- [1951673] Bulk edit metadata dialog: Fix changing the search mode resetting other fields + +- [1951507] E-book viewer: Fix sorting of highlights incorrect in books that use HTML ids with a hyphen in them + +- [1951467] PDF Output: Fix the option to break long words at the ends of lines causing boxes to be rendered at the end of the line on macOS with some fonts + +- Google metadata plugin: When searching by ISBN if no results are found retry using an alternate query syntax + +- 5.33.2 fixes a couple of regressions that broke the toolbar in the popup comments editor dialog and rendering of the download + metadata button in the edit metadata dialog on Windows, 
as well as reading files from MTP devices on Windows + +:: improved recipes +- Smithsonian Magazine + +:: new recipes +- The Epoch Times by Kovid Goyal +- Mens Day Out by Vishwas Vasuki +}}} + +{{{ 5.32.0 2021-11-12 + +:: new features + +- [major] Edit book: Add a tool to transform HTML tags based on rules (Tools->Transform HTML) + + Allows for making transformations such as changing one html tag to another, deleting tags, wrapping + them in another tag, etc. Also available during conversions via the Look & feel->Transform HTML + section of the conversion dialog. + +- [1949908] Driver for the new Tolino Vision 6 + +- Kobo driver: Add support for the latest firmware released last week + +- [1948889] When picking a random book ensure recently chosen books are not re-selected + +- Icon theme chooser dialog: Allow right clicking on a theme to visit its homepage + +:: bug fixes + +- Amazon metadata download: Add support for more markup variations in amazon's sites that could prevent the fetching of + ratings, comments and series metadata for some books + +- Google search: Add support for new beta search results page markup that was preventing using cached Google pages + to search for Amazon metadata + +- PDF Output: Fix the option to preserve cover aspect ratio being ignored when converting comics + +- [1950412] DOCX Input: Sanitize image filenames more strictly to workaround broken EPUB software + +- [1950206] Linux binary: Fix file dialogs not working on Fedora 35 under KDE + +- [1949604] When sending email to the Kindle and PocketBook sync services use ASCII filenames as there have been some + reports of issues with non-ASCII filenames with these services. 
+ +- [1950033] Book Details: Fix missing copy options on composite columns + +- Ask for confirmation when deleting covers from books + +:: improved recipes +- New York Post +- Liberation +- Boston Globe +- The Globe and Mail +- LeMonde + +:: new recipes +- India speaks reddit feed by Vishwas Vasuki +}}} + +{{{ 5.31.1 2021-10-29 + +:: new features + +- [1948883] Kindle driver: Support the new Kindle PaperWhite 2021 + +- Add an option under Preferences->Behavior to have calibre recognize numbers when sorting (this was previously under Preferences->Tweaks) + +- E-book viewer: Add a button to directly open the viewer help section in the calibre user manual to the viewer controls + +- E-book viewer: Prevent the display from sleeping when using auto-scroll or read aloud modes (Implemented only on Windows and macOS) + +- Edit book: Set semantics tool: Add support for EPUB 3 landmarks + +- [1948493] Add an entry to the Connect/share menu to open the content server in a local browser when it is running + +:: bug fixes + +- [1947879] Content server: Fix some OPDS feeds failing with non-ASCII content + +- [1948560] Tag browser: Fix incorrect first letter partitioning when enabling numeric collation of items that start with a number + +- [1949167] 5.31.1 fixes a bug in an HTML serialization library calibre uses that broke a few things, such as the comments editor + tool in the metadata dialog + +}}} + +{{{ 5.30.0 2021-10-22 + +:: new features + +- Add support for the new Kobo Sage and Libra 2 e-book reader devices + +- [1946439] E-book viewer: Read aloud: Allow right clicking to play/pause reading + +- Sending books by e-mail: Preserve non-English characters in attached filenames + +- [1946560] Tag browser: Allow searching for sub-categories by right clicking on them + +:: bug fixes + +- E-book viewer: Fix cover and full screen images not centered in paged mode when more than one page is displayed per screen + +- ToC Editor: Ignore in succession clicks on the OK and Cancel 
buttons to avoid accidentally closing the window when finishing creating a new entry + +- [1905479] Comments editor: Fix the formatting buttons not showing the current state correctly and fix some keyboard shortcuts not working when more than one comments editor is present in a single window + +- [1946417] Tag browser: Fix renaming of User categories in Virtual libraries + +- [1947948] Make removing large numbers of custom column icons easier + +:: improved recipes +- Private Eye +- Foreign Policy +- Le Monde Diplomatique - cono sur + +}}} + +{{{ 5.29.0 2021-10-08 + +:: new features + +- [1945890] Allow drag and drop of books onto formats in the Tag browser to convert them to that format + +- [1945891] Allow creating sorts based on multiple columns (Add the Sort action to the toolbar via Preferences->Toolbars & menus) + +- Edit book: When changing a paragraph to a heading if the cursor is adjacent to a paragraph tag but not inside any tags other than body, use the adjacent tag + +:: bug fixes + +- [1945889] Auto adding: Run relevant plugins before reading metadata from the book. 
Matches behavior of manual adding + +- [1945882] Content server: Fix category collapse by partition not working + +- Prevent Tab from causing focus to leave the Tag browser + +- Edit book: See what changed: Fix non-BMP unicode characters causing highlighting of changed words to be slightly misplaced + +- PDF Output: Fix a regression that broke conversion of comics that contain 1-bit images + +- Edit book: Fix pressing Ctrl+Tab inserting a tab at the start of a line instead of switching tabs + +- [1945098] Fix a regression in the previous release that caused identifiers set by some plugins to not be saved in the database + +- [1946342] Template language: Fix nesting composite columns sometimes failing + +:: improved recipes +- Entrepreneur Magazine +- Dawn +- New York Review of Books + +:: new recipes +- Various Indian news sources by Vishvas Vasuki + +}}} + +{{{ 5.28.0 2021-09-24 + +:: new features + +- Edit metadata dialog: Customize cover generation: Allow saving and loading cover generation settings as "themes" + +- [1944614] E-book viewer: Allow pressing the 0-9 keys to apply a quick highlight style + +- [1943521] Book details panel: While clicking tags/authors/etc. 
holding down the Ctrl+Shift modifier keys now add the tag to the current search with "AND" instead of "OR" when using only Ctrl + +- [1944057] Add an option to the preferences drop down menu to restart calibre without third party plugins + +:: bug fixes + +- [1944562] Edit book: When renaming classes in style sheets only recognize class names preceded by a period + +- E-book viewer: Fix lookup in Google partially hidden due to change in Google results page markup + +- Conversion dialog: Search replace expression builder: Fix incorrect search result highlighting when non-BMP unicode characters are present in the text + +- [1943270] E-book viewer: Fix popup footnote blank when the footnote link points to a tag + +- [1944433] E-book viewer: Fix jumping to highlights in text that occurs after a line break and newline character not working in paged mode + +- [1943495] Kindle Output: Strip EXIF metadata from JPEG images as the Kindle renderer has issues with it + +:: improved recipes +- Аргументы и Факты +- India Today + +}}} + +{{{ 5.27.0 2021-09-10 + +:: new features + +- When adding Markdown (.md) or Textile (.textile) files that contain references to images, automatically add them as TXTZ with the images + +:: bug fixes + +- DOCX Output: Correctly convert soft hyphens in the input document to DOCX soft hyphens + +- [1942805] DOCX Input: Fix a bookmark at the end of a paragraph causing the bookmark at the start of the paragraph to be skipped + +- [1942773] Edit book: Spell check: Fix EPUB 3 nav document not being spell checked when not in the spine + +- [1942012] PDF Output: Fix a rare failure when the input document has a ToC item pointing to the last page + +- [1942129] Windows: Fix a regression in calibre 5 that caused drag and drop from WinZip to not work + +- [1941992] TXT Output: Fix a regression in calibre 5 that caused the max line length option to not work + +- When auto converting added TXT files with image references to TXTZ use a full Markdown parser to 
detect Markdown images + +:: improved recipes +- BBC News +- Foreign Affairs + +:: new recipes +- The Week by Kovid Goyal +}}} + +{{{ 5.26.0 2021-08-27 + +:: new features + +- [1941013] Dark color scheme: Use a darker blue for highlighted items + +:: bug fixes + +- Content server book viewer: Show a message when a search finds no matches + +- MOBI Output: Fix JPEG images without any JFIF metadata not being rendered on the Kindle + +- [1939908] Comic input: Fix single color images having their colors changed by normalization + +- [1940005] E-book viewer: Fix creating multiple highlights in a single paragraph that also contains some extra text formatting at the start causing the second and subsequent highlights to malfunction + +- [1939912] Edit book: Fix a regression in the previous release that broke the options in the Remove unused CSS dialog + +:: improved recipes +- Boston Globe + +:: new recipes +- NYTimes Cooking by gourav +}}} + +{{{ 5.25.0 2021-08-13 + +:: new features + +- [1939469] Edit/Polish book: Remove unused CSS now also removes unreferenced stylesheets + +- E-book viewer: Add some CSS variables and classes that allow writing calibre specific CSS in ebooks. 
See https://manual.calibre-ebook.com/viewer.html#designing-your-book-to-work-well-with-the-calibre-viewer + +- A new framework plugins can use to be notified about changes to calibre libraries + +- [1938752] Edit metadata dialog: When pasting into the identifiers field if the clipboard contains a URL paste it directly as a URL identifier + +:: bug fixes + +- [1938448] E-book viewer: When displaying popup footnotes use the same writing direction as the main text for the footnote popups size and header + +- E-book viewer: Improve the text layout when looking up words in Google + +- Content server viewer: Fix read aloud not working on mobile browsers + + +:: improved recipes +- The Guardian and The Observer +- Wall Street Journal +- The Atlantic + +}}} + +{{{ 5.24.0 2021-07-30 + +:: new features + +- Conversion: Insert metadata as jacket: Allow adding timestamp and publisher fields. Also allow controlling the formatting of date/time fields + +- [1937025] Cover browser: Add an option to view the central book by double clicking instead of single clicking (Preferences->Look & feel->Cover browser) + +- [1936891] Tag browser: Add actions to the configure menu to toggle the display of counts and average rating + +- [1936472] Tag browser: Allow plugins to add entries to the context menu + + +:: bug fixes + +- [1938189] fetch-ebook-metadata: Fix an error when using the --cover option and no cover is found + +- [1936792] HTML Input: Fix the presence of BookDesigner markup causing conversion to fail + +- [1936184] TXT Input: Do not fail if the txt file references a directory as a resource + +:: improved recipes +- MSNBC +- Nature News +- Boston Globe +- Foreign Policy +- Le Monde + +}}} + +{{{ 5.23.0 2021-07-09 + +:: new features + +- [1934204] Annotations browser: Show highlight color in the preview panel + +- TXTZ format: Store type of text formatting in the metadata and use it automatically when converting from TXTZ + +- [1934043] Edit metadata dialog: Allow holding Ctrl and 
clicking the item editor buttons to instead open the manage dialog + +:: bug fixes + +- [1929325] Annotations browser: Fix searching for words in languages such as Chinese that do not have word delimiters not working + +- News download: Fix URLs with spaces in them not being downloaded since calibre 5.0 + +- [1933989] When searching for books by an author from the Manage authors dialog, use exact matches + +- [1933797] MOBI Output: Fix invalid color specification as plain numbers causing conversion to fail + +- [1933684] MOBI Output: Fix invalid text indent specification causing conversion to fail + +- Linux: Drop the unmaintained dbus-python in favor of jeepney for DBUS + +- Edit book: Workaround for Qt bug that caused the panel sizes in the editor to not be remembered across sessions + +:: improved recipes +- The Guardian and the Observer +- National Geographic +- Handelsblatt +- Huffington Post + +}}} + +{{{ 5.22.1 2021-06-25 + +:: new features + +- [1931646] E-book viewer: Allow clicking links in popup footnotes + +- Main book list: Scroll per pixel rather than per item by default. 
Can be returned to previous behavior via Preferences->Tweaks->Control behavior of book list + +- Linux: Drop support for the global menu bar + +:: bug fixes + +- [1932152] E-book viewer: Fix font sizes specified in absolute units not being honored in locales where the decimal separator is not the period + +- [1931566] E-book viewer: Fix searching for short strings in text with lots of similar entries displaying incorrect matches + +- [1932392] MOBI Output: Fix using percentage units for margins resulting in too large margins when using the tablet output profile + +- [1931599] E-book viewer: Fix back button not working after jumping to a bookmark + +- [1932992] Content server: OPDS feed: Fix incorrect up URL in category group feeds + +- [1933559] Content server: Fix a regression in the previous release that broke editing of series metadata + +- E-book viewer: Fix scrolling backwards by screen-fulls not working with very large page margins. + +- MOBI Input: Fix a regression in calibre 5 that broke processing of Haodoo format files + +- Conversion: Fix the smarten punctuation option not applying to inserted jacket page + +- 5.22.1 fixes a typo in the previous release that broke device detection on Linux + +:: improved recipes +- TheAtlantic.com +- Hindu +- People Daily + +}}} + +{{{ 5.21.0 2021-06-11 + +:: new features + +- Driver for the new Kobo Ellipsa + +- [1930958] Content server: When editing metadata for fields that take multiple values, make it easier to remove individual values by simply tapping a button + +- [1930900] Browser viewer: Make current color scheme setting propagate to all devices automatically when using user accounts + +- E-book viewer: Image popup: Show the image resolution in the popup window's titlebar + +:: bug fixes + +- [1930922] HTML Input: Fix handling of @import rules in stylesheets nested more than one level deep + +- [1930912] Fix viewer search context menu to clear searches not clearing search settings + +- E-book viewer image popup: 
Fix full screen button in incorrect state when starting in full screen + +:: improved recipes +- Associated Press +- The Hindu + +}}} + +{{{ 5.20.0 2021-06-04 + +:: new features + +- E-book viewer: Highlights: Make URLs in the notes for highlights clickable + +- [1930136] Book details: Ctrl-clicking on tags now adds them to the existing search instead of replacing it + +- E-book viewer: Allow using the back button to return from jumping to a search result + +:: bug fixes + +- Get books: Fix the Kobo store plugin for changes to the website + +- [1929827] Edit book: Fix non breaking spaces in snippets being converted to normal spaces + +- [1930466] ToC Editor: Fix a regression that caused changes to not be saved on machines where running a worker process takes more than ten seconds + +- Fix error when changing the "Search the net" URLs for the Content server + +:: improved recipes +- Jerusalem Post +- Popular Science +- Ambito Financiero +- Ambito.com +- Infobae + +}}} + +{{{ 5.19.0 2021-05-28 + +:: new features + +- E-book viewer: Add a preference under Scrolling behavior to reverse the tap zones used to turn pages. 
So tapping on the left goes forward and the right backward + + +:: bug fixes + +- [1929862] E-book viewer: Fix regression in 5.15 that caused incorrect display of font sizes that contain a period and use absolute units + +- [1929240] PDF Output: Fix font kerning issues with some TrueType fonts + +- [1929267] Edit book: Fix a regression in 5.18 that broke editing/creating saved searches + +- [1919025] Windows: Fix for standalone ToC Editor not working on systems where Qt WebEngine causes a crash at exit + +- Windows MTP driver: When scanning an MTP device such as an Android phone ignore folders that Windows fails to enumerate instead of failing with an error + +- ToC Editor: Fix a regression that broke choosing split points in some XHTML files + +- [1929465] PDB Input: Fix a regression in calibre 5 that broke processing some plucker format PDB files + +- Edit book: Fix sorting in spell check dialog on language broken for books that have unknown languages + +- Annotations browser: Fix a typo that broke sorting for highlights + +- [1909730] Annotations browser: When showing a highlight preserve paragraph boundaries for multi-paragraph highlights + +- Viewer highlights panel: Fix the "Edit notes" link not saving the changes + +- [1929164] E-book viewer: When using a right click/shift-click to adjust the selection, move the section boundary that is closer to the click point + +}}} + +{{{ 5.18.0 2021-05-21 + +:: new features + +- Content server viewer: Improved search functionality + +Searching can now be done for whole words and regular expressions. And all search results are +listed at once with some context for easy navigation. 
+ +- [1928596] E-book viewer: Allow making the image popup full screen + +- E-book viewer: Allow expanding/collapsing all items in the Table of Contents at a particular level by right clicking on one item of that level and choosing the option to expand/collapse + +- E-book viewer: Add shortcuts shift+home and shift+end to extend current selection to start/end of line + +- [1927520] Book details: When creating rules to convert identifiers to URLs allow using {id_unquoted} to avoid quoting the identifier value + +- [1927062] Review downloaded metadata: Allow double clicking on a cover to see it at a larger size + +- [1927012] Annotations browser: Add a Refresh button + + +:: bug fixes + +- Edit book: Fix ctrl-clicking on a class name jumping to the wrong CSS rule if the stylesheet contains top level comments + +- [1928579] Fix search and replace on identifiers not working if the replaced value has colons + +- [1922691] Annotations browser: Sort the entries in order of position in book + +- ToC editor: Dark mode: Fix colors in location selection panel not dark + +- [1926793] E-book viewer: Fix right or shift-clicking to extend selection not shrinking selection when the click is inside the selection +- [1927546] Avoid spurious errors on multiple simultaneous calibre launches + +- [1925961] E-book viewer: Ignore mouse scroll events that would turn pages when editing notes + +- E-book viewer: Fix sorting bookmarks by title not working + +- Windows WPD driver: Fix an error reading the filesystem on some MTP based devices + +:: improved recipes +- IEEE Spectrum + +}}} + +{{{ 5.17.0 2021-04-30 + +:: new features + +- [1926484] E-book viewer: Image popup: Allow dragging with the mouse to pan the image + +- [1923724] Sort button: Allow selecting which columns are in the popup sort menu + +- [1851908] E-book viewer: When suggesting a default bookmark title, use the name of the current chapter + +- [1925038] E-book viewer: When searching the Table of Contents allow holding the 
Shift key to search backwards + +- [1925294] E-book viewer: Add a shortcut Ctrl+0 to restore default font size + +:: bug fixes + +- Windows MTP driver: Rewrite parts of the driver in the hope of fixing some rare and hard to reproduce crashes + +- Windows MTP driver: Set modified and created times when putting files/folders on device. Also read modified time correctly. + +- [1918591] Windows: E-book viewer: Fix switching away from viewer while in full screen and switching back causing some corruption until the page is scrolled + +- [1925378] Fix a regression in the previous release that caused errors when editing empty date values + +- Get books: Update Gutenberg plugin for website changes + +- [1926518] E-book viewer: The quick highlight button should replace the style of an existing highlight, when one is selected + +- [1925247] Elide long items in the middle when showing the completion popup for tags, to make it easier to use with hierarchical tags + +- [1925988] E-book viewer: Read aloud: Fix soft hyphens causing read aloud words to be broken up + +- [1925390] E-book viewer: Right clicking when text is selected should extend the selection instead of doing nothing + +- Fix settings in the ToC Editor tool being forgotten when calibre is closed + +- [1926025] DOCX Output: Fix conversion failing if the input document has missing images + +- [1925961] Content server viewer: Fix mouse wheel scrolling not working on the box used to edit notes for highlights + +- E-book viewer: Fix read aloud word tracking in flow mode not very reliable + +- E-book viewer: Fix navigation shortcuts not working in Read aloud mode + +:: improved recipes +- Barrons +- Krebs On Security + +}}} + +{{{ 5.16.1 2021-04-17 + +:: new features + +:: bug fixes + +- [1924703] CHM Input: Fix handling of some CHM files that use non-ASCII internal filenames and don't specify a character encoding in their metadata + +- [1924824] Fix a regression in the previous release that prevented calibre from 
starting if there was a failure in a third party plugin + +- Content server viewer: Fix a regression in the previous release that broke handling of URLs in stylesheets + +- [1924767] Fix a regression in the previous release that broke changing sections in the convert single book dialog after changing the input or output formats + +- [1924675] Fix using 'is set' rules not working for column rules with rating values + +- E-book viewer: Fix a regression in 5.15 that causes the viewer to ignore page-break CSS properties (in 5.16.1) + +- Fix a regression that broke using stored templates (in 5.16.1) + +- [1924875] Fix auto scroll books shortcut not working in standalone cover browser window (in 5.16.1) + +- [1924890] E-book viewer: Fix jumping to previous section sometimes not working in flow mode (in 5.16.1) + +- [1924853] E-book viewer: Fix a regression in 5.15 that broke changing keyboard shortcuts in the viewer preferences (in 5.16.1) + +- E-book viewer: Fix a regression in 5.15 that broke using the delete key to delete highlights (in 5.16.1) + +:: improved recipes +- Granta +- New Scientist + +:: new recipes +- The Saturday paper by Alistair Francis +- Crikey by Alistair Francis + +}}} + +{{{ 5.15.0 2021-04-16 + +:: new features + +- [1917634] Allow auto scrolling through the list of books by pressing the X key or right clicking on the cover browser + +Useful to have a "slideshow" of book covers. The speed of scrolling can be controlled in Preferences->Look & feel->Cover browser + +- E-book viewer: Speed up first time open for EPUB files with lots of styling + +- E-book viewer preferences: Allow searching for keyboard shortcuts + +- Edit book: Allow editing WEBP images + +- Various improvements to the template language, see https://www.mobileread.com/forums/showthread.php?t=337573 + +- [1921610] E-book viewer: Show the current progress percentage in the bottom bar of the viewer controls.
This can be customized in the viewer preferences under Headers and footers + +- [1921689] E-book viewer: Add an option under Preferences->Miscellaneous to not restore open panels such as Search, Table of Contents etc on restart + +- When exporting highlights as text or Markdown also output top level chapter titles + +- [1922327] Allow downloading metadata from amazon.se + +- [1922591] Preferences->Tweaks: Allow specifying that calibre should open the book details window when double clicking on a book + +- [1922341] MOBI Output: Convert WebP images to PNG so they work with Amazon's software + +- [1921793] DOCX Input: When converting embedded fonts, replace spaces in the filename with underscores to keep the execrable epubcheck happy + +- E-book viewer: Selection bar: Add keyboard shortcuts for all buttons. Hover over a button in the bar to see the shortcut + + +:: bug fixes + +- [1924232] FB2 Output: Fix a regression in calibre 5 that caused paragraphs containing only non-breaking spaces to be removed + +- [1924187] Metadata comments editor: Fix setting block alignment destroying other block level properties + +- E-book viewer: Fix margins not being adjusted immediately when preferences are changed + +- [1921604] Edit book: Upgrade book internals: Fix skipping the NCX removal dialog not remembering the chosen option + +- [1922570] Conversion: When specifying a line-height do not apply it to the tag, as it is pointless and causes the execrable epubcheck to complain + +- E-book viewer: Improve scrolling behavior when extending the selection using keyboard shortcuts + +- E-book viewer: Fix read percent for HTML files that are rendered in a single screen being 0% rather than 100% + +- [1924598] E-book viewer: Highlights panel: Do not expand all sections when adding/deleting/modifying highlights + +- [1922503] CHM Input: Fix a regression in calibre 5.0 that broke opening of some files that don't specify a character encoding + +- EPUB2 metadata: Read ISBNs in identifier 
elements without schemes if they are valid ISBNs and no properly identified isbns are present + +- [1922309] Update Amazon metadata plugin for changes to amazon websites + +- Edit book: Fix detection of class names containing hyphens/underscores + +:: improved recipes +- New Scientist +- Irish Times +- 1843 +- The Straits Times + +:: new recipes +- Los Danieles – Columnas sin techo by CAVALENCIA +}}} + +{{{ 5.14.0 2021-03-26 + +:: new features + +- Edit book: When right clicking on a class in a HTML file, add an option to rename the class throughout the book + +- [1919103] Adding from ISBN: Add an option to check if there are existing books with the specified ISBNs already in the library + +- [1920576] Template tester: Instead of next/last, the template tester now shows the template values for selected books + +- [1918047] Content server: Allow swiping left and right to show next/previous book on the book details page + +- [1919072] E-book viewer: Make the commonly used Shift+Arrow key shortcuts for modifying selections using the keyboard work. Also add shortcuts for selecting by character, line and paragraph. 
+ +:: bug fixes + +- [1920613] MOBI Output: Fix latest Kindle firmware not displaying select publisher font option for calibre produced AZW3 files + +- [1919033] E-book viewer: Fix errors when viewing books with mathematics that are split over multiple internal files + +- [1918436] Content server viewer: Fix highlights not sorted correctly in the highlights panel + +- [1918737] Get books: Fix Smashwords plugin not working because of website changes + +- [1920733] Improve the performance of QuickView especially when using composite columns + +- [1918428] Improve performance of the virtual_libraries() template function + +- [1920250] Browser viewer: Fix Go to Location not working for positions + +- [1905257] E-book viewer: Fix searching in the Table of Contents not working + +- [1918105] E-book viewer: In paged mode, fix scrolling not working correctly when margins are set to zero + +- [1918437] E-book viewer: Fix incorrect ToC navigation in books that link the entries to inline tags that wrap block tags that span multiple pages + +- [1920592] Category editor: Searching in library shouldn't automatically open Quickview + +- [1919260] Conversion: Fix a hang caused by long sequences of non-word characters when heuristics are enabled, either explicitly or for some input formats such as TXT + +- Fix a regression in calibre 5 that broke --explode-book and --implode-book actions for calibre-debug.exe + +- Fix a regression in calibre 5 that broke setting metadata in RTF files + +:: new recipes +- ZackZack.at by Dirk Gomez +}}} + +{{{ 5.13.0 2021-03-10 + +:: new features + +- [1917967] E-book Viewer: Allow editing the current book by pressing Ctrl+D or adding a button for it to the viewer tool bar + +- Edit book: Add a command line flag to allow selecting the specified text when opening a book + +- [1917363] Edit metadata dialog: When trimming covers, show the size of the current trim region + +:: bug fixes + +- Fix a regression in the previous release that broke sending of 
emails with text longer than 900 characters + +- E-book viewer: Fix using keyboard to extend selection not turning pages + +- [1918030] Fix searching for items from the Manage dialog not working correctly + +- [1917386] PDF input: Replace paragraph separator characters with spaces + +:: improved recipes +- The Conversation + +}}} + +{{{ 5.12.0 2021-02-26 + +:: new features + +- [1915773] E-book viewer: When searching start the search from the current position, jumping to the first match at or after the current page + +- [1916411] E-book viewer: Have the Table of Contents view automatically scroll to keep the chapter being read currently visible + +- Various improvements to the calibre template language, see https://www.mobileread.com/forums/showthread.php?t=337573 + +- Edit book: File browser: Show total size of items in category when hovering over category with mouse + +- Kobo driver: Add support for latest firmware and also an option to choose the color used for cover letterboxing + +:: bug fixes + +- [1915685] E-book viewer: Fix selection popup bar sometimes going off screen when dragging up to the top line of text + +- Get books: Update the Biblio and Chitanka stores for website changes + +- [1915770] Edit book: Fix editing of JavaScript files not working + +:: improved recipes +- Slate +- Harper's Magazine + +}}} + +{{{ 5.11.0 2021-02-12 + +:: new features + +- [1912958] Edit book: Add a tool to split the tag at the current cursor position, creating a new tag with the same style and class attributes. To add the tool go to the Toolbars section in the editor preferences + +- [1911107] E-book viewer: Show the URL when hovering over external links + +- E-book viewer: Redesign the reference mode to also work on touch screens without a mouse. 
Now in reference mode paragraph numbers are displayed for all paragraphs + +- Edit book: Allow Ctrl-clicking on class names to jump to the first style rule that matches the tag and class + +- Content server: When browsing highlights for a book allow selecting multiple highlights to delete or export quickly + +- [1912954] Allow creating keyboard shortcuts to copy show and view URLs for selected books to clipboard + +- calibredb: Add a timeout option to control the timeout when connecting to the calibre server + +:: bug fixes + +- [1913854] Content server: Fix dragging selection handles not working in Safari + +- [1915303] E-book viewer: Fix links with a href of "#" not working + +- E-book viewer: Hide the controls when clicking the back or forward buttons + +- [1914921] E-book viewer: When jumping to a highlight using the highlights panel, the back button should return to position before jump + +- [1914157] E-book viewer: Fix incorrect tooltip when hovering over a section title in the search results list + +- HTMLZ Output: Fix a regression in calibre 5 that broke creating HTMLZ documents when using the option to place CSS inline + +:: improved recipes +- Engadget + +}}} + +{{{ 5.10.1 2021-01-22 + +:: new features + +- [1911888] Bulk metadata edit: Add a new control to compress the cover image files for all selected books + +- [1912212] Add support for the CB7 comic file format + +- [1912070] E-book viewer: Allow adding a button to the selection bar that copies the + currently selected text along with a calibre:// URL to show the text in the book + +- Conversion: Insert metadata: Allow showing identifiers such as ISBN in the jacket page template + +- Conversion: Insert metadata: Allow hiding entries in the jacket template when they are not present in the metadata + +- [1912337] calibre-server --manage-users: Add a scriptable interface: calibre-server --manage-users -- help + +- [1912070] E-book viewer: Add keyboard shortcuts to copy the current location to the
clipboard + +- [1912003] E-book viewer: Highlight all currently visible Table of Contents entries, not just the first + +- Add a new tweak under Preferences->Tweaks->Author sort name algorithm to optionally recognize + common surname prefixes such as von, van, de, etc. when generating sort names. + +:: bug fixes + +- [1911470] E-book viewer: Move read aloud pop-up bar to the bottom of the screen in flow mode + +- [1911218] E-book viewer: Fix scrolling with two fingers on touch pad on macOS not smooth + +- [1911466] PDF Output: When converting fixed layout input documents fix anchors inserted for navigation sometimes being rendered as blue boxes + +- [1904350] Edit book: Remove unused CSS: Fix selectors that don't match from CSS rules containing multiple selectors not being removed + +- Bulk metadata download: Fix series number not being changed if the series is the same as the existing series + +- E-book viewer: Fix a regression that caused non-HTML descriptions to not be displayed in the metadata page + +- E-book viewer: Fix clock being displayed in 24 hr format on some systems even though system locale is set to use 12 hr format.
+ +- Bulk metadata edit: Fix regression that inverted the meaning of the case sensitivity setting in the Search & replace tab + +- calibredb list: Fix incorrect output when redirecting to file + +- 5.10.1 fixes a regression in 5.10.0 that broke conversion when including metadata as a jacket page and using long text custom columns + +:: improved recipes +- Jacobin +- Japan Times +- The Wall Street Journal +- Mediapart + +}}} + +{{{ 5.9.0 2021-01-08 + +:: new features + +- Annotations: Allow exporting highlights and bookmarks in Markdown format, with a link to open the book at the highlight location + +- [1909529] Content server viewer: Allow exporting all highlights + +- [1909339] Icon rules editor: Add a button to open the icons folder + +- [1909258] Quickview: Dropdown menu for all selectable columns + +:: bug fixes + +- [1909880 1906152] Fix a regression that caused the Content server to crash if a client closed a connection during a file transfer on macOS and Linux. + +- [1909224] LRF Output: Fix conversion broken in calibre 5 when font size rescaling is active + +- E-book viewer: Cancel any speech in progress when hiding the selection popup bar after triggering the speak aloud action on it + +- [1909332] Color/icon rules editor: Fix duplicate rule button not working correctly + +- [1909291] Fix dropping files onto Book details causing an error if the confirmation dialog is disabled + +:: improved recipes +- General Knowledge Today +- El Pais +- USA Today +- WirtschaftsWoche Online +- The Guardian +- Arcamax +- Miami Herald +- The Seattle Times + +:: new recipes + +- Mallorca Zeitung by VoHegg +- T-Online by VoHegg +- El Diario by Dirk Gómez + +}}} + +{{{ 5.8.1 2020-12-24 + +:: new features + +- Happy holidays to all calibre users!
+ +- E-book viewer: Add a mode to follow links with only the keyboard (triggered by Alt+F) + +- [1908929] Edit book: A new option to show a configurable number of lines above the current line when syncing the position of the preview panel to the current position in the code editor (under Preview settings in the Editor preferences). + +- [1907410] Windows: Automatically resolve shortcuts (.lnk files) when adding books to calibre. + +- Content server viewer: Don't enter full screen mode automatically when reading on desktop like devices (this can be controlled via a setting in the viewer preferences under Page layout) + +- E-book viewer tool bar: Add a select all action and a Read aloud action (can be added by right clicking the tool bar and configuring it) + +- Template/formatter enhancements: Add a 'for' statement and add the ability for a developer to pass extra information to a template. + +- [1907919 1907918] Rules editors for icon/coloring rules: Add a button to duplicate rules and to convert a rule to advanced template mode + +:: bug fixes + +- Content server viewer: Fix regression in 5.0 that broke scrolling on iOS + +- [1908000] E-book viewer: Fix error when scrolling to some search results in flow mode + +- [1898394] AZW3 Input: Fix rare AID based links not working. + +- [1907907] E-book viewer: Fix clock in header/footer not using system time format + +- Windows: Fix Read aloud not working with books that have a single large internal text file, such as MOBI or DOCX books + +- PDF Output: Fix a regression causing conversion to fail when typesetting Chinese text + +- Amazon metadata download: Fix no results being found when using the automatic or Google servers because of a change in the markup of the Google search results page. + +- [1909217 1909197] Version 5.8.1 fixes a couple of regressions that broke the Save to disk function and changing Page layout settings in the viewer.
+ +:: improved recipes + +- The Australian +- The Atlantic +- Zerohedge +- New York Times Book Review + +:: new recipes + +- SchwarzerPfeil by tastytea +- Substack by topynate +}}} + +{{{ 5.7.2 2020-12-12 + +:: new features + +- [major] E-book viewer: Add a "Read aloud" function that works via the operating system's Text-to-speech engine. + + Click the "Read aloud" button in the viewer controls to start reading the book text aloud from the current page. + +- A new busy spinner for waiting animations + +- [1907140] Edit metadata: Add buttons to easily set yes/no fields also shortcut to clear the field. + +:: bug fixes + +- [1905967] PDF Output: Fix a regression in the previous release that broke text rendering for some fonts due to a bug in Qt WebEngine (full fix is in 5.7.2). + +- [1905736] PDF Output: Fix conversion failing when adding header/footer and the input document defines margins/padding on the tag. + +- [1907159] Windows: Fix the case of library names in copied calibre:// links sometimes incorrect. + +- [1906459] AZW3 Input: Fix a regression in calibre 5 that broke processing of files with inline flow replacements. + +- [1906149] Fix hiding and showing Book details panel changes its size by a pixel or two. + +- [1907067] Get books: Fix amazon.fr not working because of website changes. + +- Bulk metadata search/replace: Fix text transform function not being applied to the test result in character mode + +- [1906464] Book list: Improve rendering of column headers when they don't fit, by eliding them instead of just cutting off rendering. + +- [1906063] Fix template function "first_non_empty" fails if no argument evaluates non-empty + +- [1907773] Fix regression in 5.7.0 causing failure to start if one of the previously used libraries had a stored path ending in a slash. 
+ +:: improved recipes +- The Guardian +- The Atlantic + +}}} + +{{{ 5.6.0 2020-11-27 + +:: new features + +- Edit book: Show a non-modal popup for a few seconds to allow undoing file delete operations + +- [1903418] Device books view: Add an action to the context menu to easily jump to the matching book in the calibre library view + +- [1903270] Add ability to undo Generate cover in the Edit metadata screen, by long clicking the Generate cover button + +- Edit book: Allow adding tags to the list of tags for the insert tag button + +- calibredb add: New option --automerge to automatically merge duplicates + +- [1905646] Add an edit notes action to the context menu in the viewer highlights panel + +- [1903333] Content server viewer: Allow viewing images in a new window by right clicking them, matches the calibre builtin viewer behavior + +- [1903403] Book details window: Allow opening the edit metadata window using either the keyboard shortcut or the context menu + +- Allow copying the current search as a calibre:// URL by right clicking the search box + +- Book details panel: Add entries to copy calibre:// links for the current book to the context menu + +- Edit book: Upgrade book: Ask whether to keep the NCX based Table of Contents + +:: bug fixes + +- [1904310] Windows: Fix calibre portable launcher not working correctly from root folder in calibre 5.5 + +- [1904505] macOS: Fix rendering of space after punctuation incorrect in Big Sur + +- [1905319] DOCX Input: When converting images placed using the obsolete VML markup default them to being inline rather than block images + +- [1905479] Metadata edit dialog: Fix keyboard shortcuts for bold/italic/underline not working when more than one comments editor widget is present on a single tab + +- [1905113] When using calibre://show-book URLs and the book is not found, clear any Virtual library or search restriction and then show the book + +- macOS: Fix calibre:// URLs not working from other documents + +- Fix 
calibre:// URL search action not changing library if needed + +- [1904305] annotation_count() template function displaying value for deleted annotations + +- [1905806] Edit book: Fix syntax highlighting not recognizing ends-with selector + +:: improved recipes +- The BBC +- Folha de Sao Paulo +- Netzpolitik + +}}} + +{{{ 5.5.0 2020-11-13 + +:: new features + +- [major] Support the calibre:// URL scheme + + Clicking on calibre:// URLs can be used to have calibre perform various actions. + For details, see: https://manual.calibre-ebook.com/url_scheme.html + +- Viewer: Show calibre:// URL for current book in the Goto->Location panel + +- [1902518] Edit book: Add an action to the right click menu for tabs to close tabs to the right of the current tab. + +- [1902413] Add ability to copy tag, publisher and author in Book details panel by right-clicking + +- [1902326] Annotations browser: Add a context menu for common actions. + +- [1899839] Annotations browser: Show a dot for highlights with notes. + +- [1902227] Edit book: Saved searches panel: Allow copying the current saved search to the regular search panel by clicking the Export button. + +- Windows: Fix command line arguments not working for the portable.exe launchers + +- Viewer: back and forward buttons on the mouse now trigger the back and forward actions + +- [1902313] Option to mark all books with annotations/bookmarks + +- Various minor improvements to how keyboard focus is handled in the Tag browser + +:: bug fixes + +- [1903294] Edit book: Fix regression in calibre 5 that caused an error when managing user dictionaries. + +- [1903831] Viewer: Fix previous/next buttons iterating over removed bookmarks + +- [1903825] Welcome wizard: Fix changing the language causing an empty folder to be created.
+ +- [1903699] Viewer: Fix regression causing custom shortcuts with Shift key pressed not working + +- [1903423] Viewer: In dark mode when showing images with transparency in the popup use a light background color as most images are designed with a light background color in mind. + +- [1903428] Fix a regression in the previous release that broke conversion of DJVU files + +- [1903363] Metadata edit: "Set to Undefined" button missing on integer columns. + +- [1903086] Entry remains underlined in Tag browser after clearing filter + +- [1902126] XML Catalogs: Fix languages field missing + +- Fix a regression that broke loading of third party plugins that are encoded in an encoding other than UTF-8 + +- macOS: Fix viewer not responding to cmd+c to copy text by default + +- Windows: Fix sending email on computers with non-ASCII computer names + +:: improved recipes +- Spectator Magazine + +}}} + +{{{ 5.4.2 2020-10-30 + +:: new features + +- [1900761] Windows: Allow adding of books to calibre from folders whose path length is larger than 260 characters + +- Tag browser: Add an option in Preferences->Look & feel->Tag browser to allow the tag browser to get keyboard focus + +- Tag browser: Allow editing the set of permissible values and colors for a custom column with fixed values, by right clicking on it. + +- Edit metadata dialog: Make most custom metadata controls use only a single line. + + Elide the names of custom columns that are longer than a fixed width, instead of using multiple lines. + Configurable via Preferences->Tweaks->Edit metadata custom column label length + +- Edit book: Remove unused CSS: Add an option to merge CSS rules that have identical properties + +- [1901379] Book details window: Double clicking on the cover now shows it in the default system image viewer. + +- [1900874] News download: Allow passing username/password in feed URLs. + +- [1900890] Open with: Allow renaming Open with applications. 
+ +:: bug fixes + +- [1900868] Viewer: Fix jumping to search result not always working in flow mode. + +- Get Books: Update Google and Gutenberg plugins for website changes + +- [1900946] Viewer: Fix keyboard shortcuts using Ctrl+Alt+letter key not working on Windows. + +- [1900942] Viewer: Fix keyboard shortcuts to shrink/grow selection not working. + +- [1900938] Viewer: Fix keyboard shortcut to toggle highlights panel not working when the highlights panel is itself focused. + +- [1900358] Viewer: handle editing of missing highlights more gracefully + + +- [1901289] Drivers: Fix regression in calibre 5 that broke connecting to SONY devices + +- [1901276] Open with: On Linux when reading names from .desktop files, use the first matching language + +- [1901593] Conversion: Fix a regression in calibre 5 that broke conversion of some malformed CSS stylesheets. + +- [1901957] Conversion: Fix a regression in calibre 5 that broke processing of some PDB files with images. + +- [1901278] Conversion: Fix regression in calibre 5 in the handling of @import() rules in stylesheets that import from folders above themselves. + +- [1901232 1901230] Content server: Fix various controls on the book details page not working when viewing a random book. + +- [1901273] Tag browser: Fix a crash when renaming a saved search to a value that already exists. + +- [1900921] Tag browser: Fix renaming of custom column with fixed set of values not working. + +- [1901630] Fix a regression in calibre 5 that broke auto-sync of generated catalogs to devices. + +- [1901113] Fix setting rating via ebook-meta command line tool broken in calibre 5. + +- [1900099] When boolean columns are set to bistate, checking "show checkmarks" results in all non-boolean values being shown as false + +- Spell check: Fix using non UTF-8 dictionaries broken in calibre 5 + +- Fix a rare issue where restarting calibre from within the program would fail. 
+ +- Version 5.4.2 fixes bugs in 5.4.0 that prevented calibre from starting on macOS and from choosing save file names on Windows. + +:: improved recipes + +- Psychology Today +- Washington Post +- LifeHacker +- The Atlantic +- New York Review of Books + +}}} + +{{{ 5.3.0 2020-10-16 +:: new features + +- Tag browser: When grouping items by first letter if adjacent letters have few entries combine them into a single group. + +Can be controlled via an option in Preferences->Look & feel->Tag browser to combine 'first letters' together if there are a small number of items under adjacent letters + +- [1899163] Viewer: Allow displaying the current page / total pages in the header/footer. Useful in paged mode to see exactly how many pages are left. + +- [1899834] Viewer search panel: Show more result context in a tooltip when hovering over a search result. + +- DOCX Input: Add support for Word controls used to rotate or flip images + +Only works with output formats such as EPUB that support CSS transforms + +- [1899762] Viewer image pop-up: Allow maximizing/minimizing the window. + +- Edit metadata dialog: Use only a single line for custom column date fields + +- [1899341] Add an item to search for categories to the Category editor context menu. + +- [1899316] Category editor: Add a right click menu to change case of the selected entries. + + +:: bug fixes + +- [1898221] Fix a regression in 5.0 that caused performance of dialogs that contain title/series/tags edit fields to be very poor with large libraries. + +- Fix a regression in 5.0 that caused some pop-up menus to appear on the wrong monitor in multi-monitor setups + +- macOS: Fix a long standing bug that prevented drag and drop of multiple items + +- Windows: Fix a regression that broke scanning for default programs for the Open with action + +- [1898894] Windows: Fix a regression in 5.0 causing some MOBI files with non-BMP characters not being processed correctly. 
+ +- E-book viewer: Fix scrolling past the end of chapter boundaries not working in books that have negative margins + +- [1899466] Edit metadata dialog: fix incorrect rendering of custom column names that start with emoji. + +- [1899318] Quickview: Fix nothing shown after clearing the search. + +- [1900022] Dark mode: Fix radio buttons having no outline. + +- [1900066] Annotations browser: When showing a book in the calibre library that is not currently visible, display an error. + + +:: new recipes + +- Deutsche Welle by VoHe + +:: improved recipes + +- Reuters + + +}}} + +{{{ 5.2.0 2020-10-07 +:: new features + +- [1897354] Browse annotations: Add a check box to restrict the displayed annotations to only the books currently selected in the calibre library. + +- Allow storing and calling functions in the calibre template language (Preferences->Template functions) + +- [1897336] Add a shortcut (Shift+V) to open the last read book. + + +:: bug fixes + +- Fix import errors with some third party plugin on Windows + +- [1898598] Viewer: Fix toolbar show controls button not working when no book is open. + +- [1898577] Viewer: Fix incorrect positioning of context menu for viewer toolbar. + +- MOBI Input: Fix regression that broke reading of some documents + +- Bulk metadata search and replace: Fix some regular expression causing errors with the new regex engine + +- Fix a regression that broke application of plugboards when sending by e-mail + +- [1898441] ODT Input: Fix a regression that broke conversion of ODT files with footnotes. + +- [1898413] Viewer: Fix mouse wheel not working while selection bar is visible + +- Viewer: Fix selection popup bar not always close to mouse when ending select-to-drag + +- Fix calibre-server not exiting on Ctrl+C on Windows + +- [1897410] Content server OPDS feeds: Fix error if the metadata for a book contains particular Unicode characters. + +- [1898167] Edit book: Reports tool: Fix a regression that broke sorting. 
+ +- HTMLZ Output: Fix an error when converting a document that has SVG images + + + +}}} + +{{{ 5.1.0 2020-10-02 +:: new features + +- Enhancement: allow using templates in search expressions + +- [1897435] Viewer lookup panel: Add a checkbox to disable automatic update of lookup when the selected text changes. + +- [1897415] Viewer: Allow skipping the confirmation when using the remove highlight button in the popup bar. + + +:: bug fixes + +- [1897618] ToC Editor: Fix bulk rename of entries not working + +- [1897571] Linux: Fix right clicking on system tray icon not working + +- [1897356] Fix User category based searches unreliable due to caching issues + +- PDF Output: Don't fail if the input document has an html file identified as the cover + +- [1897467] Catalog generation: Fix a regression that broke generating catalogs if books with no comments are present. + +- [1897314] Windows: Fix error on first run of calibre after install/upgrade + +- Viewer: Fix copy to clipboard button in toolbar not working + +- [1897297] Viewer: Fix copy to clipboard not copying text as HTML to clipboard in addition to plain text + +- [1897409] macOS: Fix some drop down menus such as in the ToC editor and cover generation in the Edit metadata dialog not working + + + +}}} + +{{{ 5.0.1 2020-09-25 +:: new features + +- [major] For details on the major changes in calibre between 4.0 and 5.0, see https://calibre-ebook.com/new-in/fourteen + +- [major] E-book viewer: Add support for highlighting + +https://manual.calibre-ebook.com/viewer.html#highlighting-text + +- [major] E-book viewer: Add support for right-to-left and vertical text + +- [major] Switch calibre to Python 3. This means that some no longer maintained third party plugins will not work. See https://www.mobileread.com/forums/showthread.php?t=326405 + +- Dark mode support for the Content server and E-book viewer user interfaces + +- Content server's in-browser viewer now supports bookmarking. 
Bookmarks and highlights are auto-synced across devices. + +- Regular expression engine used for searching the book list and reading metadata from filenames has been made more powerful. + + +:: bug fixes + +- Version 5.0.1 fixes a bug in 5.0.0 that broke connecting to devices on macOS + + + +}}} + +{{{ 4.23.0 2020-08-21 +:: new features + +- Kobo driver: Add support for new firmware. Also add recognition of 'Kobo Plus' subscription books + +- [1889925] Edit book: Allow customizing the base background/foreground and link colors for the preview window. + +- [1891765] Quickview panel: Add actions to the context menu to search for book in library, open in E-book viewer, etc. + +- Allow forcing calibre to remember the column width for the On device column by right clicking the header of the column + +- Template language: Add functions to perform math operations to convert fractional numbers to integers + + +:: bug fixes + +- [1849958] Content server: Fix Esc key not working in several views. + +- [1889973] Fix unable to delete default value for custom columns of boolean type, once set + +- EPUB Output: When splitting don't consider files with only a single character empty + +- Comments editor: Fix syntax coloring for HTML view in dark mode + +- Book list: When a series column is not wide enough, elide text in the middle so that the series number is visible + + +:: new recipes + +- linuxnews.de and t3n.de by Volker Heggemann + +:: improved recipes + +- El Diplo +- calibre Blog +- ESPN +- LA Times +- Winnipeg Free Press +- Popular Science +- Science News Recent Issues + + +}}} + {{{ 4.22.0 2020-07-31 :: new features @@ -21133,3 +22981,5 @@ calibre is now able to download social metadata like tags/rating/reviews etc., i - Critica Digital - Infobae - Spiegel International + +}}} diff --git a/Changelog.txt b/Changelog.txt index 737a06b259..26de694ecc 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -6,7 +6,7 @@ # to the ticket list. 
# Also, each release can have new and improved recipes. -# {{{ 6.x.0 2022-xx-xx +# {{{ 7.x.0 2024-xx-xx # # :: new features # @@ -23,6 +23,1187 @@ # - title by author # }}} +{{{ 7.7.0 2024-03-14 + +:: new features + +- [2056116] Trim image: Allow specifying the size of the trim rectangle using numbers + +- [2056664] Full text search: Allow pressing Ctrl+S to select the current book in the calibre book list + +- Speed up scrolling through the book list book-by-book by only redrawing the book details panel when scrolling pauses + +- Add a tweak under Preferences->Tweaks to set the first day of the week in popup calendars used to input dates + +:: bug fixes + +- [2056470] CHM Input: Handle CHM files with no HHC Table of Contents + +- E-book viewer: Fix a few settings such as pages per screen and header/footers not being saved in profiles + +- [2056614] Full text search: Ignore text inside tags when indexing books + +- DOCX Output: Preserve spaces around soft hyphens + +- Book details popup: Copy the original cover image to the clipboard even when the image is rescaled to fit into view + +- Fix changing disabled colors in the UI color palette overriding non-disabled colors + +- Content server: Fix deletion of viewer profiles not working + +:: improved recipes +- LiveMint +- Bloomberg +- National Geographic +- MIT Technology Review +- NY Review of Books +- Scientific American + +}}} + +{{{ 7.6.0 2024-03-01 + +:: new features + +- [major 1979022] E-book viewer: Allow saving current settings in "profiles" that can be quickly and easily swapped between + + To create a profile or switch to a previously saved profile access "Profiles" from the viewer controls or press the `Alt+P` shortcut. + +- [2053144] Edit book: Add a shortcut `Ctrl+M` to merge selected files + +- Get books: Add support for Amazon Mexico + +- A new toolbar button to show all available actions in sub menus. 
Can be added via `Preferences->Toolbars & menus` + +- Edit book: Allow selecting multiple books to edit at once, opening all selected books in separate editor instances + +:: bug fixes + +- [2054617] Cover grid: Fix dragging the mouse while holding shift to extend the selection not working well + +- [2054934] E-book viewer: Fix doing a multi-page selection sometimes causing the start of the selection to move backwards + +- Edit book: Live CSS: Fix regression causing incorrect colors in calibre 7 + +- Windows: Fix a regression in calibre 7 that caused images in long text columns to not be displayed in the tooltip for the column + +- Fix disabled items in menus having blurry text + +- Content server: Fix a regression in the previous release that caused an error when doing a search/sort on some browsers + +:: improved recipes +- New Yorker +- Moneycontrol +- Swarajya Mag +- nautil.us +- Pro Physik + +:: new recipes +- The Week UK by unkn0wn +- Andhrajyothy by unkn0wn +}}} + +{{{ 7.5.1 2024-02-09 + +:: new features + +- Allow fine tuning the colors used in the calibre interface via Preferences->Look & feel->Main interface->User interface colors + + Note that calibre no longer follows system colors on Linux in light mode (the only place where it used to follow system colors). However, + there is a convenient button in the manage colors dialog to import the system colors. + +- [2052462] Full text search window: Add actions to the context menu to open the book at the clicked on result. 
Also allow using the view book shortcut to open the currently selected search result + +- [2052460] When showing the Full text search dialog, pre populate the search box with the contents of the main calibre search box if the main search box contains a simple search + +:: bug fixes + +- Windows: Fix a regression in 7.0 that caused images referring to files on the disk within comments columns to not display in some circumstances + +- E-book viewer: Fix clock showing hour as zero instead of 12 between 12 and 1 am/pm + +- [2050100] Edit book: When wrapping selected text in a tag, preserve the selection after wrapping + +- [2050075] Content server: Fix a periodic spurious error message when reading books in offline mode + +- Fix a regression in 7.0 caused by a regression in Qt that would result in calibre hanging rarely when using the cover browser view + +- [2049992] Fix custom template functions not useable in save to disk templates + +- Fix a regression in 7.2 that caused the popup used for editing fields in the book list to be mis-positioned on very wide monitors + +- [2052766] Version 7.5.1 fixes a bug in 7.5.0 where calibre would not start up using dark colors when the system was in dark mode on some windows installs and another bug that could cause errors when using cover grid mode with covers stored in CMYK colorspace + +:: improved recipes +- El Diplo +- Science News +- Barrons +- Financial Times +- Business Today +- Le Monde +- Scientific American + +:: new recipes +- ugeskriftet by morusn +- Martin Fowler and Gitbug Blogs by Lucas Lois +}}} + +{{{ 7.4.0 2024-01-19 + +:: new features + +- Add support for the Supernote A6X2 + +- Narrow layout: Place the cover browser above the book list when the window has a narrow aspect ratio. 
Can be controlled via Preferences->Look & feel->Cover browser + +- [2046825] Category notes browser: Add a button to search for books in the currently selected category + +- Keyboard shortcut to toggle main window layout (Alt+Shift+L) + +- Add the possibility to assign keyboard shortcuts to the tag browser sort functions + +:: bug fixes + +- Fix a regression in the previous release that caused the cover browser to not close when it is used as a separate window + +- Fix a regression in the previous release that caused comments in the Book details panel to be rendered below rather than at the side of the other information + +- Edit book: Fix highlighting of special character not changed immediately after it is edited, only after a subsequent action + +- Move Preferences to the left in the default toolbar, making it less likely to be hidden behind an expander button on small screens + +:: improved recipes +- LiveMint +- Foreign Policy +- New Scientist + +}}} + +{{{ 7.3.0 2024-01-05 + +:: new features + +- Tag browser: Allow displaying buttons next to items that have notes or links for easy access. To enable these use Preferences->Look & feel->Tag browser + +- Font subsetting: Preserve all OpenType layout features. This makes the subset font files larger but ensures there is no visual degradation when using the subsetted font files in different contexts. 
+ +:: bug fixes + +- macOS: Fix main calibre window not remembering its size on restart + +- Edit book: Fix the "Show changes" window moving down by a few pixels every time it is opened + +- [2047257] Amazon metadata download: Fixes for various changes to amazon website markup + +- [2047181] E-book viewer: When looking up words via Google in Europe pre-approve the GDPR consent cookie + +- [2046673] Windows: Content server: Fix regression in previous release that broke testing for local connections + +- Windows: Fix a regression that caused selections in the Book details panel to be invisible + +:: improved recipes +- Foreign Policy +- Business Standard +- Il Post +- Indian Express +- Washington Post + +:: new recipes +- Hindustan Times Print Edition by unkn0wn +}}} + +{{{ 7.2.0 2023-12-15 + +:: new features + +- Content server: Also listen for all incoming IPv6 connections by default, not just IPv4 + +- Book details: Allow deleting notes from the right click menu easily + +:: bug fixes + +- [2045133] Windows: Edit book: Fix a crash when using the check book tool with a book that contains malformed markup, in calibre 7 + +- Windows: Improve flickering at startup due to a Qt regression in calibre 7 + +- Windows: Fix moving the main window causing popup/floating windows to be resized in calibre 7 due to yet another Qt regression + +- [2044659] Fix detection of existing books on the Tolino Vision 6 + +- Linux: Fix external applications not being launched under Wayland in calibre 7 because of a bug in Qt + +- Linux: Fix network requests failing under Fedora in calibre 7 + +:: improved recipes +- Reuters +- Scientific American +- New Yorker +- Times of India +- infzm +- singtaohk + +}}} + +{{{ 7.1.0 2023-11-23 + +:: new features + +- Notes editor: When pasting HTML with images offer to download remote images in the pasted content + +:: bug fixes + +- Fix regression in 7.0 that broke restore of db from backups + +- Content server: Fix newly added books on homepage not 
restricted to the books the logged in user is allowed to access + +- [2044118] When starting in system tray do not flash the main window briefly + +- Notes editor: Fix spurious error message when saving a note that contains pasted HTML that refers to an image + +- [2043998] Fix a regression that caused the unknown/unset date to incorrectly be displayed/edited as a date in the year AD 101 in some timezones + +- Comments editor: Fix data file links not working + +- Linux installer: Check that the user has libxcb-cursor.so.0 installed. If not quit early with an error message asking them to install it + +- [2044408] LRF Output: Fix a regression in 7.0 that broke conversion to LRF + +:: improved recipes +- Substack + +:: new recipes +- The World Ahead by unkn0wn + +}}} + +{{{ 7.0.0 2023-11-17 + +:: new features + +- [major] For details on the major changes in calibre between 7.0 and 6.0, see https://calibre-ebook.com/new-in/sixteen + +- [major] The ability to add notes to any author, tag, series, etc. 
in calibre with links and images + +- [2040487] E-book viewer: Add support for HTML image maps + +- Content server home page: Show the three most recently added books in the default library + +- Content server: Add button to return to book details page at top level of controls + +- Content server: Book details: Open links in comments fields in the same window rather than a new window + +- Add a "Show items in selected books" choice to Manage tags and Manage authors + +- [2042804] Show the previously used language, if any, second in the language selection drop down + +:: bug fixes + +- [2043415] Metadata download: Fix downloads from Google not working in Europe because of the GDPR + +- [2042815] Fix editing dates with days sometimes off by one day + +- Edit book: Fix highlighting for special characters not visible when the cursor is on the line with the special character + +- [2043248] Fix embedding metadata showing infinite error dialogs on windows if a file is locked by another program + +- [2040074] PDF Output: Fix an error when trying to set header or footer with an input document that has invalid content after the main body + +- [2042791] E-book viewer: Read aloud: Fix clicking on empty spaces causing read aloud to restart from beginning of chapter + +- Book details: Fix Manage authors via context menu not working + +- [2041848] E-book viewer: Showing chrome should close an active footnote popup first and only show chrome if the user repeats the action + +- [2041745] Fix very slow metadata updates on some PDF files + +- [1262875] Catalogs: Do not erase any tags the user adds to a catalog book entry when the catalog is re-generated + +- [2042748] Linux installer: Fix downloading of signatures for older versions + +- [2041357] Fix editing non-active Virtual library changes sort of book list + +:: improved recipes +- Financial Times +- National Geographic +- Hamilton Spectator +- Times of India +- MIT Tech Review +- Bloomberg +- Washington Post +- Project 
Syndicate +- Cumhuriyet +- Foreign Affairs +- Harvard Business Review +- Wall Street Journal + +:: new recipes +- Bookforum, Kirkus Reviews and Poetry Magazine by ping + +}}} + +{{{ 6.29.0 2023-10-20 + +:: new features + +- [2038760] E-book viewer: Add a command line flag --new-instance to force the viewer to open a new window even if the option to always use a single viewer window is set + +- [2038862] E-book viewer: Image popup: Add a checkbox to remember the last used zoom level + +:: bug fixes + +- [2039336] Annotations browser: Fix exporting highlights in markdown not including all chapter titles for books with only a single highlight per chapter or a multi level ToC + +- [2038747] E-book viewer: Fix a regression that could cause the viewer to enter an infinite loop when displaying the result of a search that has only one match that is not found + +- [2038747] E-book viewer: Fix the occasional search result being marked as not found even though it is found + +- [2039474] TXTZ Output plugin: Only keep images if the text format is one that can reference images + +- [2038848] TXTZ Output: Fix cover not being properly identified in the generated TXTZ metadata + +- [2038575] FB2 Input: Fix the "Annotations" section not showing up in the Table of Contents + +- [2039395] Linux: Content server: Do not call listen on pre-activated sockets + +- Fix sort order of similarly-named hierarchical categories + +- [2038778] Fix a regression that broke reading of covers from HTMLZ and TXTZ files + +:: improved recipes +- Wall Street Journal +- Scientific American +- 1843 +- Financial Times +- Spectator Magazine +- El Diplo +- Washington Post +- National Geographic + +:: new recipes +- Project Syndicate, Scroll.in and Newslaundry by unkn0wn +}}} + +{{{ 6.28.1 2023-10-07 + +:: new features + +- Edit book: Show full path to book being edited in the status bar + +- Edit book: When adding dictionaries allow directly adding a LibreOffice dictionary just by choosing the language + 
+- Edit book: When saving a copy add some convenience actions to edit the copy immediately either in the current editor window or a new window + +- E-book viewer: Highlights panel: Allow right clicking to export only selected highlights + +:: bug fixes + +- [2034900] Edit book: Fix a regression in the previous release that caused Text search to sometimes not select matches correctly + +- [2037198] Edit book: When copying files do it in order so that the files are pasted in the same order when pasting into another editor instance + +- Edit book: Fix smart tag insert not working correctly if the selected text starts with the closing angle bracket of a tag + +- [2038238] Save to disk: Do not limit the total path length to 240 characters on non-Windows platforms + +- [2037898] Fix incorrect cover for AZW3 version of calibre User Manual + +- [2029723] Content server: Change formatting of book details to match new de-emphasized titles based formatting in the main calibre program + +- PDF metadata: Fix a regression that broke updating metadata in PDF files without an /Info dictionary + +- NOOK driver: For the Glowlight 2023 write the calibre metadata files into the NOOK sub-folder as the firmware does not allow writing files into the root folder + +- NOOK driver: Fix Glowlight 2023 not being detected on Linux and macOS + +- [2037454] E-book viewer: Make CFI calculation more robust especially on pages with very little content + +- [2037543] E-book viewer: Workaround bug in Chromium where getBoundingClientRect() fails sometimes leading to incorrect calculation of anchor positions + +- [2037237] Fix errors caused by .DS_Store files inserted into the .caltrash directory on macOS if the user happens to open .caltrash in Finder + +- Version 6.28.1 fixes a regression in 6.28.0 that could cause errors when merging some book records in calibre + +:: improved recipes +- National Geographic +- Bloomberg +- Engadget +- Times of India +- Horizons + +:: new recipes +- Business 
Standard Print Edition by unkn0wn + +}}} + +{{{ 6.27.0 2023-09-22 + +:: new features + +- [2034954] Kobo driver: Support updated firmware + +- Nook driver: Add support for Nook Glowlight Plus 2023 + +- Data files manager: Allow drag and drop of files onto the list of files to add new files + +- [2036266] Support 7z archives in addition to ZIP and RAR for automatic ebook extraction when adding to calibre + +- Linux/macOS: Fix detection of the Kindle Scribe with MTP firmware + +- [2034760] E-book viewer: Allow clearing list of recently opened books from the open button's popup menu itself + +- [2034905] E-book viewer: Add a link to show the currently viewed book in the calibre library (Go to->Metadata in the viewer controls to access it) + +:: bug fixes + +- [2034999] Book information dialog: Fix a regression that caused incorrect color for titles in dark mode + +- [2034977] E-book viewer: Fix searching does not jump to first match if all matches are before current position in book + +- [2035579] EPUB Output: Fix duplicated title page entry in spine for books that define a title page that ends up getting split + +- Allow for-light/dark-theme icon overrides to work for plugin icons placed by the user in the override folder + +- [2035338] PDF Output: Fix background image + text not rendering correctly if the same background image is used with different text multiple times + +- [2034968] E-book viewer: Fix section titles in highlights panel being incorrectly expanded to full titles when the section has multiple highlights + +- [2035039] Prevent Quickview window from opening in the background + +- [2034900] Edit book: Fix a regression in the previous release that caused an error when doing text based searches + +:: improved recipes +- Ambito +- Financial Times +- USA Today + +:: new recipes +- Times of India Print Edition by unkn0wn +- Hindu Feeds based by unkn0wn +- Australian Financial Review by unkn0wn +}}} + +{{{ 6.26.0 2023-09-08 + +:: new features + +- MTP driver: 
Support the new Kindle Scribe firmware that causes it to act as an MTP device instead of USB disk. Add "Documents" to the list of default folders to send books to + +- E-book viewer: Add a new option under scrolling behavior to control horizontal mouse wheel events jumping to next section + +- Allow full customization of Book details styling via Preferences->Look & feel->Book details + +:: bug fixes + +- [2023046] Get books: Update ebooks.com plugin for website changes + +- Edit book: Fix searching for non-BMP unicode characters highlighting only half the character + +- [2034075] E-book viewer: Fix displaying more than one page per screen causing page layout to be slightly wrong for some books + +- [2034404] E-book viewer: Fix clicking on links with empty destinations hanging the viewer + +- [2033981] E-book viewer: Fix modifying, then jumping to and then modifying the highlight again from the highlights panel causing the highlight to be deleted + +- [2033205] E-book viewer: Fix indication of current section in Table of Contents sometimes wrong after changing font size + +- [2033530] E-book viewer/Content server: Disallow browser native context menu when right clicking in sandboxed iframes + +- MTP driver: Ignore top level folders whose names start with a leading dot. Also ignore AppleDouble files, top level system and fonts folders and sdr folders on Kindle devices + +- [2033074] FB2 Input: use the <p> tag for paragraphs that don't contain other block content + +- [2033118] E-book viewer: Fix clicking on the back/forward buttons not working in some situations + +- [2032974] E-book viewer: Fix scrolling to Table of Contents items that are within a single internal file not activating the back button + +- [2032694] E-book viewer: Fix changing the sans-serif font without also changing the monospaced font not working + +:: improved recipes +- National Geographic +- Sportstar +- Bangkok Post +- MIT Technology Review +- Bloomberg +- Economic Times India +- Firstpost + +:: new recipes +- Nikkei Asia (Magazine) and Espresso by unkn0w7n +}}} + +{{{ 6.25.0 2023-08-18 + +:: new features + +- [2029723] Book details panel: De-emphasize titles making the actual data stand out more + +- Allow using the new manage data files dialog from within the edit metadata dialog + +- [2030342] Trash dialog: Allow right clicking on an entry to save it to disk + +- [2027794] When merging books by drag-and-drop add an option to use the dragged cover instead of the cover in the target book + +- [2031571] Create catalog: Add buttons to easily select all/none/visible fields when creating CSV/XML catalogs + +- [2031570] Preferences->Add your own columns: Add buttons to show/hide all columns + +:: bug fixes + +- [2031341] Fix a regression in the previous release that broke parsing of some ISO-8601 timestamps + +- [2030671] E-book viewer: Mouse wheel horizontal events should jump sections not internal file boundaries + +- [2031569] Fix Preferences->Add your own columns changing check state on moving columns + +- Get books: update various Polish e-book stores for website changes + +- [2029521] E-book viewer: Fix CFI parsing of numbers with trailing zeros causing some bookmarks to not work + +- [2029521] E-book viewer: Show an error when creating a bookmark if the bookmark position is not found + +- [2031047] CBR Input: Fix comics with extremely long internal filenames not working on Windows + +- 
Edit book: Saved searches: Fix incorrect import in generated source code for some builtin functions + +- Data file manager: Fix errors on systems with larger font sizes + +:: improved recipes +- Bloomberg +- Focus +- Epoch Times +- Hindu +- Business Today +- NYTimes + +:: new recipes +- The Oldie by Sophist +- Various new Russian and Ukrainian news sources +}}} + +{{{ 6.24.0 2023-08-04 + +:: new features + +- [2023509] Easily manage the extra data files associated with a book by right clicking the edit metadata button and choose "Manage data files" + +- [2028216] Content server: Full text search: Allow searching a restricted subset of books + +- [2029014] E-book viewer: Allow clicking on calibre:// URLs + +- [2027727] Support fuzzier searching in the Tag browser find allowing unaccented characters to match accented ones + +:: bug fixes + +- [2028404] DOCX conversion: Preserve underline style and color + +- Fix incorrect sorting of device view after some operations + +- [2023737] Fix visit content server in browser not working when the content server is configured to listen on an IPv6 interface + +- [2028019] Fix one hour offset in some timezones on Windows for dates before 1970 + +- [2027763] Windows: HTML Input: Fix error when trying to add HTML files with links to other files that are invalid pathnames + +- Get books: Update various Polish language book stores for website changes + +:: improved recipes +- Epoch Times +- Business Standard +- Tagesspiegel +- Bloomberg Business Week +- MIT Technology Review +- Live Mint +- Private Eye + +:: new recipes +- The New Republic Magazine by ping +- Inc42 by unkn0w7n +}}} + +{{{ 6.23.0 2023-07-14 + +:: new features + +- [2025942] PDF Output: Add an option to discard the book cover instead of inserting it as the first page of the PDF + +- [2025333] Content server: Allow opening the Book details page for a matched book from the Full text search results page + +:: bug fixes + +- Windows: Fix empty data folder getting created 
in the directory calibre is launched from when changing title/author for a book with an existing data folder + +- [2026795] LIT Output: Fix error converting anything to LIT on Windows + +- Ignore failures to expire old trash during startup + +- [2025786] Amazon metadata download: Fix retrieval of publisher information from amazon.fr + +:: improved recipes +- Guardian & Observer +- Washington Post +- Private Eye +- Associated Press + +:: new recipes +- ThePressProject by Sotiris Papatheodorou +}}} + +{{{ 6.22.0 2023-06-30 + +:: new features + +- Kobo driver: Add support for newest firmware + +- [2023604] Trash bin: Allow setting removed books to be permanently deleted on library close + +- Windows: Nicer error message when file/folder is locked in another program + +:: bug fixes + +- PDF Output: Fix regression that caused large slowdowns when converting books with lots of internal HTML files to PDF + +- [2024139] CHM Input: Fix ToC entries that use fragments not being supported + +- E-book viewer: Fix searching for text next to hidden text not scrolling to the match + +- [2024375] E-book viewer: Fix selection popup not showing for some books on some platforms when the selection is in the top line + +- [2024433] DOCX Output: Fix multiple SVG images in the input document causing all the SVG images in the output to be just one of the input images + +- [2023943] MOBI Input: Ignore another form of corruption in trailing bytes + +:: improved recipes +- Foreign Affairs +- Nature +- Bloomberg +- LiveMint +- Hindu and Hindu Business Line +- Deutsche Welle +- Horizons +- Indian Express +- Psych +- Harper's Magazine + +:: new recipes +- Radio Canada by quatorze +- Deutschland Funk by Armin Geller +}}} + +{{{ 6.21.0 2023-06-13 + +:: new features + +- DOCX Output: Add support for SVG images + + Now the generated DOCX will contain both the rasterized version of the SVG + image and the original SVG image as the preferred source, which is supported + by modern versions of Word. 
+ +- [2023367] E-book viewer: Allow configuring the actions triggered by touch gestures + +- DOCX Input: Add support for SVG images + +:: bug fixes + +- Windows: Fix a regression in the previous release that could cause files to be deleted if one of the files/folders was open in another program while changing title/author in calibre + +- [2023395] macOS: Fix extra dock icons visible when doing a job using Qt WebEngine such as converting to PDF or searching in Get books + +- [2023476] macOS and Linux: Fix an error when changing metadata or deleting books whose files are owned by another user + +- [2023377] CHM Input: Yet another regression opening CHM files with missing internal files on windows + +- [2023431] CHM Input: Resolve absolute links to resource files from the root of the CHM file + +:: improved recipes +- Guardian & Observer +- Harper's Magazine Print recipe +- Live Mint + +:: new recipes +- The India Forum by unkn0wn + +}}} + +{{{ 6.20.0 2023-06-09 + +:: bug fixes + +- [2021413] CHM Input: Fix a regression in the previous release that broke conversion of CHM files + +- Windows: Make moving files in the calibre library folder more robust, locking folders in addition to files, before the start of the move + +- [2023046] Get books: Update Barnes and Noble store plugin for website changes + +- [2023189] Kindle output: Only re-encode JPEG images with EXIF metadata if the metadata contains actual transpose operations + +- [2023041] PDF Output: Fix error when input document contains multiple instances of a font some with vertical metrics and some without + +- PDF Output: Fix using CSS Multicolumns for body causing conversion to fail when header/footer is specified + +- [2022035] MOBI Input: Fix a crash when converting some corrupted palmdoc compressed MOBI files + +- [2021554] E-book viewer: Ensure CSS stylesheets are interpreted as UTF-8 + +:: improved recipes +- Foreign Affairs + +:: new recipes +- Prospect Magazine UK (Free) by ping +}}} + +{{{ 6.19.1 
2023-05-29 + +:: new features + +- HTML Input: Restrict adding of resources like images to only files within the folder hierarchy starting at the parent folder of the root HTML file + Can be controlled by customizing the HTML to Zip plugin in Preferences->Plugins or the --allow-local-files-outside-root option to the + ebook-convert command + +:: bug fixes + +- PDF Output: Fix regression in previous release causing non-English entries to be incorrectly encoded into the PDF bookmarks + +- PDF Output: Fix regression in previous release that caused blank pages when generating headers or footers + +- [2021367] Book list: Fix editing-in-place not pre-selecting existing text for some column types + +- Amazon.de metadata download: Update for site changes + +- PDF Output: Set /Creator and /Producer in /Info + +- [2020906] Fix row height incorrect in Manage category dialog when blank + +- [2021452] 6.19.1 fixes a bug in 6.19.0 that broke the edit metadata dialog + +}}} + +{{{ 6.18.1 2023-05-26 + +:: new features + +- [2020603] Cover download: Allow saving alternate covers to disk or in the book's data folder by right clicking on the cover + +- [2020237] Content server: Allow disabling full text search via the web interface + +- [2020233] When sending books to the device confirm the overwrite if the book already exists on the device + +- E-book viewer: Handle horizontal wheel events as section jumps in paged mode + +- Comic Input: When grayscaling comic images use 16bit gray instead of 8bit for better fidelity + + When using the PNG format for images this results in larger files but with better grayscaling fidelity. + +- Add a new option in Preferences->Searching to disable keyboard searching in book list (i.e. 
you can turn off the behavior that pressing a key will jump to the first book whose title starts with that letter) + +- [2018423] Manage categories dialog: Use alternating row colors and allow adjusting row height + +- Allow assigning a keyboard shortcut in Preferences->Shortcuts to open the data folder of a book + +- Various improvements to syntax highlighting for the Markdown long text editor + +:: bug fixes + +- [2018025] Fix a regression in 6.16 that broke restoring of the database + +- Tag browser: Fix using F2 to edit items not allowing completion + +- Book details: Fix formatting of text when copying all book details in narrow mode + +- Book details: Fix copy all not respecting line breaks in fields + +- [2018660] Fix a regression in previous release that broke scrolling when using the scroll_per_row tweak + +- [2018548] Fix a regression in the previous release that broke the category manager dialog in some situations + +- 6.18.1 fixes a regression that broke setting metadata and generating PDF files in the macOS and Linux binary builds + + +:: improved recipes +- NYTimes +- Economist +- Washington Post +- Irish Independent and Irish Times +- Live Mint +- Psych +- Hindu + +:: new recipes +- Irish Times Free by unkn0wn +- elEconomista.es and El Confidencial by Hugo Meza + +}}} + +{{{ 6.17.0 2023-04-26 + +:: new features + +- Font subsetting: Add support for WOFF format fonts and CID keyed fonts. 
Also further reduce file sizes when subsetting + +- Book details: Show a link to open the data files folder when data files are present + +- Template language: Add various functions to query the extra files associated with a book + +- [2017195] Edit book: Compress images: Support compression of images in the WEBP format as well + +- Comments editor: Add buttons to create links to data files and also to folders easily when inserting a link + +- Allow displaying the id, formats and path builtin columns via Preferences->Add your own columns + +- [2017232] Trash bin: Add a button to clear the bin + +- Metadata editor: Use a dedicated editor with preview for custom columns that store Markdown formatted text + +:: bug fixes + +- Fix a regression in the previous release that could result in empty author folders remaining in the library when the author of a book is changed + +- [2017373] Fix the data files associated with a book not being handled when using the Merge books and Copy to library functions + +- Fix a regression in the previous release that broke some operations in the Manage tags/authors/etc. dialogs + +- [2017217] Ensure metadata.opf is always written when deleting book even if it is not sequenced for backup + +:: improved recipes +- Scientific American + +}}} + +{{{ 6.16.0 2023-04-20 + +:: new features + +- [major] Allow storing extra data files with a book + + Right click the Add books button to add arbitrary files as "data files" to a book record. + These are managed by calibre along with the book files, but cannot be used for conversion/viewing. + Select a book and press the "O" key to view the data files in your file explorer. + +- Allow undoing the deletion of books from the calibre library + + Now deleted books are stored in a calibre "Trash bin" from which they can be restored with + a single click. To view the trash bin, right click the "Remove books" button. 
+ +- [2016070] Kobo driver: Add support for the new Kobo Elipsa 2E + +- Book details: If an item has an associated link then offer that link in the item's context menu + + +:: bug fixes + +- [2015617] Content server viewer: Fix end of chapter content being occasionally skipped when scrolling by screen full with multiple pages + +- [2017130] E-book viewer: Fix a regression that caused notes from a different highlight to be shown in some situations + +- [2015795] E-book viewer: Show an error if the user tries to search for only punctuation or spaces in the search modes that ignore these + +- Fix custom columns not showing in Book details links from other libraries + +:: improved recipes +- Frontline +- Outlook Magazine + +:: new recipes +- Bar and Bench by unkn0wn +- The Washington Post Print Edition by unkn0wn +}}} + +{{{ 6.15.1 2023-04-07 + +:: new features + +- Allow adding external links to tags, series, publishers, etcetera in addition to authors + + The links show up as click-able icons in the book details panel. They can be set by right clicking the + author/tag/etc. in the Tag browser and choosing "Manage" + +- calibre:// URL scheme: Add support for a new type of URL that pops up the metadata of the specified book in a new window + + Works even with books not in the current library. See https://manual.calibre-ebook.com/url_scheme.html#open-a-book-details-window-on-a-book-in-some-library for details + +- EPUB Output: Do not shrink images to fit the screen size by default, as modern readers do this themselves well enough. 
Can be controlled via an option in the EPUB Output section of the conversion dialog + +- Edit metadata dialog: Add buttons to pop up the category editing windows easily + +- [2012304] Metadata download: Allow specifying rules to transform publisher names in addition to author and tag names + +- [2007764] Edit metadata dialog: Use both a colored border and an icon to indicate errors in line edits + +- A new tweak in Preferences->Tweaks to control what program is run when clicking on URLs in calibre + +:: bug fixes + +- [2009586] E-book viewer: Fix a regression that caused incorrect highlight collision detection in some books + +- E-book viewer: Fix images embedded inside SVG tags not available for viewing in a pop-up + +- [2013972] DOCX Input: Do not ignore images that are present as fallbacks for a word drawing object + +- Comic Input: When converting grayscaled PNG images to PNG ensure output images are stored as indexed PNG + +- [2012797] Fix active tab not easy to distinguish in dark mode + +- [2011755] Content server: Fix re-opening book from home page after making progress not opening to correct last read position when a user is logged in + +- [2012760] Comments editor: When copying to clipboard, copy clean HTML rather than the junk Qt produces + +- Version 6.15.1 fixes an issue with the new URL scheme popping up incorrect book details windows + +:: improved recipes +- Saechsische Zeitung +- LA Times +- Mediapart +- Live Mint +- The Hindu + +:: new recipes +- Tehelka by Areet Mahadevan +- The Wire by unkn0wn + +}}} + +{{{ 6.14.1 2023-03-16 + +:: new features + +- [2007765] Edit metadata: When setting a cover from comic files allow choosing which page to use as the cover + +- [2009304] Allow display of columns built from other columns as comments in Book details + +- Comments editor: Add a shortcut for "Paste and match style" (Ctrl+Shift+V) + +:: bug fixes + +- [2004639] macOS: ToC Editor: Fix mouse becoming unusable when trying to create a new entry + +- When 
computing title sorts strip leading and trailing quotes, not just leading quotes + +- [2009268] Content server viewer: Fix searching only showing results from the current chapter onwards + +- [2009735] Check book: Fix some incorrect line numbers reported in a few CSS error messages + +- Fix regression in 6.14.0 that caused some generated resources to be excluded from the calibre source bundle + +- [2011586] Fix regression in 6.14.0 that broke using paths with single quotes in them for the calibre library + +- Fix ToC Editor on macOS in 6.14.0 not working inside the Edit book tool only + +:: improved recipes +- Strange Horizons +- The Saturday Paper +- New Scientist +- The Mainichi +- DR Nyheder +- New York Magazine +- Bloomberg +- Deccan Herald +}}} + +{{{ 6.13.0 2023-02-17 + +:: new features + +- Content server: E-book viewer: Long tapping on an image now causes it to be displayed in an internal popup rather than a new window as some browsers block the creation of new windows + +:: bug fixes + +- E-book viewer: Fix some adjacent highlights with nothing in between them not being displayed. 
+ +- [2006726] Content server: Workaround for Safari regression causing bookmarks to disappear on reload + +- [2007039] E-book viewer: Read aloud: Fix a regression in the previous release that caused the Read aloud controls to not reappear when Read aloud is canceled and restarted + +- [2006062] E-book viewer: Read aloud: Fix a regression in the previous release that caused an error when using Read aloud on a chapter with no text, such as the cover page + +- E-book viewer: Fix a regression that caused a spurious error on Windows when reading out selected text + +- [2007165] Fix a regression in calibre 5.0 that broke sorting the device view by title if one of the books has an empty title + +- Edit book: Spell Check dialog: Fix second word not getting selected after the first word is fixed + +- [2004621] Improve hover highlight color in tree views + +:: improved recipes +- CNN +- Bloomberg + +:: new recipes +- The Economist Espresso by unkn0wn +- Science X by unkn0wn +- Horizons by unkn0wn +- Deccan Herald by unkn0wn +- The Monthly by unkn0wn +}}} + +{{{ 6.12.0 2023-02-03 + +:: new features + +- E-book viewer: Read aloud: On Windows switch to using the new Microsoft speech subsystem with access to more voices + + Note that this means that the old voice, speed and audio device settings will not be used so a reconfiguration might be needed. + +- [2003712] calibre:// URL scheme: allow specifying a Virtual library for show_book URLs + +- [2003227] Add by ISBN: Allow adding using identifiers other than ISBN as well + +- Update bundled Qt to 6.4. This means calibre on macOS is now only supported on Big Sur and newer + +- Spell check dialog: Allow up and down arrow keys to work regardless of focus + +- [2002257] Allow multiple Template tester dialogs + +:: bug fixes + +- Windows MTP device driver: Ignore failure to enumerate objects inside non-root folders + + There are apparently a lot of devices out there that fail in this way. 
+ So rather than aborting the scan simply ignore the folder. + +- Book list: Fix a regression in the previous release that broke dragging to select multiple books + +- [2004197] Content server viewer: Fix reload book not actually reloading until the browser is also refreshed + +- [2003916] E-book viewer: Fix occasional false warning about highlight being overwritten + +- [2003908] E-book viewer: Fix detection of selected highlights when all text is selected + +- [2003729] Fix an error when embedding metadata into a large number of books + +- [2004522] When updating metadata in EPUB 2 files and no language is specified, do not remove the tag as this causes epubcheck to complain. Instead set the language to "und" + +- [2004083] Wireless device driver: Remove the timeout for initial connection + +- [2003652] Use an icon rather than a color to report errors in fields and the search box + +- Conversion dialog: Regex builder: Workaround bug in Qt that prevented searching for non breaking spaces in the wizard used to test search expressions + +- [2002864] Spell check dialog: move down after correcting word, not up + +- [2002534] Get books: Fix Mobileread store plugin not working + +:: improved recipes +- Jerusalem Post +- LiveMint +- The Seattle Times +- India Today +- Outlook Magazine +- Live Mint +- Irish Independent +- Irish Times + +:: new recipes +- Boston Globe Print Edition by unkn0wn +- Observer Reach Foundation by unkn0wn +}}} + +{{{ 6.11.0 2023-01-06 + +:: new features + +- Edit book: Check book: Allow automatic fixing of various simple CSS errors + +- E-book viewer: When Read aloud is speaking, make the control bar translucent so that words under the bar are visible + +- Edit book: Switch to a new library (stylelint) for finding problems in CSS as the old library was no longer maintained. 
+ +- Edit book: File browser: Allow using keyboard shortcuts to re-order the spine + +- [1982532] calibredb list: Allow specifying multiple fields for --sort-by + +- [2000037] Check library: Allow opening the book folder easily + +:: bug fixes + +- Fix windows not being moved onto the current monitor when they were previously visible on a removed monitor that was to the left of the current monitor + +- [1999995] Book list: Fix a regression in the previous release that broke drag and drop of multiple books + +- [2000877] Fix detection of Tolino Vision 6 on macOS/Linux + +- [2001880] Content server: Fix auto full screen not working when continuing to read books with user account enabled + +- Edit book: Set semantics: Fix error when setting the "Notes" semantic + +- [1999956] HTMLZ output: Fix images referred to in CSS stylesheets not being converted + +- [2000881] Book details panel: Fix HTML comment tags in the comments breaking display of book details + +- Content server home page: When showing recently read books from across devices hide the entries for which loading the cover fails + +- Windows Text-to-speech: Do not fail to configure if one of the voices has no defined language + +- Fix a regression in calibre 5 that broke using a file for the --extra-css option of ebook-convert + +- Content server FTS: Fix page header bar not visible + +- Content server: Fix identifiers from third party metadata download plugins not becoming clickable links on the book details page + +- Edit book: Warn when saving will overwrite a read-only file + +- Fix restoring geometry of maximized/fullscreen dialogs forcing them visible + +- [1999936] Fix a regression in the previous release that caused spurious error message when doing some out of band searches + +- Fix a regression in the previous release that broke choosing new programs for the Open with function + +:: improved recipes +- PC World +- HNA +- Caravan Magazine +- Harvard Business Review +- Various Israeli news sources + 
+:: new recipes +- NHK News - by Richard A. Steps +- Globes in English by barakplasma +}}} + +{{{ 6.10.0 2022-12-16 + +:: new features + +- [major] Content server: Add support for searching the full text of books. Simply click the FTS link on the search page to start a full text search. + +- [1998557] Content server: When using user accounts, the homepage now shows recently read books from any device not just the current device + +- [1999685] Kobo driver: Bump the max supported firmware version + +- [1998780] Conversion: New Output profile for the Kindle Scribe + +- [1998705] Check library: Allow ignoring folder names as well as files names + +:: bug fixes + +- [1999349] Edit book: Fix various formatting operations not inserting the tags in the correct place in the presence of non-BMP characters + +- Edit book: Use instead of for strikethrough + +- [1998899] Edit book: Fix export saved search to search panel not preserving the wrap checkbox state + +- [1998767] Content server: Redirect the index page to always have trailing slash when using URL prefixes + +- Book list: Workaround for change in Qt 6 behavior where clicking on an already selected row does not deselect other rows + +- [1998165] Windows: Fix a regression in calibre 6 causing Open with to not extract icons from EXE files + +:: improved recipes +- Indian Express +- Financial Times +- TIME Magazine +- Hindu Business Line Print Edition +- Arts and Letters Daily +- Frontline +- Sportstar +- New Yorker + +:: new recipes +- Fokus by Henrik Holm +- Press Information Bureau by unkn0wn +- Himal Southasian by unkn0wn +- Indian Express Print Edition by unkn0wn +}}} + {{{ 6.9.0 2022-11-25 :: new features @@ -503,1851 +1684,3 @@ Then, when searching the Tag browser, press Ctrl+Alt+Shift+F to restrict the dis - The Economic Times India Print Edition by unkn0wn }}} -{{{ 5.43.0 2022-05-27 - -:: new features - -- Kobo driver: Allow using templates to generate collections - -- [1975406] Book details popup: Double 
clicking on the cover now uses calibre's internal image viewer. Right click on the cover to open it with another program. - -:: bug fixes - -- [1971461] Fix Book details blank when switching from device view to library view - -- [1973591] TXT Input: Fix a regression in 5.39 that caused the option to remove indents also removing blank lines - -- [1972069] E-book viewer: Fix incorrect sorting of highlights from the first internal file of a book - -:: improved recipes -- Outlook Magazine -- India Today -- The New Yorker -- Foreign Affairs - -:: new recipes -- Asahi Shimbun by Albert Aparicio Isarn -- Business Today Magazine by unkn0wn -- Outlook Business Magazine by unkn0wn -- Donga by Minsik Cho -- Le Monde (English) by Darko Miletic -- Hinduism Today by Vishwas Vasuki -- The MIT Press Reader by yodha8 -- Live Science by yodha8 -- Financial Times Print Edition by Kovid Goyal -- Various Catalan language news sources by santboia - -}}} - -{{{ 5.42.0 2022-05-03 - -:: new features - -- E-book viewer: Ignore accents when doing a search - -- [1969926] Book list searching: Ignore punctuation when searching. So that, for example, Gravitys will match Gravity's - -- [1970045] Show the text used for marking books in the tooltip - -:: bug fixes - -- [1971150] Edit book: Reduce memory consumption by the checkpoint system when doing operations that involve parsing all book files - -- [1971015] Amazon metadata download: Fix titles starting with [ being ignored - -- [1970497] Edit metadata dialog: Undo not working correctly in identifiers field - -- [1970391] Fix viewing LRF files not working - -- [1969981] PDF Output: Fix an error on some invalid CSS in the input document - -- Linux binary: Workaround for Qt WebEngine not working on systems with glibc > 2.33 - -:: improved recipes -- New Yorker -- OMG! Ubuntu! 
-- ACM Queue -- CACM -- Science News -- Quanta Magazine -- Outlook Magazine -- Indian Express - -:: new recipes -- Caravan Magazine (Hindi) by Areet Mahadevan -- LWN (Free) by yodha8 -- IEEE Spectrum Magazine by yodha8 -- Financial Times by Kovid Goyal -- Cosmos Magazine by yodha8 -}}} - -{{{ 5.41.0 2022-04-22 - -:: new features - -- [1968810] Allow creating multiple types of temporary marks (pins) by right clicking the mark books button (which can be added to the calibre toolbar via Preferences->Toolbars & menus) - -- Kobo driver: Support updated firmware - -- [1967149] Show a popup message when a Kindle is connected mentioning the Amazon cover bug and how to workaround it - -- Edit book: Table of Contents tool: Allow using the title attribute on headings tags to get the text for table of contents entries - -- When creating a custom column to display real (floating point) numbers allow specifying the number of decimal digits when editing values - -- Amazon metadata download: Add support for amazon.in country website - -:: bug fixes - -- [1969302] Edit book: Fix AltGr+{ not working on some keyboard layouts - -- [1967828] TXT Input: Fix rare failure to convert some large TXT files with non-ASCII text - -- Get books: Update English language Amazon plugins for website changes - -:: improved recipes -- The Economic Times India -- Business Standard -- scmp.com -- Wired Magazine Monthly Edition -- Reason Magazine -- The Skeptical Inquirer -- Times of India -- LiveMint -- The Week -- Indian Express -- Hindustan Times - -:: new recipes -- Eenadu by unkn0wn -- Harvard Business Review by unkn0wn -- Hindustan by unkn0wn -- Dainik Bhaskar by unkn0wn -- Free Inquiry by Howard Cornett -- Sportstar by unkn0wn -- Digit Magazine by unkn0wn -- The Diplomat by unkn0wn -}}} - -{{{ 5.40.0 2022-04-01 - -:: new features - -- [1966872] Content server viewer: Allow editing bookmarks - -- [1967028] Read covers from CBC comic files - -- [1966537] Allow filtering authors/tags when creating 
virtual library based on them - -- [1966851] Add a copy button to the image view popup - -- Template language: Support for nested functions and a string concatenation operator - -:: bug fixes - -- DOCX Output: Fix a comment immediately after a <li> tag breaking the conversion - -- Standalone ToC editor: Fix spurious error message if left open for more than two minutes - -- [1965693] Fix search-as-you-type triggering an extra search after manual confirmation - -:: improved recipes -- Courrier International - -:: new recipes -- Reason Magazine by Howard Cornett -- Seminar Magazine by unkn0wn -- Frontline by unkn0wn -}}} - -{{{ 5.39.1 2022-03-18 - -:: new features - -- [1963875] E-book viewer: Allow scrolling of the ToC, highlights, bookmarks, etc. with touch gestures - -- [1963822] Edit metadata dialog: When using the change case operations if some text is selected, only operate on the selected text - -- [1964123] Use atomic writes for the config files to ensure no partial data is written in case of crash/power loss - -:: bug fixes - -- 5.39.1 fixes a couple of regressions that broke case change in the Bulk metadata edit dialog and remembering column widths in the book list on some systems. - -- [1964742] Content server: Fix reading of books with thousands of internal files not working in the Chrome browser - -- [1965182] Catalog generation: Fix a rare crash when generating very large catalogs - -- Edit/Polish book: Fix hardcoded Unicode ligatures not being preserved in AZW3 format books - -- [1963868] Fix automatic searches causing search box to lose focus when search as you type is enabled in Preferences->Searching - -- [1963748] Edit book: Check book: Auto fix package identifier being empty - -- [1963856] Amazon metadata download: Fix getting series info from amazon.jp - -- Edit book: Insert hyperlinks: When sorting anchors on elements without any text content, use the anchor itself - -- Edit book: Make the saved search panel freely resizable - -- Edit book: When dragging to select a region or adjust the selection fix mouse moving outside the image causing the region to no longer be adjusted - -:: improved recipes -- India Legal Magazine -- The Smithsonian -- The Federalist - -:: new recipes -- Swarajya Magazine 
by unkn0wn -- Open Magazine by unkn0wn -}}} - -{{{ 5.38.0 2022-03-04 - -:: new features - -- [1852929] E-book viewer: When displaying estimated time to completion for reading a book, remember the reading rate the next time the book is opened - -- [1961500] Dark theme: Highlight the current cell in the book list with a lighter background and different foreground to make it more obvious - -- [1961639] An option to disable editing composite columns in the main book list when Tabbing through them (Preferences->Look & feel->Edit metadata) - -:: bug fixes - -- Tag editor: Fix regression in previous release that caused double clicking on tags to not work on non Linux platforms - -- [1962365] Copy to library: Fix annotations not being copied - -- [1962213] Edit book: Spell check: Fix words after a comment not being checked - -- [1960554] PDF Output: Fix conversion failing if there are ToC entries pointing to removed content - -- [1961775] E-book viewer: Fix an error when opening books with MathML for the second time if the last read position was at a MathML element - -- Edit book: Fix double clicking to select a word also selecting smart quotes surrounding the word - -- EPUB 3 metadata: Fix non-integer series index being sometimes represented using exponential notation - -:: improved recipes -- Lenta.ru and aif.ru -- Indian Express -- Live Mint -- Mainichi -- Japan Times - -:: new recipes -- Hindustan Times by unkn0wn -- India Legal Magazine by unkn0wn -- RT на русском by Vuizur -}}} - -{{{ 5.37.0 2022-02-18 - -:: new features - -- [1961129] Book details: Add actions to trim the cover to the right-click menu - -- [1960586] Allow removing multiple email addresses at once in Preferences->Sharing by email - -- Book details: Use a better mono-spaced font on Windows by default - -- Add a tweak in Preferences->Tweaks to change the behavior of the Tab key when completing entries - -- [1959928] Edit metadata: In "All in one" mode add an adjustable splitter between the cover and 
formats boxes - -:: bug fixes - -- [1960686] Textile output: Don't fail if input document has invalid padding or margin specifications - -- [1960446] E-book viewer: Fix image display window not remembering its size and settings when run from within calibre - -- E-book viewer: Fix setting to use roman numerals for series not being respected - -- Edit book: When saving a copy do not fail if the original file has no write permissions - -- [1960180] Embed fonts tool: Create when missing - -- Tag editor: Improve performance when very large number of tags present - -:: improved recipes -- Live Mint -- The Hindu -- Reuters -- MMC RTV Slovenija -- Down To Earth -- Publico.PT -}}} - -{{{ 5.36.0 2022-02-04 - -:: new features - -- Edit metadata dialog: Allow controlling which custom columns are present in this dialog via Preferences->Look & feel->Edit metadata - -- Edit metadata dialog: Allow manually sizing the various sections of the dialog in "All on 1 tab" mode - -- Edit book: Spell checking: Update the bundled English and Spanish dictionaries - -- [1958773] BibTeX catalogs: Support tags like custom columns - -:: bug fixes - -- [1959659] Amazon metadata download: Fix paragraphs in the comments being merged - -- [1958979] Amazon.de metadata download: Fix published date and series information not being fetched for some books - -- Email delivery: Fix sending email via Hotmail not working since this week because Microsoft changed the SMTP server name - -- [1959220] Do not remove articles for titles in the Polish language - -- [1959207] E-book viewer: When using Read aloud do not automatically lookup the highlighted word until read aloud is paused or stopped - -- E-book viewer: Fix Ctrl+p shortcut for printing not working - -- [1958882] Show an error when viewing a specific format and the file is missing - -- Edit book: Fix renaming of classes that start/end with non word characters not working - -- [1958730] Edit book: Preview panel: Fix hyphenation at end of line being 
rendered as boxes on macOS - -- [1959893] Fix incorrect selection size displayed in Trim image dialog when image is scaled down to fit - -- [1959782] Edit book: Fix pasting files from another editor instance failing if a file with the same name already exists - -- [1959981] When reviewing metadata if the newly downloaded metadata has no language but there is an existing language, ensure it is preserved - -:: improved recipes -- India Today -- Indian Express -- Live Mint -- Al Jazeera in English -- The Financial Express -- The Straits Times - -:: new recipes -- title by author -}}} - -{{{ 5.35.0 2022-01-21 - -:: new features - -- [1956006] Coloring/icon rules: Allow creating a rule for date columns that matches *today* - -- Kobo driver: Add support for new firmware - -- [1954890] Content server: Show total number of results when searching for books - -:: bug fixes - -- [1958028] E-book viewer: Fix searching for text near the end of a chapter sometimes not working - -- [1954714] E-book viewer: Fix auto hyphenation on macOS not rendering the hyphens correctly - -- Edit book: Reports: Fix thumbnails of SVG images not rendered - -- ODT metadata: Support reading tags from multiple elements - -- [1958115] LRF Input: Fix a regression in calibre 5 that broke parsing of some LRF files - -- [1956097] MOBI output: Don't fail if input document contains invalid % based lengths - -- [1955308] AZW3 Input: Handle AZW3 files with incorrect TAGX Offset INDX header fields - -- [1956932] Comic conversion: Fix conversion of comic images that are stored as grayscale images in JPEG format not working when converting to PDF with image processing turned off - -- [1955967] calibredb catalog: Fix --ids and --search options not working for CSV/XML catalogs - -- [1958490] Tag browser: Fix the find box not using all available width - -- [1956192] E-book viewer: Remove books that do not exist from the recently opened book list - -- Completion popups: Fix display of items containing line breaks - 
-- [1956129] Fix line breaks in custom column descriptions not being rendered in their tooltips - -- [1956088] Fix Preferences->Searching->Clear search histories not taking effect till a restart for some search boxes - -- [1955732] Hierarchical entries in user category may not merge correctly in tag browser - -:: improved recipes -- Foreign Affairs -- MIT Technology Review -- Reuters -- Clarin -- General Knowledge Today -- Popular Science - -:: new recipes -- Dw.de by xav -- Equestria Daily by Timothee Andres -}}} - -{{{ 5.34.0 2021-12-17 - -:: new features - -- Happy holidays to everyone! - -- Driver for the new Nook Glowlight 4 - -- Edit book: Spell check tool: Add an exclude files button to exclude some files from being checked - -- EPUB/MOBI Catalogs: Increase the maximum thumbnail size to 3 inches from 2 inches - -- [1953739] Allow creating a shortcut in Preferences->Shortcuts->Edit metadata to paste metadata ignoring the value of the exclude_fields tweak - -- [1954715] E-book viewer: Displays links marked up as glossary and bibliography links as popups - -- [1954572] Add a tweak in Preferences->Tweaks to provide the sort value for undefined numbers - -:: bug fixes - -- Edit book: Fix pressing F8 to jump to next misspelled word not working after last word in current file - -- [1954889] Fix PDB E-reader output broken in calibre 5 - -- [1954839] Edit book: Reports: Include descendant selectors that use classes when counting class usage - -- [1954726] E-book viewer: Fix an error when opening some books with highlights that span in-line text formatting - -- [1954460] MTP driver: Do not send the calibre device db files to the root folder on the Supernote A5 x as it fails - -- ToC Editor: Workaround an occasional error when closing on Windows if the file being edited is in a DropBox/antivirus prone folder - -- Fix a regression in the previous release that broke creating new keyboard shortcuts - -- Comments editor: When flowing the tool bar onto multiple lines do not 
split up groups of buttons - -- Various compatibility fixes for Python 3.10 used by some Linux distributions - -:: improved recipes -- Pocket -- El Pais -- American Prospect -- Mediapart - -}}} - -{{{ 5.33.2 2021-12-03 - -:: new features - -- Allow changing the icon used for calibre libraries. Right click the library icon in calibre and choose "Change the icon for this library" - -- Comments editor: Use a single line for all three toolbars if they fit - -- Edit book: Allow merging HTML files by drag and drop of the files onto another HTML file - -- Kobo driver: Add support for listing purchased audiobooks - -- Edit metadata all-in-one mode: The cover and formats column now gives as much vertical space as possible to the cover image - -- [1952562] Add books dialog: When a non-book file type is added the next time the dialog is used, preselect the "All files" filter - -- [1952764] calibre-server --manage-users: Allow managing users while the server is running and also add actions to automate changing user account restrictions - -- [1950762] EPUB 3 metadata: If the book contains a "subtitle" append it to the main title when reading metadata - -:: bug fixes - -- [1950673] E-book viewer: Fix an occasional hang on startup at "Loading section" - -- [1952142] Get books: Update the Kobo plugin for website changes - -- [1951673] Bulk edit metadata dialog: Fix changing the search mode resetting other fields - -- [1951507] E-book viewer: Fix sorting of highlights incorrect in books that use HTML ids with a hyphen in them - -- [1951467] PDF Output: Fix the option to break long words at the ends of lines causing boxes to be rendered at the end of the line on macOS with some fonts - -- Google metadata plugin: When searching by ISBN if no results are found retry using an alternate query syntax - -- 5.33.2 fixes a couple of regressions that broke the toolbar in the popup comments editor dialog and rendering of the download - metadata button in the edit metadata dialog on Windows, 
as well as reading files from MTP devices on Windows - -:: improved recipes -- Smithsonian Magazine - -:: new recipes -- The Epoch Times by Kovid Goyal -- Mens Day Out by Vishwas Vasuki -}}} - -{{{ 5.32.0 2021-11-12 - -:: new features - -- [major] Edit book: Add a tool to transform HTML tags based on rules (Tools->Transform HTML) - - Allows for making transformations such as changing one html tag to another, deleting tags, wrapping - them in another tag, etc. Also available during conversions via the Look & feel->Transform HTML - section of the conversion dialog. - -- [1949908] Driver for the new Tolino Vision 6 - -- Kobo driver: Add support for the latest firmware released last week - -- [1948889] When picking a random book ensure recently chosen books are not re-selected - -- Icon theme chooser dialog: Allow right clicking on a theme to visit its homepage - -:: bug fixes - -- Amazon metadata download: Add support for more markup variations in amazon's sites that could prevent the fetching of - ratings, comments and series metadata for some books - -- Google search: Add support for new beta search results page markup that was preventing using cached Google pages - to search for Amazon metadata - -- PDF Output: Fix the option to preserve cover aspect ratio being ignored when converting comics - -- [1950412] DOCX Input: Sanitize image filenames more strictly to workaround broken EPUB software - -- [1950206] Linux binary: Fix file dialogs not working on Fedora 35 under KDE - -- [1949604] When sending email to the Kindle and PocketBook sync services use ASCII filenames as there have been some - reports of issues with non-ASCII filenames with these services. 
- -- [1950033] Book Details: Fix missing copy options on composite columns - -- Ask for confirmation when deleting covers from books - -:: improved recipes -- New York Post -- Liberation -- Boston Globe -- The Globe and Mail -- LeMonde - -:: new recipes -- India speaks reddit feed by Vishwas Vasuki -}}} - -{{{ 5.31.1 2021-10-29 - -:: new features - -- [1948883] Kindle driver: Support the new Kindle PaperWhite 2021 - -- Add an option under Preferences->Behavior to have calibre recognize numbers when sorting (this was previously under Preferences->Tweaks) - -- E-book viewer: Add a button to directly open the viewer help section in the calibre user manual to the viewer controls - -- E-book viewer: Prevent the display from sleeping when using auto-scroll or read aloud modes (Implemented only on Windows and macOS) - -- Edit book: Set semantics tool: Add support for EPUB 3 landmarks - -- [1948493] Add an entry to the Connect/share menu to open the content server in a local browser when it is running - -:: bug fixes - -- [1947879] Content server: Fix some OPDS feeds failing with non-ASCII content - -- [1948560] Tag browser: Fix incorrect first letter partitioning when enabling numeric collation of items that start with a number - -- [1949167] 5.31.1 fixes a bug in an HTML serialization library calibre uses that broke a few things, such as the comments editor - tool in the metadata dialog - -}}} - -{{{ 5.30.0 2021-10-22 - -:: new features - -- Add support for the new Kobo Sage and Libra 2 e-book reader devices - -- [1946439] E-book viewer: Read aloud: Allow right clicking to play/pause reading - -- Sending books by e-mail: Preserve non-English characters in attached filenames - -- [1946560] Tag browser: Allow searching for sub-categories by right clicking on them - -:: bug fixes - -- E-book viewer: Fix cover and full screen images not centered in paged mode when more than one page is displayed per screen - -- ToC Editor: Ignore in succession clicks on the OK and Cancel 
buttons to avoid accidentally closing the window when finishing creating a new entry - -- [1905479] Comments editor: Fix the formatting buttons not showing the current state correctly and fix some keyboard shortcuts not working when more than one comments editor is present in a single window - -- [1946417] Tag browser: Fix renaming of User categories in Virtual libraries - -- [1947948] Make removing large numbers of custom column icons easier - -:: improved recipes -- Private Eye -- Foreign Policy -- Le Monde Diplomatique - cono sur - -}}} - -{{{ 5.29.0 2021-10-08 - -:: new features - -- [1945890] Allow drag and drop of books onto formats in the Tag browser to convert them to that format - -- [1945891] Allow creating sorts based on multiple columns (Add the Sort action to the toolbar via Preferences->Toolbars & menus) - -- Edit book: When changing a paragraph to a heading if the cursor is adjacent to a paragraph tag but not inside any tags other than body, use the adjacent tag - -:: bug fixes - -- [1945889] Auto adding: Run relevant plugins before reading metadata from the book. 
Matches behavior of manual adding - -- [1945882] Content server: Fix category collapse by partition not working - -- Prevent Tab from causing focus to leave the Tag browser - -- Edit book: See what changed: Fix non-BMP unicode characters causing highlighting of changed words to be slightly misplaced - -- PDF Output: Fix a regression that broke conversion of comics that contain 1-bit images - -- Edit book: Fix pressing Ctrl+Tab inserting a tab at the start of a line instead of switching tabs - -- [1945098] Fix a regression in the previous release that caused identifiers set by some plugins to not be saved in the database - -- [1946342] Template language: Fix nesting composite columns sometimes failing - -:: improved recipes -- Entrepreneur Magazine -- Dawn -- New York Review of Books - -:: new recipes -- Various Indian news sources by Vishvas Vasuki - -}}} - -{{{ 5.28.0 2021-09-24 - -:: new features - -- Edit metadata dialog: Customize cover generation: Allow saving and loading cover generation settings as "themes" - -- [1944614] E-book viewer: Allow pressing the 0-9 keys to apply a quick highlight style - -- [1943521] Book details panel: While clicking tags/authors/etc. 
holding down the Ctrl+Shift modifier keys now add the tag to the current search with "AND" instead of "OR" when using only Ctrl - -- [1944057] Add an option to the preferences drop down menu to restart calibre without third party plugins - -:: bug fixes - -- [1944562] Edit book: When renaming classes in style sheets only recognize class names preceded by a period - -- E-book viewer: Fix lookup in Google partially hidden due to change in Google results page markup - -- Conversion dialog: Search replace expression builder: Fix incorrect search result highlighting when non-BMP unicode characters are present in the text - -- [1943270] E-book viewer: Fix popup footnote blank when the footnote link points to a tag - -- [1944433] E-book viewer: Fix jumping to highlights in text that occurs after a line break and newline character not working in paged mode - -- [1943495] Kindle Output: Strip EXIF metadata from JPEG images as the Kindle renderer has issues with it - -:: improved recipes -- Аргументы и Факты -- India Today - -}}} - -{{{ 5.27.0 2021-09-10 - -:: new features - -- When adding markdown (.md) or textile (.textile) files that contain references to images, automatically add them as txtz with the images - -:: bug fixes - -- DOCX Output: Correctly convert soft hyphens in the input document to DOCX soft hyphens - -- [1942805] DOCX Input: Fix a bookmark at the end of a paragraph causing the bookmark at the start of the paragraph to be skipped - -- [1942773] Edit book: Spell check: Fix EPUB 3 nav document not being spell checked when not in the spine - -- [1942012] PDF Output: Fix a rare failure when the input document has a ToC item pointing to the last page - -- [1942129] Windows: Fix a regression in calibre 5 that caused drag and drop from WinZip to not work - -- [1941992] TXT Output: Fix a regression in calibre 5 that caused the max line length option to not work - -- When auto converting added TXT files with image references to TXTZ use a full markdown parser to 
detect markdown images - -:: improved recipes -- BBC News -- Foreign Affairs - -:: new recipes -- The Week by Kovid Goyal -}}} - -{{{ 5.26.0 2021-08-27 - -:: new features - -- [1941013] Dark color scheme: Use a darker blue for highlighted items - -:: bug fixes - -- Content server book viewer: Show a message when a search finds no matches - -- MOBI Output: Fix JPEG images without any JFIF metadata not being rendered on the Kindle - -- [1939908] Comic input: Fix single color images having their colors changed by normalization - -- [1940005] E-book viewer: Fix creating multiple highlights in a single paragraph that also contains some extra text formatting at the start causing the second and subsequent highlights to malfunction - -- [1939912] Edit book: Fix a regression in the previous release that broke the options in the Remove unused CSS dialog - -:: improved recipes -- Boston Globe - -:: new recipes -- NYTimes Cooking by gourav -}}} - -{{{ 5.25.0 2021-08-13 - -:: new features - -- [1939469] Edit/Polish book: Remove unused CSS now also removes unreferenced stylesheets - -- E-book viewer: Add some CSS variables and classes that allow writing calibre specific CSS in ebooks. 
See https://manual.calibre-ebook.com/viewer.html#designing-your-book-to-work-well-with-the-calibre-viewer - -- A new framework plugins can use to be notified about changes to calibre libraries - -- [1938752] Edit metadata dialog: When pasting into the identifiers field if the clipboard contains a URL paste it directly as a URL identifier - -:: bug fixes - -- [1938448] E-book viewer: When displaying popup footnotes use the same writing direction as the main text for the footnote popups size and header - -- E-book viewer: Improve the text layout when looking up words in Google - -- Content server viewer: Fix read aloud not working on mobile browsers - - -:: improved recipes -- The Guardian and The Observer -- Wall Street Journal -- The Atlantic - -}}} - -{{{ 5.24.0 2021-07-30 - -:: new features - -- Conversion: Insert metadata as jacket: Allow adding timestamp and publisher fields. Also allow controlling the formatting of date/time fields - -- [1937025] Cover browser: Add an option to view the central book by double clicking instead of single clicking (Preferences->Look & feel->Cover browser) - -- [1936891] Tag browser: Add actions to the configure menu to toggle the display of counts and average rating - -- [1936472] Tag browser: Allow plugins to add entries to the context menu - - -:: bug fixes - -- [1938189] fetch-ebook-metadata: Fix an error when using the --cover option and no cover is found - -- [1936792] HTML Input: Fix the presence of BookDesigner markup causing conversion to fail - -- [1936184] TXT Input: Do not fail if the txt file references a directory as a resource - -:: improved recipes -- MSNBC -- Nature News -- Boston Globe -- Foreign Policy -- Le Monde - -}}} - -{{{ 5.23.0 2021-07-09 - -:: new features - -- [1934204] Annotations browser: Show highlight color in the preview panel - -- TXTZ format: Store type of text formatting in the metadata and use it automatically when converting from TXTZ - -- [1934043] Edit metadata dialog: Allow holding Ctrl and 
clicking the item editor buttons to instead open the manage dialog - -:: bug fixes - -- [1929325] Annotations browser: Fix searching for words in languages such as Chinese that do not have word delimiters not working - -- News download: Fix URLs with spaces in them not being downloaded since calibre 5.0 - -- [1933989] When searching for books by an author from the Manage authors dialog, use exact matches - -- [1933797] MOBI Output: Fix invalid color specification as plain numbers causing conversion to fail - -- [1933684] MOBI Output: Fix invalid text indent specification causing conversion to fail - -- Linux: Drop the unmaintained dbus-python in favor of jeepney for DBUS - -- Edit book: Workaround for Qt bug that caused the panel sizes in the editor to not be remembered across sessions - -:: improved recipes -- The Guardian and the Observer -- National Geographic -- Handelsblatt -- Huffington Post - -}}} - -{{{ 5.22.1 2021-06-25 - -:: new features - -- [1931646] E-book viewer: Allow clicking links in popup footnotes - -- Main book list: Scroll per pixel rather than per item by default. 
Can be returned to previous behavior via Preferences->Tweaks->Control behavior of book list - -- Linux: Drop support for the global menu bar - -:: bug fixes - -- [1932152] E-book viewer: Fix font sizes specified in absolute units not being honored in locales where the decimal separator is not the period - -- [1931566] E-book viewer: Fix searching for short strings in text with lots of similar entries displaying incorrect matches - -- [1932392] MOBI Output: Fix using percentage units for margins resulting in too large margins when using the tablet output profile - -- [1931599] E-book viewer: Fix back button not working after jumping to a bookmark - -- [1932992] Content server: OPDS feed: Fix incorrect up URL in category group feeds - -- [1933559] Content server: Fix a regression in the previous release that broke editing of series metadata - -- E-book viewer: Fix scrolling backwards by screen-fulls not working with very large page margins. - -- MOBI Input: Fix a regression in calibre 5 that broke processing of Haodoo format files - -- Conversion: Fix the smarten punctuation option not applying to inserted jacket page - -- 5.22.1 fixes a typo in the previous release that broke device detection on Linux - -:: improved recipes -- TheAtlantic.com -- Hindu -- People Daily - -}}} - -{{{ 5.21.0 2021-06-11 - -:: new features - -- Driver for the new Kobo Ellipsa - -- [1930958] Content server: When editing metadata for fields that take multiple values, make it easier to remove individual values by simply tapping a button - -- [1930900] Browser viewer: Make current color scheme setting propagate to all devices automatically when using user accounts - -- E-book viewer: Image popup: Show the image resolution in the popup window's titlebar - -:: bug fixes - -- [1930922] HTML Input: Fix handling of @import rules in stylesheets nested more than one level deep - -- [1930912] Fix viewer search context menu to clear searches not clearing search settings - -- E-book viewer image popup: 
Fix full screen button in incorrect state when starting in full screen - -:: improved recipes -- Associated Press -- The Hindu - -}}} - -{{{ 5.20.0 2021-06-04 - -:: new features - -- E-book viewer: Highlights: Make URLs in the notes for highlights clickable - -- [1930136] Book details: Ctrl-clicking on tags now adds them to the existing search instead of replacing it - -- E-book viewer: Allow using the back button to return from jumping to a search result - -:: bug fixes - -- Get books: Fix the Kobo store plugin for changes to the website - -- [1929827] Edit book: Fix non breaking spaces in snippets being converted to normal spaces - -- [1930466] ToC Editor: Fix a regression that caused changes to not be saved on machines where running a worker process takes more than ten seconds - -- Fix error when changing the "Search the net" URLs for the Content server - -:: improved recipes -- Jerusalem Post -- Popular Science -- Ambito Financiero -- Ambito.com -- Infobae - -}}} - -{{{ 5.19.0 2021-05-28 - -:: new features - -- E-book viewer: Add a preference under Scrolling behavior to reverse the tap zones used to turn pages. 
So tapping on the left goes forward and the right backward - - -:: bug fixes - -- [1929862] E-book viewer: Fix regression in 5.15 that caused incorrect display of font sizes that contain a period and use absolute units - -- [1929240] PDF Output: Fix font kerning issues with some TrueType fonts - -- [1929267] Edit book: Fix a regression in 5.18 that broke editing/creating saved searches - -- [1919025] Windows: Fix for standalone ToC Editor not working on systems where Qt WebEngine causes a crash at exit - -- Windows MTP driver: When scanning an MTP device such as an Android phone ignore folders that Windows fails to enumerate instead of failing with an error - -- ToC Editor: Fix a regression that broke choosing split points in some XHTML files - -- [1929465] PDB Input: Fix a regression in calibre 5 that broke processing some plucker format PDB files - -- Edit book: Fix sorting in spell check dialog on language broken for books that have unknown languages - -- Annotations browser: Fix a typo that broke sorting for highlights - -- [1909730] Annotations browser: When showing a highlight preserve paragraph boundaries for multi-paragraph highlights - -- Viewer highlights panel: Fix the "Edit notes" link not saving the changes - -- [1929164] E-book viewer: When using a right click/shift-click to adjust the selection, move the section boundary that is closer to the click point - -}}} - -{{{ 5.18.0 2021-05-21 - -:: new features - -- Content server viewer: Improved search functionality - -Searching can now be done for whole words and regular expressions. And all search results are -listed at once with some context for easy navigation. 
- -- [1928596] E-book viewer: Allow making the image popup full screen - -- E-book viewer: Allow expanding/collapsing all items in the Table of Contents at a particular level by right clicking on one item of that level and choosing the option to expand/collapse - -- E-book viewer: Add shortcuts shift+home and shift+end to extend current selection to start/end of line - -- [1927520] Book details: When creating rules to convert identifiers to URLs allow using {id_unquoted} to avoid quoting the identifier value - -- [1927062] Review downloaded metadata: Allow double clicking on a cover to see it at a larger size - -- [1927012] Annotations browser: Add a Refresh button - - -:: bug fixes - -- Edit book: Fix ctrl-clicking on a class name jumping to the wrong CSS rule if the stylesheet contains top level comments - -- [1928579] Fix search and replace on identifiers not working if the replaced value has colons - -- [1922691] Annotations browser: Sort the entries in order of position in book - -- ToC editor: Dark mode: Fix colors in location selection panel not dark - -- [1926793] E-book viewer: Fix right or shift-clicking to extend selection not shrinking selection when the click is inside the selection -- [1927546] Avoid spurious errors on multiple simultaneous calibre launches - -- [1925961] E-book viewer: Ignore mouse scroll events that would turn pages when editing notes - -- E-book viewer: Fix sorting bookmarks by title not working - -- Windows WPD driver: Fix an error reading the filesystem on some MTP based devices - -:: improved recipes -- IEEE Spectrum - -}}} - -{{{ 5.17.0 2021-04-30 - -:: new features - -- [1926484] E-book viewer: Image popup: Allow dragging with the mouse to pan the image - -- [1923724] Sort button: Allow selecting which columns are in the popup sort menu - -- [1851908] E-book viewer: When suggesting a default bookmark title, use the name of the current chapter - -- [1925038] E-book viewer: When searching the Table of Contents allow holding the 
Shift key to search backwards - -- [1925294] E-book viewer: Add a shortcut Ctrl+0 to restore default font size - -:: bug fixes - -- Windows MTP driver: Rewrite parts of the driver in the hope of fixing some rare and hard to reproduce crashes - -- Windows MTP driver: Set modified and created times when putting files/folders on device. Also read modified time correctly. - -- [1918591] Windows: E-book viewer: Fix switching away from viewer while in full screen and switching back causing some corruption until the page is scrolled - -- [1925378] Fix a regression in the previous release that caused errors when editing empty date values - -- Get books: Update Gutenberg plugin for website changes - -- [1926518] E-book viewer: The quick highlight button should replace the style of an existing highlight, when one is selected - -- [1925247] Elide long items in the middle when showing the completion popup for tags, to make it easier to use with hierarchical tags - -- [1925988] E-book viewer: Read aloud: Fix soft hyphens causing read aloud words to be broken up - -- [1925390] E-book viewer: Right clicking when text is selected should extend the selection instead of doing nothing - -- Fix settings in the ToC Editor tool being forgotten when calibre is closed - -- [1926025] DOCX Output: Fix conversion failing if the input document has missing images - -- [1925961] Content server viewer: Fix mouse wheel scrolling not working on the box used to edit notes for highlights - -- E-book viewer: Fix read aloud word tracking in flow mode not very reliable - -- E-book viewer: Fix navigation shortcuts not working in Read aloud mode - -:: improved recipes -- Barrons -- Krebs On Security - -}}} - -{{{ 5.16.1 2021-04-17 - -:: new features - -:: bug fixes - -- [1924703] CHM Input: Fix handling of some CHM files that use non-ASCII internal filenames and don't specify a character encoding in their metadata - -- [1924824] Fix a regression in the previous release that prevented calibre from 
starting if there was a failure in a third party plugin - -- Content server viewer: Fix a regression in the previous release that broke handling of URLs in stylesheets - -- [1924767] Fix a regression in the previous release that broke changing sections in the convert single book dialog after changing the input or output formats - -- [1924675] Fix using 'is set' rules not working for column rules with rating values - -- E-book viewer: Fix a regression in 5.15 that causes the viewer to ignore page-break CSS properties (in 5.16.1) - -- Fix a regression that broke using stored templates (in 5.16.1) - -- [1924875] Fix auto scroll books shortcut not working in standalone cover browser window (in 5.16.1) - -- [1924890] E-book viewer: Fix jumping to previous section sometimes not working in flow mode (in 5.16.1) - -- [1924853] E-book viewer: Fix a regression in 5.15 that broke changing keyboard shortcuts in the viewer preferences (in 5.16.1) - -- E-book viewer: Fix a regression in 5.15 that broke using the delete key to delete highlights (in 5.16.1) - -:: improved recipes -- Granta -- New Scientist - -:: new recipes -- The Saturday paper by Alistair Francis -- Crikey by Alistair Francis - -}}} - -{{{ 5.15.0 2021-04-16 - -:: new features - -- [1917634] Allow auto scrolling through the list of books by pressing the X key or right clicking on the cover browser - -Useful to have a "slideshow" of book covers. The speed of scrolling can be controlled in Preferences->Look & feel->Cover browser - -- E-book viewer: Speed up first time open for EPUB files with lots of styling - -- E-book viewer preferences: Allow searching for keyboard shortcuts - -- Edit book: Allow editing WEBP images - -- Various improvements to the template language, see https://www.mobileread.com/forums/showthread.php?t=337573 - -- [1921610] E-book viewer: Show the current progress percentage in the bottom bar of the viewer controls. 
This can be customized in the viewer preferences under Headers and footers - -- [1921689] E-book viewer: Add an option under Preferences->Miscellaneous to not restore open panels such as Search, Table of Contents etc on restart - -- When exporting highlights as text or markdown also output top level chapter titles - -- [1922327] Allow downloading metadata from amazon.se - -- [1922591] Preferences->Tweaks: Allow specifying that calibre should open the book details window when double clicking on a book - -- [1922341] MOBI Output: Convert WebP images to PNG so they work with Amazon's software - -- [1921793] DOCX Input: When converting embedded fonts, replace spaces in the filename with underscores to keep the execrable epubcheck happy - -- E-book viewer: Selection bar: Add keyboard shortcuts for all buttons. Hover over a button in the bar to see the shortcut - - -:: bug fixes - -- [1924232] FB2 Output: Fix a regression in calibre 5 that caused paragraphs containing only non-breaking spaces to be removed - -- [1924187] Metadata comments editor: Fix setting block alignment destroying other block level properties - -- E-book viewer: Fix margins not being adjusted immediately when preferences are changed - -- [1921604] Edit book: Upgrade book internals: Fix skipping the NCX removal dialog not remembering the chosen option - -- [1922570] Conversion: When specifying a line-height do not apply it to the tag, as it is pointless and causes the execrable epubcheck to complain - -- E-book viewer: Improve scrolling behavior when extending the selection using keyboard shortcuts - -- E-book viewer: Fix read percent for HTML files that are rendered in a single screen being 0% rather than 100% - -- [1924598] E-book viewer: Highlights panel: Do not expand all sections when adding/deleting/modifying highlights - -- [1922503] CHM Input: Fix a regression in calibre 5.0 that broke opening of some files that don't specify a character encoding - -- EPUB2 metadata: Read ISBNs in identifier 
elements without schemes if they are valid ISBNs and no properly identified isbns are present - -- [1922309] Update Amazon metadata plugin for changes to amazon websites - -- Edit book: Fix detection of class names containing hyphens/underscores - -:: improved recipes -- New Scientist -- Irish Times -- 1843 -- The Straits Times - -:: new recipes -- Los Danieles – Columnas sin techo by CAVALENCIA -}}} - -{{{ 5.14.0 2021-03-26 - -:: new features - -- Edit book: When right clicking on a class in a HTML file, add an option to rename the class throughout the book - -- [1919103] Adding from ISBN: Add an option to check if there are existing books with the specified ISBNs already in the library - -- [1920576] Template tester: Instead of next/last, the template tester now shows the template values for selected books - -- [1918047] Content server: Allow swiping left and right to show next/previous book on the book details page - -- [1919072] E-book viewer: Make the commonly used Shift+Arrow key shortcuts for modifying selections using the keyboard work. Also add shortcuts for selecting by character, line and paragraph. 
- -:: bug fixes - -- [1920613] MOBI Output: Fix latest Kindle firmware not displaying select publisher font option for calibre produced AZW3 files - -- [1919033] E-book viewer: Fix errors when viewing books with mathematics that are split over multiple internal files - -- [1918436] Content server viewer: Fix highlights not sorted correctly in the highlights panel - -- [1918737] Get books: Fix Smashwords plugin not working because of website changes - -- [1920733] Improve the performance of QuickView especially when using composite columns - -- [1918428] Improve performance of the virtual_libraries() template function - -- [1920250] Browser viewer: Fix Go to Location not working for positions - -- [1905257] E-book viewer: Fix searching in the Table of Contents not working - -- [1918105] E-book viewer: In paged mode, fix scrolling not working correctly when margins are set to zero - -- [1918437] E-book viewer: Fix incorrect ToC navigation in books that link the entries to inline tags that wrap block tags that span multiple pages - -- [1920592] Category editor: Searching in library shouldn't automatically open Quickview - -- [1919260] Conversion: Fix a hang caused by long sequences of non-word characters when heuristics are enabled, either explicitly or for some input formats such as TXT - -- Fix a regression in calibre 5 that broke --explode-book and --implode-book actions for calibre-debug.exe - -- Fix a regression in calibre 5 that broke setting metadata in RTF files - -:: new recipes -- ZackZack.at by Dirk Gomez -}}} - -{{{ 5.13.0 2021-03-10 - -:: new features - -- [1917967] E-book Viewer: Allow editing the current book by pressing Ctrl+D or adding a button for it to the viewer tool bar - -- Edit book: Add a command line flag to allow selecting the specified text when opening a book - -- [1917363] Edit metadata dialog: When trimming covers, show the size of the current trim region - -:: bug fixes - -- Fix a regression in the previous release that broke sending of 
emails with text longer than 900 characters - -- E-book viewer: Fix using keyboard to extend selection not turning pages - -- [1918030] Fix searching for items from the Manage dialog not working correctly - -- [1917386] PDF input: Replace paragraph separator characters with spaces - -:: improved recipes -- The Conversation - -}}} - -{{{ 5.12.0 2021-02-26 - -:: new features - -- [1915773] E-book viewer: When searching start the search from the current position, jumping to the first match at or after the current page - -- [1916411] E-book viewer: Have the Table of Contents view automatically scroll to keep the chapter being read currently visible - -- Various improvements to the calibre template language, see https://www.mobileread.com/forums/showthread.php?t=337573 - -- Edit book: File browser: Show total size of items in category when hovering over category with mouse - -- Kobo driver: Add support for latest firmware and also an option to choose the color used for cover letterboxing - -:: bug fixes - -- [1915685] E-book viewer: Fix selection popup bar sometimes going off screen when dragging up to the top line of text - -- Get books: Update the Biblio and Chitanka stores for website changes - -- [1915770] Edit book: Fix editing of JavaScript files not working - -:: improved recipes -- Slate -- Harper's Magazine - -}}} - -{{{ 5.11.0 2021-02-12 - -:: new features - -- [1912958] Edit book: Add a tool to split the tag at the current cursor position, creating a new tag with the same style and class attributes. To add the tool go to the Toolbars section in the editor preferences - -- [1911107] E-book viewer: Show the URL when hovering over external links - -- E-book viewer: Redesign the reference mode to also work on touch screens without a mouse. 
Now in reference mode paragraph numbers are displayed for all paragraphs - -- Edit book: Allow Ctrl-clicking on class names to jump to the first style rule that matches the tag and class - -- Content server: When browsing highlights for a book allow selecting multiple highlights to delete or export quickly - -- [1912954] Allow creating keyboard shortcuts to copy show and view URLs for selected books to clipboard - -- calibredb: Add a timeout option to control the timeout when connecting to the calibre server - -:: bug fixes - -- [1913854] Content server: Fix dragging selection handles not working in Safari - -- [1915303] E-book viewer: Fix links with a href of "#" not working - -- E-book viewer: Hide the controls when clicking the back or forward buttons - -- [1914921] E-book viewer: When jumping to a highlight using the highlights panel, the back button should return to position before jump - -- [1914157] E-book viewer: Fix incorrect tooltip when hovering over a section title in the search results list - -- HTMLZ Output: Fix a regression in calibre 5 that broke creating HTMLZ documents when using the option to place CSS inline - -:: improved recipes -- Engadget - -}}} - -{{{ 5.10.1 2021-01-22 - -:: new features - -- [1911888] Bulk metadata edit: Add a new control to compress the cover image files for all selected books - -- [1912212] Add support for the CB7 comic file format - -- [1912070] E-book viewer: Allow adding a button to the selection bar that copies the - currently selected text along with a calibre:// URL to show the text in the book - -- Conversion: Insert metadata: Allow showing identifiers such as ISBN in the jacket page template - -- Conversion: Insert metadata: Allow hiding entries in the jacket template when they are not present in the metadata - -- [1912337] calibre-server --manage-users: Add a scriptable interface: calibre-server --manage-users -- help - -- [1912070] E-book viewer: Add keyboard shortcuts to copy the current location to the 
clipboard - -- [1912003] E-book viewer: Highlight all currently visible Table of Contents entries, not just the first - -- Add a new tweak under Preferences->Tweaks->Author sort name algorithm to optionally recognize - common surname prefixes such as von, van, de, etc. when generating sort names. - -:: bug fixes - -- [1911470] E-book viewer: Move read aloud pop-up bar to the bottom of the screen in flow mode - -- [1911218] E-book viewer: Fix scrolling with two fingers on touch pad on macOS not smooth - -- [1911466] PDF Output: When converting fixed layout input documents fix anchors inserted for navigation sometimes being rendered as blue boxes - -- [1904350] Edit book: Remove unused CSS: Fix selectors that don't match from CSS rules containing multiple selectors not being removed - -- Bulk metadata download: Fix series number not being changes if the series is the same as the existing series - -- E-book viewer: Fix a regression that caused non-HTML descriptions to not be displayed in the metadata page - -- E-book viewer: Fix clock being displayed in 24 hr format on some systems even though system locale is set to use 12 hr format. 
- -- Bulk metadata edit: Fix regression that inverted the meaning of the case sensitivity setting in the Search & replace tab - -- calibredb list: Fix incorrect output when redirecting to file - -- 5.10.1 fixes a regression in 5.10.0 that broke conversion when including metadata as a jacket page and using long text custom columns - -:: improved recipes -- Jacobin -- Japan Times -- The Wall Street Journal -- Mediapart - -}}} - -{{{ 5.9.0 2021-01-08 - -:: new features - -- Annotations: Allow exporting highlights and bookmarks in Markdown format, with a link to open the book at the highlight location - -- [1909529] Content server viewer: Allow exporting all highlights - -- [1909339] Icon rules editor: Add a button to open the icons folder - -- [1909258] Quickview: Dropdown menu for all selectable columns - -:: bug fixes - -- [1909880 1906152] Fix a regression that caused the Content server to crash if a client closed a connection during a file transfer on macOS and Linux. - -- [1909224] LRF Output: Fix conversion broken in calibre 5 when font size rescaling is active - -- E-book viewer: Cancel any speech in progress when hiding the selection popup bar after triggering the speak aloud action on it - -- [1909332] Color/icon rules editor: Fix duplicate rule button not working correctly - -- [1909291] Fix dropping files onto Book details causing an error if the confirmation dialog is disabled - -:: improved recipes -- General Knowledge Today -- El Pais -- USA Today -- WirtchaftsWoche Online -- The Guardian -- Arcamax -- Miami Herald -- The Seattle Times - -:: new recipes - -- Mallorca Zeitung by VoHegg -- T-Online by VoHegg -- El Diario by Dirk Gómez - -}}} - -{{{ 5.8.1 2020-12-24 - -:: new features - -- Happy holidays to all calibre users! 
- -- E-book viewer: Add a mode to follow links with only the keyboard (triggered by Alt+F) - -- [1908929] Edit book: A new option to show a configurable number lines above the current line when syncing the position of the preview panel to the current position in the code editor (under Preview settings in the Editor preferences). - -- [1907410] Windows: Automatically resolve shortcuts (.lnk files) when adding books to calibre. - -- Content server viewer: Don't enter full screen mode automatically when reading on desktop like devices (this can be controlled via a setting in the viewer preferences under Page layout) - -- E-book viewer tool bar: Add a select all action and a Read aloud action (can be added by right clicking the tool bar and configuring it) - -- Template/formatter enhancements: Add a 'for' statement and add the ability for a developer to pass extra information to a template. - -- [1907919 1907918] Rules editors for icon/coloring rules: Add a button to duplicate rules and to convert a rule to advanced template mode - -:: bug fixes - -- Content server viewer: Fix regression in 5.0 that broke scrolling on iOS - -- [1908000] E-book viewer: Fix error when scrolling to some search results in flow mode - -- [1898394] AZW3 Input: Fix rare AID based links not working. - -- [1907907] E-book viewer: Fix clock in header/footer not using system time format - -- Windows: Fix Read aloud not working with books that have a single large internal text file, such as MOBI or DOCX books - -- PDF Output: Fix a regression causing conversion to fail when typesetting Chinese text - -- Amazon metadata download: Fix no results being found when using the automatic or Google servers because of a change in the markup of the Google search results page. - -- [1909217 1909197] Version 5.8.1 fixes a couple of regressions that broke the Save to disk function and changing Page layout settings in the viewer. 
- -:: improved recipes - -- The Australian -- The Atlantic -- Zerohedge -- New York Times Book Review - -:: new recipes - -- SchwarzerPfeil by tastytea -- Substack by topynate -}}} - -{{{ 5.7.2 2020-12-12 - -:: new features - -- [major] E-book viewer: Add a "Read aloud" function that works via the operating system's Text-to-speech engine. - - Click the "Read aloud" button in the viewer controls to start reading the book text aloud from the current page. - -- A new busy spinner for waiting animations - -- [1907140] Edit metadata: Add buttons to easily set yes/no fields also shortcut to clear the field. - -:: bug fixes - -- [1905967] PDF Output: Fix a regression in the previous release that broke text rendering for some fonts due to a bug in Qt WebEngine (full fix is in 5.7.2). - -- [1905736] PDF Output: Fix conversion failing when adding header/footer and the input document defines margins/padding on the tag. - -- [1907159] Windows: Fix the case of library names in copied calibre:// links sometimes incorrect. - -- [1906459] AZW3 Input: Fix a regression in calibre 5 that broke processing of files with inline flow replacements. - -- [1906149] Fix hiding and showing Book details panel changes its size by a pixel or two. - -- [1907067] Get books: Fix amazon.fr not working because of website changes. - -- Bulk metadata search/replace: Fix text transform function not being applied to the test result in character mode - -- [1906464] Book list: Improve rendering of column headers when they don't fit, by eliding them instead of just cutting off rendering. - -- [1906063] Fix template function "first_non_empty" fails if no argument evaluates non-empty - -- [1907773] Fix regression in 5.7.0 causing failure to start if one of the previously used libraries had a stored path ending in a slash. 
- -:: improved recipes -- The Guardian -- The Atlantic - -}}} - -{{{ 5.6.0 2020-11-27 - -:: new features - -- Edit book: Show a non-modal popup for a few seconds to allow undoing file delete operations - -- [1903418] Device books view: Add an action to the context menu to easily jump to the matching book in the calibre library view - -- [1903270] Add ability to undo Generate cover in the Edit metadata screen, by long clicking the Generate cover button - -- Edit book: Allow adding tags to the list of tags for the insert tag button - -- calibredb add: New option --automerge to automatically merge duplicates - -- [1905646] Add an edit notes action to the context menu in the viewer highlights panel - -- [1903333] Content server viewer: Allow viewing images in a new window by right clicking them, matches the calibre builtin viewer behavior - -- [1903403] Book details window: Allow opening the edit metadata window using either the keyboard shortcut or the context menu - -- Allow copying the current search as a calibre:// URL by right clicking the search box - -- Book details panel: Add entries to copy calibre:// links for the current book to the context menu - -- Edit book: Upgrade book: Ask whether to keep the NCX based Table of Contents - -:: bug fixes - -- [1904310] Windows: Fix calibre portable launcher not working correctly from root folder in calibre 5.5 - -- [1904505] macOS: Fix rendering of space after punctuation incorrect in Big Sur - -- [1905319] DOCX Input: When converting images placed using the obsolete VML markup default them to being inline rather than block images - -- [1905479] Metadata edit dialog: Fix keyboard shortcuts for bold/italic/underline not working when more than one comments editor widget is present on a single tab - -- [1905113] When using calibre://show-book URLs and the book is not found, clear any Virtual library or search restriction and then show the book - -- macOS: Fix calibre:// URLs not working from other documents - -- Fix 
calibre:// URL search action not changing library if needed - -- [1904305] annotation_count() template function displaying value for deleted annotations - -- [1905806] Edit book: Fix syntax highlighting not recognizing ends-with selector - -:: improved recipes -- The BBC -- Folha de Sao Paolo -- Netzpolitik - -}}} - -{{{ 5.5.0 2020-11-13 - -:: new features - -- [major] Support the calibre:// URL scheme - - Clicking on calibre:// URLs can be used to have calibre perform various actions. - For details, see: https://manual.calibre-ebook.com/url_scheme.html - -- Viewer: Show calibre:// URL for current book in the Goto->Location panel - -- [1902518] Edit book: Add an action to the right click menu for tabs to close tabs to the right of the current tab. - -- [1902413] Add ability to copy tag, publisher and author in Book details panel by right-clicking - -- [1902326] Annotations browser: Add a context menu for common actions. - -- [1899839] Annotations browser: Show a dot for highlights with notes. - -- [1902227] Edit book: Saved searches panel: Allow copying the current saved search to the regular search panel by clicking the Export button. - -- Windows: Fix command line arguments not working for the portable.exe launchers - -- Viewer: back and forward buttons on the mouse now trigger the back and forward actions - -- [1902313] Option to mark all books with annotations/bookmarks - -- Various minor improvements to how keyboard focus is handled in the Tag browser - -:: bug fixes - -- [1903294] Edit book: Fix regression in calibre 5 that caused an error when managing user dictionaries. - -- [1903831] Viewer: Fix previous/next buttons iterating over removed bookmarks - -- [1903825] Welcome wizard: Fix changing the language causing a empty folder to be created. 
- -- [1903699] Viewer: Fix regression causing custom shortcuts with Shift key pressed not working - -- [1903423] Viewer: In dark mode when showing images with transparency in the popup use a light background color as most images are designed with a light background color in mind. - -- [1903428] Fix a regression in the previous release that broke conversion of DJVU files - -- [1903363] Metadata edit: "Set to Undefined" button missing on integer columns. - -- [1903086] Entry remains underlined in Tag browser after clearing filter - -- [1902126] XML Catalogs: Fix languages field missing - -- Fix a regression that broke loading of third party plugins that are encoded in an encoding other than UTF-8 - -- macOS: Fix viewer not responding to cmd+c to copy text by default - -- Windows: Fix sending email on computers with non-ASCII computer names - -:: improved recipes -- Spectator Magazine - -}}} - -{{{ 5.4.2 2020-10-30 - -:: new features - -- [1900761] Windows: Allow adding of books to calibre from folders whose path length is larger than 260 characters - -- Tag browser: Add an option in Preferences->Look & feel->Tag browser to allow the tag browser to get keyboard focus - -- Tag browser: Allow editing the set of permissible values and colors for a custom column with fixed values, by right clicking on it. - -- Edit metadata dialog: Make most custom metadata controls use only a single line. - - Elide the names of custom columns that are longer than a fixed width, instead of using multiple lines. - Configurable via Preferences->Tweaks->Edit metadata custom column label length - -- Edit book: Remove unused CSS: Add an option to merge CSS rules that have identical properties - -- [1901379] Book details window: Double clicking on the cover now shows it in the default system image viewer. - -- [1900874] News download: Allow passing username/password in feed URLs. - -- [1900890] Open with: Allow renaming Open with applications. 
- -:: bug fixes - -- [1900868] Viewer: Fix jumping to search result not always working in flow mode. - -- Get Books: Update Google and Gutenberg plugins for website changes - -- [1900946] Viewer: Fix keyboard shortcuts using Ctrl+Alt+letter key not working on Windows. - -- [1900942] Viewer: Fix keyboard shortcuts to shrink/grow selection not working. - -- [1900938] Viewer: Fix keyboard shortcut to toggle highlights panel not working when the highlights panel is itself focused. - -- [1900358] Viewer: handle editing of missing highlights more gracefully - - -- [1901289] Drivers: Fix regression in calibre 5 that broke connecting to SONY devices - -- [1901276] Open with: On Linux when reading names from .desktop files, use the first matching language - -- [1901593] Conversion: Fix a regression in calibre 5 that broke conversion of some malformed CSS stylesheets. - -- [1901957] Conversion: Fix a regression in calibre 5 that broke processing of some PDB files with images. - -- [1901278] Conversion: Fix regression in calibre 5 in the handling of @import() rules in stylesheets that import from folders above themselves. - -- [1901232 1901230] Content server: Fix various controls on the book details page not working when viewing a random book. - -- [1901273] Tag browser: Fix a crash when renaming a saved search to a value that already exists. - -- [1900921] Tag browser: Fix renaming of custom column with fixed set of values not working. - -- [1901630] Fix a regression in calibre 5 that broke auto-sync of generated catalogs to devices. - -- [1901113] Fix setting rating via ebook-meta command line tool broken in calibre 5. - -- [1900099] When boolean columns are set to bistate, checking "show checkmarks" results in all non-boolean values being shown as false - -- Spell check: Fix using non UTF-8 dictionaries broken in calibre 5 - -- Fix a rare issue where restarting calibre from within the program would fail. 
- -- Version 5.4.2 fixes bugs in 5.4.0 that prevented calibre from starting on macOS and from choosing save file names on Windows. - -:: improved recipes - -- Psychology Today -- Washington Post -- LifeHacker -- The Atlantic -- New York Review of Books - -}}} - -{{{ 5.3.0 2020-10-16 -:: new features - -- Tag browser: When grouping items by first letter if adjacent letters have few entries combine them into a single group. - -Can be controlled via an option in Preferences->Look & feel->Tag browser to combine 'first letters' together if there are a small number of items under adjacent letters - -- [1899163] Viewer: Allow displaying the current page / total pages in the header/footer. Useful in paged mode to see exactly how many pages are left. - -- [1899834] Viewer search panel: Show more result context in a tooltip when hovering over a search result. - -- DOCX Input: Add support for Word controls used to rotate or flip images - -Only works with output formats such as EPUB that support CSS transforms - -- [1899762] Viewer image pop-up: Allow maximizing/minimizing the window. - -- Edit metadata dialog: Use only a single line for custom column date fields - -- [1899341] Add an item to search for categories to the category editor context menu. - -- [1899316] Category editor: Add a right click menu to change case of the selected entries. - - -:: bug fixes - -- [1898221] Fix a regression in 5.0 that caused performance of dialogs that contain title/series/tags edit fields to be very poor with large libraries. - -- Fix a regression in 5.0 that caused some pop-up menus to appear on the wrong monitor in multi-monitor setups - -- macOS: Fix a long standing bug that prevented drag and drop of multiple items - -- Windows: Fix a regression that broke scanning for default programs for the Open with action - -- [1898894] Windows: Fix a regression in 5.0 causing some MOBI files with non-BMP characters not being processed correctly. 
- -- E-book viewer: Fix scrolling past the end of chapter boundaries not working in books that have negative margins - -- [1899466] Edit metadata dialog: fix incorrect rendering of custom column names that start with emoji. - -- [1899318] Quickview: Fix nothing shown after clearing the search. - -- [1900022] Dark mode: Fix radio buttons having no outline. - -- [1900066] Annotations browser: When showing a book in the calibre library that is not currently visible, display an error. - - -:: new recipes - -- Deutsche Welle by VoHe - -:: improved recipes - -- Reuters - - -}}} - -{{{ 5.2.0 2020-10-07 -:: new features - -- [1897354] Browse annotations: Add a check box to restrict the displayed annotations to only the books currently selected in the calibre library. - -- Allow storing and calling functions in the calibre template language (Preferences->Template functions) - -- [1897336] Add a shortcut (Shift+V) to open the last read book. - - -:: bug fixes - -- Fix import errors with some third party plugin on Windows - -- [1898598] Viewer: Fix toolbar show controls button not working when no book is open. - -- [1898577] Viewer: Fix incorrect positioning of context menu for viewer toolbar. - -- MOBI Input: Fix regression that broke reading of some documents - -- Bulk metadata search and replace: Fix some regular expression causing errors with the new regex engine - -- Fix a regression that broke application of plugboards when sending by e-mail - -- [1898441] ODT Input: Fix a regression that broke conversion of ODT files with footnotes. - -- [1898413] Viewer: Fix mouse wheel not working while selection bar is visible - -- Viewer: Fix selection popup bar not always close to mouse when ending select-to-drag - -- Fix calibre-server not exiting on Ctrl+C on Windows - -- [1897410] Content server OPDS feeds: Fix error if the metadata for a book contains particular Unicode characters. - -- [1898167] Edit book: Reports tool: Fix a regression that broke sorting. 
- -- HTMLZ Output: Fix an error when converting a document that has SVG images - - - -}}} - -{{{ 5.1.0 2020-10-02 -:: new features - -- Enhancement: allow using templates in search expressions - -- [1897435] Viewer lookup panel: Add a checkbox to disable automatic update of lookup when the selected text changes. - -- [1897415] Viewer: Allow skipping the confirmation when using the remove highlight button in the popup bar. - - -:: bug fixes - -- [1897618] ToC Editor: Fix bulk rename of entries not working - -- [1897571] Linux: Fix right clicking on system tray icon not working - -- [1897356] Fix User category based searches unreliable dues to caching issues - -- PDF Output: Don't fail if the input document has an html file identified as the cover - -- [1897467] Catalog generation: Fix a regression that broke generating catalogs if books with no comments are present. - -- [1897314] Windows: Fix error on first run of calibre after install/upgrade - -- Viewer: Fix copy to clipboard button in toolbar not working - -- [1897297] Viewer: Fix copy to clipboard not copying text as HTML to clipboard in addition to plain text - -- [1897409] macOS: Fix some drop down menus such as in the ToC editor and cover generation in the Edit metadata dialog not working - - - -}}} - -{{{ 5.0.1 2020-09-25 -:: new features - -- [major] For details on the major changes in calibre between 4.0 and 5.0, see https://calibre-ebook.com/new-in/fourteen - -- [major] E-book viewer: Add support for highlighting - -https://manual.calibre-ebook.com/viewer.html#highlighting-text - -- [major] E-book viewer: Add support for right-to-left and vertical text - -- [major] Switch calibre to Python 3. This means that some no longer maintained third party plugins will not work. See https://www.mobileread.com/forums/showthread.php?t=326405 - -- Dark mode support for the Content server and E-book viewer user interfaces - -- Content server's in-browser viewer now supports bookmarking. 
Bookmarks and highlights are auto-synced across devices. - -- Regular expression engine used for searching the book list and reading metadata from filenames has been made more powerful. - - -:: bug fixes - -- Version 5.0.1 fixes a bug in 5.0.0 that broke connecting to devices on macOS - - - -}}} - -{{{ 4.23.0 2020-08-21 -:: new features - -- Kobo driver: Add support for new firmware. Also add recognition of 'Kobo Plus' subscription books - -- [1889925] Edit book: Allow customizing the base background/foreground and link colors for the preview window. - -- [1891765] Quickview panel: Add actions to the context menu to search for book in library, open in E-book viewer, etc. - -- Allow forcing calibre to remember the column width for the On device column by right clicking the header of the column - -- Template language: Add functions to perform math operations to convert fractional numbers to integers - - -:: bug fixes - -- [1849958] Content server: Fix Esc key not working in several views. - -- [1889973] Fix unable to delete default value for custom columns of boolean type, once set - -- EPUB Output: When splitting don't consider files with only a single character empty - -- Comments editor: Fix syntax coloring for HTML view in dark mode - -- Book list: When a series column is not wide enough, elide text in the middle so that the series number is visible - - -:: new recipes - -- linuxnews.de and t3n.de by Volker Heggemann - -:: improved recipes - -- El Diplo -- calibre Blog -- ESPN -- LA Times -- Winnipeg Free Press -- Popular Science -- Science News Recent Issues - - -}}} - diff --git a/README.md b/README.md index 0884d4e7a7..af8eded26e 100644 --- a/README.md +++ b/README.md @@ -45,4 +45,4 @@ calibre binaries and installers for all the platforms calibre supports. 
## calibre package versions in various repositories -[![Packaging Status](https://repology.org/badge/vertical-allrepos/calibre.svg)](https://repology.org/project/calibre/versions) +[![Packaging Status](https://repology.org/badge/vertical-allrepos/calibre.svg?columns=3&header=calibre)](https://repology.org/project/calibre/versions) diff --git a/bypy/README.rst b/bypy/README.rst index 52b7f578cb..c51e9f1351 100644 --- a/bypy/README.rst +++ b/bypy/README.rst @@ -26,7 +26,8 @@ page for details). Once the dependencies are installed, run:: ./setup.py bootstrap -To make the Windows and macOS builds it uses QEMU VMs. Instructions on +All building is done inside QEMU VMs. Linux VMs are auto-created as needed, +Windows and macOS VMs must be created manually. Instructions on creating the VMs are in the bypy repo under :file:`virtual_machine/README.rst`. Required software for the VMs are listed in :file:`bypy/windows.conf` and :file:`bypy/macos.conf`. @@ -34,10 +35,10 @@ Required software for the VMs are listed in :file:`bypy/windows.conf` and Linux ------- -To build the 64bit and 32bit dependencies for calibre, run:: +To build the Intel and ARM dependencies for calibre, run:: ./setup.py build_dep linux - ./setup.py build_dep linux 32 + ./setup.py build_dep linux-arm64 The output (after a very long time) will be in :literal:`bypy/b/linux/[32|64]` @@ -45,7 +46,7 @@ Now you can build the calibre Linux tarballs with:: ./setup.py linux -The output will be in :literal:`dist` +The output will be in :file:`dist` macOS @@ -62,7 +63,7 @@ Now you can build the calibre ``.dmg`` with:: ./setup.py osx --dont-sign --dont-notarize -The output will be in :literal:`dist` +The output will be in :file:`dist` Windows @@ -74,11 +75,10 @@ Make sure all software mentioned in :file:`bypy/windows.conf` is installed. 
To build the dependencies for calibre, run:: ./setup.py build_dep windows - ./setup.py build_dep windows 32 The output (after a very long time) will be in :literal:`bypy/b/windows/[32|64]`. Now you can build the calibre windows installers with:: ./setup.py win --dont-sign -The output will be in :literal:`dist` +The output will be in :file:`dist` diff --git a/bypy/linux.conf b/bypy/linux.conf index 7b2bb31d76..57cb3a4691 100644 --- a/bypy/linux.conf +++ b/bypy/linux.conf @@ -2,4 +2,4 @@ image 'https://cloud-images.ubuntu.com/releases/focal/release/ubuntu-20.04-serve # Build time deps for Qt. See https://doc.qt.io/qt-6/linux-requirements.html and # https://doc.qt.io/qt-6/qtwebengine-platform-notes.html -deps 'flex bison gperf ruby python2 libx11-dev libxext-dev libxfixes-dev libxi-dev libxrender-dev libxcb1-dev libx11-xcb-dev libxcb-glx0-dev libxcb-keysyms1-dev libxcb-image0-dev libxcb-shm0-dev libxcb-icccm4-dev libxcb-sync0-dev libxcb-xfixes0-dev libxcb-shape0-dev libxcb-randr0-dev libxcb-render-util0-dev libxcb-xinerama0-dev libxcb-util-dev xkb-data libglu1-mesa-dev libxkbcommon-dev libinput-dev libxkbcommon-x11-dev libxkbfile-dev libgtk2.0-dev libvulkan-dev libwayland-dev libwayland-egl1-mesa libxcb-xkb-dev libegl1-mesa-dev libxtst-dev libnss3-dev libfreetype6-dev libfontconfig-dev libdrm-dev libxshmfence-dev libcups2-dev' +deps 'flex bison gperf ruby python2 libx11-dev libxext-dev libxfixes-dev libxi-dev libxrender-dev libxcb1-dev libx11-xcb-dev libxcb-glx0-dev libxcb-keysyms1-dev libxcb-image0-dev libxcb-shm0-dev libxcb-icccm4-dev libxcb-sync0-dev libxcb-xfixes0-dev libxcb-shape0-dev libxcb-randr0-dev libxcb-render-util0-dev libxcb-xinerama0-dev libxcb-util-dev xkb-data libglu1-mesa-dev libxkbcommon-dev libinput-dev libxkbcommon-x11-dev libxkbfile-dev libgtk2.0-dev libvulkan-dev libwayland-dev libwayland-egl1-mesa libxcb-xkb-dev libegl1-mesa-dev libxtst-dev libnss3-dev libfreetype6-dev libfontconfig-dev libdrm-dev libxshmfence-dev libcups2-dev libxcb-cursor-dev' 
diff --git a/bypy/linux/__main__.py b/bypy/linux/__main__.py index 025a47e02c..ebdfe96644 100644 --- a/bypy/linux/__main__.py +++ b/bypy/linux/__main__.py @@ -13,13 +13,14 @@ import time from functools import partial from bypy.constants import ( - OUTPUT_DIR, PREFIX, SRC as CALIBRE_DIR, python_major_minor_version + LIBDIR, OUTPUT_DIR, PREFIX, SRC as CALIBRE_DIR, python_major_minor_version, ) from bypy.freeze import ( - extract_extension_modules, fix_pycryptodome, freeze_python, path_to_freeze_dir + extract_extension_modules, fix_pycryptodome, freeze_python, is_package_dir, + path_to_freeze_dir, ) from bypy.utils import ( - create_job, get_dll_path, mkdtemp, parallel_build, py_compile, run, walk + create_job, get_dll_path, mkdtemp, parallel_build, py_compile, run, walk, ) j = os.path.join @@ -38,15 +39,15 @@ qt_get_dll_path = partial(get_dll_path, loc=os.path.join(QT_PREFIX, 'lib')) def binary_includes(): return [ - j(PREFIX, 'bin', x) for x in ('pdftohtml', 'pdfinfo', 'pdftoppm', 'pdftotext', 'optipng', 'JxrDecApp')] + [ + j(PREFIX, 'bin', x) for x in ('pdftohtml', 'pdfinfo', 'pdftoppm', 'pdftotext', 'optipng', 'cwebp', 'JxrDecApp')] + [ j(PREFIX, 'private', 'mozjpeg', 'bin', x) for x in ('jpegtran', 'cjpeg')] + [ ] + list(map( get_dll_path, ('usb-1.0 mtp expat sqlite3 ffi z lzma openjp2 poppler dbus-1 iconv xml2 xslt jpeg png16' - ' webp webpmux webpdemux exslt ncursesw readline chm hunspell-1.7 hyphen' + ' webp webpmux webpdemux sharpyuv exslt ncursesw readline chm hunspell-1.7 hyphen' ' icudata icui18n icuuc icuio stemmer gcrypt gpg-error uchardet graphite2' - ' brotlicommon brotlidec brotlienc' + ' brotlicommon brotlidec brotlienc zstd podofo ssl crypto tiff' ' gobject-2.0 glib-2.0 gthread-2.0 gmodule-2.0 gio-2.0 dbus-glib-1').split() )) + [ # debian/ubuntu for for some typical stupid reason use libpcre.so.3 @@ -56,8 +57,8 @@ def binary_includes(): # than libc and libpthread we bundle the Ubuntu one here glob.glob('/usr/lib/*/libpcre.so.3')[0], - 
get_dll_path('podofo', 3), get_dll_path('bz2', 2), j(PREFIX, 'lib', 'libunrar.so'), - get_dll_path('ssl', 2), get_dll_path('crypto', 2), get_dll_path('python' + py_ver, 2), + get_dll_path('bz2', 2), j(PREFIX, 'lib', 'libunrar.so'), + get_dll_path('python' + py_ver, 2), get_dll_path('jbig', 2), # We dont include libstdc++.so as the OpenGL dlls on the target # computer fail to load in the QPA xcb plugin if they were compiled @@ -90,9 +91,8 @@ def ignore_in_lib(base, items, ignored_dirs=None): for name in items: path = j(base, name) if os.path.isdir(path): - if name in ignored_dirs or not os.path.exists(j(path, '__init__.py')): - if name != 'plugins': - ans.append(name) + if name != 'plugins' and (name in ignored_dirs or not is_package_dir(path)): + ans.append(name) else: if name.rpartition('.')[-1] not in ('so', 'py'): ans.append(name) @@ -112,7 +112,7 @@ def import_site_packages(srcdir, dest): src = os.path.abspath(j(srcdir, line)) if os.path.exists(src) and os.path.isdir(src): import_site_packages(src, dest) - elif os.path.exists(j(f, '__init__.py')): + elif is_package_dir(f): shutil.copytree(f, j(dest, x), ignore=ignore_in_lib) @@ -125,6 +125,8 @@ def copy_libs(env): os.chmod(j( dest, os.path.basename(x)), stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH) + for x in ('ossl-modules',): + shutil.copytree(os.path.join(LIBDIR, x), os.path.join(env.lib_dir, x)) base = j(QT_PREFIX, 'plugins') dest = j(env.lib_dir, '..', 'plugins') @@ -161,7 +163,7 @@ def copy_python(env, ext_dir): elif os.path.isfile(c): shutil.copy2(c, j(dest, x)) shutil.copytree(j(env.src_root, 'resources'), j(env.base, 'resources')) - for pak in glob.glob(j(QT_PREFIX, 'resources', '*.pak')): + for pak in glob.glob(j(QT_PREFIX, 'resources', '*')): shutil.copy2(pak, j(env.base, 'resources')) os.mkdir(j(env.base, 'translations')) shutil.copytree(j(QT_PREFIX, 'translations', 'qtwebengine_locales'), j(env.base, 'translations', 'qtwebengine_locales')) diff --git 
a/bypy/linux/launcher.c b/bypy/linux/launcher.c index fc9cbbf7c6..d49204f45c 100644 --- a/bypy/linux/launcher.c +++ b/bypy/linux/launcher.c @@ -26,17 +26,25 @@ int main(int argc, char **argv) { } strncpy(lib, buf, PATHLEN); strncpy(base, dirname(lib), PATHLEN); - snprintf(exe, PATHLEN, "%s/bin/%s", base, basename(buf)); + int ret = snprintf(exe, PATHLEN, "%s/bin/%s", base, basename(buf)); + if (ret < 0 || ret > (PATHLEN-2)) { fprintf(stderr, "Path to executable too long: %s/bin/%s", base, basename(buf)); return 1; } memset(lib, 0, PATHLEN); - snprintf(lib, PATHLEN, "%s/lib", base); + ret = snprintf(lib, PATHLEN, "%s/lib", base); + if (ret < 0 || ret > (PATHLEN-2)) { fprintf(stderr, "Path to lib too long: %s/lib", base); return 1; } SET("CALIBRE_QT_PREFIX", base) memset(buf, 0, PATHLEN); ldp = getenv("LD_LIBRARY_PATH"); if (ldp == NULL) strncpy(buf, lib, PATHLEN); - else snprintf(buf, PATHLEN, "%s:%s", lib, ldp); + else { + ret = snprintf(buf, PATHLEN, "%s:%s", lib, ldp); + if (ret < 0 || ret > (PATHLEN-2)) { fprintf(stderr, "LD_LIBRARY_PATH too long: %s:%s", lib, ldp); return 1; } + } SET("LD_LIBRARY_PATH", buf) + ret = snprintf(buf, PATHLEN, "%s/ossl-modules", lib); + if (ret < 0 || ret > (PATHLEN-2)) { fprintf(stderr, "OPENSSL_MODULES too long: %s/ossl-modules", lib); return 1; } + SET("OPENSSL_MODULES", buf) argv[0] = exe; if (execv(exe, argv) == -1) { diff --git a/bypy/macos.conf b/bypy/macos.conf index 7f278e1bb4..031a5b51e4 100644 --- a/bypy/macos.conf +++ b/bypy/macos.conf @@ -1,7 +1,9 @@ -# Requires installation of XCode 10.3 and Python 3 and +# Requires installation of XCode 14.3 and # python3 -m pip install certifi html5lib -vm_name 'macos-calibre-qt6' +vm_name 'macos-calibre' root '/Users/Shared/calibre-build' -python '/usr/local/bin/python3' +python '/usr/bin/python3' +rsync '/usr/local/bin/rsync' +deploy_target '11.0' universal 'true' diff --git a/bypy/macos/__main__.py b/bypy/macos/__main__.py index 3627540407..f0f9d43a93 100644 --- 
a/bypy/macos/__main__.py +++ b/bypy/macos/__main__.py @@ -20,13 +20,14 @@ from functools import partial, reduce from itertools import repeat from bypy.constants import ( - OUTPUT_DIR, PREFIX, PYTHON, SRC as CALIBRE_DIR, python_major_minor_version + OUTPUT_DIR, PREFIX, PYTHON, SRC as CALIBRE_DIR, python_major_minor_version, ) from bypy.freeze import ( - extract_extension_modules, fix_pycryptodome, freeze_python, path_to_freeze_dir + extract_extension_modules, fix_pycryptodome, freeze_python, is_package_dir, + path_to_freeze_dir, ) from bypy.utils import ( - current_dir, get_arches_in_binary, mkdtemp, py_compile, timeit, walk + current_dir, get_arches_in_binary, mkdtemp, py_compile, timeit, walk, ) abspath, join, basename, dirname = os.path.abspath, os.path.join, os.path.basename, os.path.dirname @@ -44,11 +45,14 @@ ENV = dict( FONTCONFIG_PATH='@executable_path/../Resources/fonts', FONTCONFIG_FILE='@executable_path/../Resources/fonts/fonts.conf', SSL_CERT_FILE='@executable_path/../Resources/resources/mozilla-ca-certs.pem', + OPENSSL_ENGINES='@executable_path/../Frameworks/engines-3', + OPENSSL_MODULES='@executable_path/../Frameworks/ossl-modules', ) APPNAME, VERSION = calibre_constants['appname'], calibre_constants['version'] basenames, main_modules, main_functions = calibre_constants['basenames'], calibre_constants['modules'], calibre_constants['functions'] ARCH_FLAGS = '-arch x86_64 -arch arm64'.split() EXPECTED_ARCHES = {'x86_64', 'arm64'} +MINIMUM_SYSTEM_VERSION = '13.0.0' def compile_launcher_lib(contents_dir, gcc, base, pyver, inc_dir): @@ -99,7 +103,7 @@ def compile_launchers(contents_dir, inc_dir, xprograms, pyver): is_gui = 'true' if ptype == 'gui' else 'false' cmd = [gcc] + ARCH_FLAGS + [ '-Wall', f'-DPROGRAM=L"{program}"', f'-DMODULE=L"{module}"', f'-DFUNCTION=L"{func}"', f'-DIS_GUI={is_gui}', - '-I' + base, src, lib, '-o', out, '-headerpad_max_install_names' + '-I' + base, src, lib, '-o', out, '-headerpad_max_install_names', ] # print('\t'+' '.join(cmd)) 
sys.stdout.flush() @@ -270,12 +274,10 @@ class Freeze: @flush def get_local_dependencies(self, path_to_lib): for x, is_id in self.get_dependencies(path_to_lib): - if x.startswith('@rpath/Qt') or x.startswith('@rpath/libexpat'): - yield x, x[len('@rpath/'):], is_id - elif x in ('libunrar.dylib', 'libstemmer.0.dylib', 'libstemmer.dylib') and not is_id: + if x in ('libunrar.dylib', 'libstemmer.0.dylib', 'libstemmer.dylib', 'libjbig.2.1.dylib') and not is_id: yield x, x, is_id else: - for y in (PREFIX + '/lib/', PREFIX + '/python/Python.framework/'): + for y in ('@rpath/', PREFIX + '/lib/', PREFIX + '/python/Python.framework/'): if x.startswith(y): if y == PREFIX + '/python/Python.framework/': y = PREFIX + '/python/' @@ -395,7 +397,7 @@ class Freeze: CFBundlePackageType='APPL', CFBundleSignature='????', CFBundleExecutable='pdftohtml', - LSMinimumSystemVersion='10.15.0', + LSMinimumSystemVersion=MINIMUM_SYSTEM_VERSION, LSRequiresNativeExecution=True, NSAppleScriptEnabled=False, CFBundleIconFile='', @@ -414,9 +416,6 @@ class Freeze: raise SystemExit('No calibre plugins found in: ' + self.ext_dir) for f in plugins: self.fix_dependencies_in_lib(f) - if f.endswith('/podofo.so'): - self.change_dep('libpodofo.0.9.7.dylib', - '@executable_path/../Frameworks/libpodofo.0.9.7.dylib', False, f) @flush def create_plist(self): @@ -448,7 +447,7 @@ class Freeze: CFBundleExecutable='calibre', CFBundleDocumentTypes=docs, CFBundleURLTypes=url_handlers, - LSMinimumSystemVersion='10.15.0', + LSMinimumSystemVersion=MINIMUM_SYSTEM_VERSION, LSRequiresNativeExecution=True, NSAppleScriptEnabled=False, NSSupportsAutomaticGraphicsSwitching=True, @@ -476,13 +475,13 @@ class Freeze: @flush def add_podofo(self): print('\nAdding PoDoFo') - pdf = join(PREFIX, 'lib', 'libpodofo.0.9.7.dylib') + pdf = join(PREFIX, 'lib', 'libpodofo.2.dylib') self.install_dylib(pdf) @flush def add_poppler(self): print('\nAdding poppler') - for x in ('libopenjp2.7.dylib', 'libpoppler.115.dylib',): + for x in 
('libopenjp2.7.dylib', 'libpoppler.130.dylib',): self.install_dylib(join(PREFIX, 'lib', x)) for x in ('pdftohtml', 'pdftoppm', 'pdfinfo', 'pdftotext'): self.install_dylib( @@ -491,9 +490,9 @@ class Freeze: @flush def add_imaging_libs(self): print('\nAdding libjpeg, libpng, libwebp, optipng and mozjpeg') - for x in ('jpeg.8', 'png16.16', 'webp.7', 'webpmux.3', 'webpdemux.2'): + for x in ('jpeg.8', 'png16.16', 'webp.7', 'webpmux.3', 'webpdemux.2', 'sharpyuv.0'): self.install_dylib(join(PREFIX, 'lib', 'lib%s.dylib' % x)) - for x in 'optipng', 'JxrDecApp': + for x in 'optipng', 'JxrDecApp', 'cwebp': self.install_dylib(join(PREFIX, 'bin', x), set_id=False, dest=self.helpers_dir) for x in ('jpegtran', 'cjpeg'): self.install_dylib( @@ -527,10 +526,10 @@ class Freeze: def add_misc_libraries(self): for x in ( 'usb-1.0.0', 'mtp.9', 'chm.0', 'sqlite3.0', 'hunspell-1.7.0', - 'icudata.70', 'icui18n.70', 'icuio.70', 'icuuc.70', 'hyphen.0', 'uchardet.0', + 'icudata.73', 'icui18n.73', 'icuio.73', 'icuuc.73', 'hyphen.0', 'uchardet.0', 'stemmer.0', 'xslt.1', 'exslt.0', 'xml2.2', 'z.1', 'unrar', 'lzma.5', - 'brotlicommon.1', 'brotlidec.1', 'brotlienc.1', - 'crypto.1.1', 'ssl.1.1', 'iconv.2', # 'ltdl.7' + 'brotlicommon.1', 'brotlidec.1', 'brotlienc.1', 'zstd.1', 'jbig.2.1', 'tiff.6', + 'crypto.3', 'ssl.3', 'iconv.2', # 'ltdl.7' ): print('\nAdding', x) x = 'lib%s.dylib' % x @@ -540,6 +539,16 @@ class Freeze: self.set_id(dest, self.FID + '/' + x) self.fix_dependencies_in_lib(dest) + # OpenSSL modules and engines + for x in ('ossl-modules', 'engines-3'): + dest = join(self.frameworks_dir, x) + shutil.copytree(join(PREFIX, 'lib', x), dest) + for dylib in os.listdir(dest): + if dylib.endswith('.dylib'): + dylib = join(dest, dylib) + self.set_id(dylib, self.FID + '/' + x + '/' + os.path.basename(dylib)) + self.fix_dependencies_in_lib(dylib) + @flush def add_site_packages(self): print('\nAdding site-packages') @@ -587,7 +596,7 @@ class Freeze: def add_packages_from_dir(self, src): for x in 
os.listdir(src): x = join(src, x) - if os.path.isdir(x) and os.path.exists(join(x, '__init__.py')): + if os.path.isdir(x) and is_package_dir(x): if self.filter_package(basename(x)): continue self.add_package_dir(x) @@ -742,15 +751,24 @@ class Freeze: e = plist['CFBundleDocumentTypes'][0] e['CFBundleTypeExtensions'] = [x.lower() for x in formats] + def headless_plist(plist): + plist['CFBundleDisplayName'] = 'calibre worker process' + plist['CFBundleExecutable'] = 'calibre-parallel' + plist['CFBundleIdentifier'] = 'com.calibre-ebook.calibre-parallel' + plist['LSBackgroundOnly'] = '1' + plist.pop('CFBundleDocumentTypes') + self.create_app_clone('ebook-viewer.app', partial(specialise_plist, 'ebook-viewer', input_formats)) self.create_app_clone('ebook-edit.app', partial(specialise_plist, 'ebook-edit', edit_formats), base_dir=join(self.contents_dir, 'ebook-viewer.app', 'Contents')) + self.create_app_clone('headless.app', headless_plist, + base_dir=join(self.contents_dir, 'ebook-viewer.app', 'Contents', 'ebook-edit.app', 'Contents')) # We need to move the webengine resources into the deepest sub-app # because the sandbox gets set to the nearest enclosing app which # means that WebEngine will fail to access its resources when running # in the sub-apps unless they are present inside the sub app bundle # somewhere - base_dest = join(self.contents_dir, 'ebook-viewer.app', 'Contents', 'ebook-edit.app', 'Contents', 'SharedSupport') + base_dest = join(self.contents_dir, 'ebook-viewer.app', 'Contents', 'ebook-edit.app', 'Contents', 'headless.app', 'Contents', 'SharedSupport') os.mkdir(base_dest) base_src = os.path.realpath(join(self.frameworks_dir, 'QtWebEngineCore.framework/Resources')) items = [join(base_src, 'qtwebengine_locales')] + glob.glob(join(base_src, '*.pak')) + glob.glob(join(base_src, '*.dat')) @@ -810,7 +828,8 @@ def main(args, ext_dir, test_runner): build_dir = abspath(join(mkdtemp('frozen-'), APPNAME + '.app')) inc_dir = abspath(mkdtemp('include')) if 
args.skip_tests: - test_runner = lambda *a: None + def test_runner(*a): + return None Freeze(build_dir, ext_dir, inc_dir, test_runner, dont_strip=args.dont_strip, sign_installers=args.sign_installers, notarize=args.notarize) diff --git a/bypy/rsync.conf b/bypy/rsync.conf index 8625f7dcaa..64bd448c51 100644 --- a/bypy/rsync.conf +++ b/bypy/rsync.conf @@ -1 +1 @@ -to_vm_excludes '/imgsrc /build /dist /manual /format_docs /translations /.build-cache /tags /Changelog* *.so *.pyd' +to_vm_excludes '/imgsrc /build /dist /manual /format_docs /translations /.build-cache /.cache /tags /Changelog* *.so *.pyd' diff --git a/bypy/sources.json b/bypy/sources.json index 8419f30e2a..810f5e5905 100644 --- a/bypy/sources.json +++ b/bypy/sources.json @@ -3,9 +3,14 @@ "name": "nasm", "os": "macos,windows", "unix": { - "filename": "nasm-2.15.05.tar.xz", - "hash": "sha256:3caf6729c1073bf96629b57cee31eeb54f4f8129b01902c73428836550b30a3f", - "urls": ["https://www.nasm.us/pub/nasm/releasebuilds/2.15.05/{filename}"] + "filename": "nasm-2.16.01.tar.xz", + "hash": "sha256:c77745f4802375efeee2ec5c0ad6b7f037ea9c87c92b149a9637ff099f162558", + "urls": ["https://www.nasm.us/pub/nasm/releasebuilds/2.16.01/{filename}"] + }, + "windows": { + "filename": "nasm-2.16.01-win64.zip", + "hash": "sha256:029eed31faf0d2c5f95783294432cbea6c15bf633430f254bb3c1f195c67ca3a", + "urls": ["https://www.nasm.us/pub/nasm/releasebuilds/2.16.01/win64/{filename}"] } }, @@ -13,9 +18,9 @@ "name": "cmake", "os": "macos", "unix": { - "filename": "cmake-3.22.0.tar.gz", - "hash": "sha256:998c7ba34778d2dfdb3df8a695469e24b11e2bfa21fbe41b361a3f45e1c9345e", - "urls": ["https://github.com/Kitware/CMake/releases/download/v3.22.0/cmake-3.22.0.tar.gz"] + "filename": "cmake-3.27.6.tar.gz", + "hash": "sha256:ef3056df528569e0e8956f6cf38806879347ac6de6a4ff7e4105dc4578732cfb", + "urls": ["https://github.com/Kitware/CMake/releases/download/v3.27.6/{filename}"] } }, @@ -23,9 +28,9 @@ "name": "autoconf", "os": "macos", "unix": { - "filename": 
"autoconf-2.69.tar.xz", - "hash": "sha256:64ebcec9f8ac5b2487125a86a7760d2591ac9e1d3dbd59489633f9de62a57684", - "urls": ["ftp://ftp.gnu.org/gnu/autoconf/{filename}"] + "filename": "autoconf-2.71.tar.xz", + "hash": "sha256:f14c83cfebcc9427f2c3cea7258bd90df972d92eb26752da4ddad81c87a0faa4", + "urls": ["https://ftp.gnu.org/gnu/autoconf/{filename}"] } }, @@ -33,9 +38,9 @@ "name": "automake", "os": "macos", "unix": { - "filename": "automake-1.16.tar.xz", - "hash": "sha256:f98f2d97b11851cbe7c2d4b4eaef498ae9d17a3c2ef1401609b7b4ca66655b8a", - "urls": ["ftp://ftp.gnu.org/gnu/automake/{filename}"] + "filename": "automake-1.16.5.tar.xz", + "hash": "sha256:f01d58cd6d9d77fbdca9eb4bbd5ead1988228fdb73d6f7a201f5f8d6b118b469", + "urls": ["https://ftp.gnu.org/gnu/automake/{filename}"] } }, @@ -43,9 +48,9 @@ "name": "libtool", "os": "macos", "unix": { - "filename": "libtool-2.4.6.tar.xz", - "hash": "sha256:7c87a8c2c8c0fc9cd5019e402bed4292462d00a718a7cd5f11218153bf28b26f", - "urls": ["ftp://ftp.gnu.org/gnu/libtool/{filename}"] + "filename": "libtool-2.4.7.tar.xz", + "hash": "sha256:4f7f217f057ce655ff22559ad221a0fd8ef84ad1fc5fcb6990cecc333aa1635d", + "urls": ["https://ftp.gnu.org/gnu/libtool/{filename}"] } }, @@ -62,8 +67,8 @@ { "name": "zlib", "unix": { - "filename": "zlib-1.2.11.tar.xz", - "hash": "sha256:4ff941449631ace0d4d203e3483be9dbc9da454084111f97ea0a2114e19bf066", + "filename": "zlib-1.3.tar.xz", + "hash": "sha256:8a9ba2898e1d0d774eca6ba5b4627a11e5588ba85c8851336eb38de4683050a7", "urls": ["https://zlib.net/{filename}"] } }, @@ -82,8 +87,8 @@ "name": "xz", "os": "macos,linux", "unix": { - "filename": "xz-5.2.5.tar.gz", - "hash": "md5:0d270c997aff29708c74d53f599ef717", + "filename": "xz-5.4.4.tar.gz", + "hash": "sha256:aae39544e254cfd27e942d35a048d592959bd7a79f9a624afb0498bb5613bdf8", "urls": ["https://tukaani.org/xz/{filename}"] } }, @@ -91,8 +96,8 @@ { "name": "unrar", "unix": { - "filename": "unrarsrc-6.1.2.tar.gz", - "hash": 
"sha256:3e96421f568e438af6dcdaef717c48eb93b825d97058ebcb173b9bfc57807be3", + "filename": "unrarsrc-6.2.11.tar.gz", + "hash": "sha256:a805e150d56445770f71a85c3fbdc9ab4b04fbe61b01cb57182d86fe3e7e6cec", "urls": ["https://www.rarlab.com/rar/{filename}"] } }, @@ -100,28 +105,36 @@ { "name": "brotli", "unix": { - "filename": "brotli-1.0.9.tar.gz", - "hash": "sha256:f9e8d81d0405ba66d181529af42a3354f838c939095ff99930da6aa9cdf6fe46", - "urls": ["https://github.com/google/brotli/archive/refs/tags/v1.0.9.tar.gz"] + "filename": "libbrotli-1.1.0.tar.gz", + "hash": "sha256:e720a6ca29428b803f4ad165371771f5398faba397edf6778837a18599ea13ff", + "urls": ["https://github.com/google/brotli/archive/refs/tags/v1.1.0.tar.gz"] } }, + { + "name": "zstd", + "unix": { + "filename": "zstd-1.5.5.tar.gz", + "hash": "sha256:9c4396cc829cfae319a6e2615202e82aad41372073482fce286fac78646d3ee4", + "urls": ["https://github.com/facebook/zstd/releases/download/v1.5.5/{filename}"] + } + }, { "name": "expat", "unix": { - "filename": "expat-2.4.1.tar.bz2", - "hash": "sha256:2f9b6a580b94577b150a7d5617ad4643a4301a6616ff459307df3e225bcfbf40", - "urls": ["https://github.com/libexpat/libexpat/releases/download/R_2_4_1/{filename}"] + "filename": "expat-2.5.0.tar.bz2", + "hash": "sha256:6f0e6e01f7b30025fa05c85fdad1e5d0ec7fd35d9f61b22f34998de11969ff67", + "urls": ["https://github.com/libexpat/libexpat/releases/download/R_2_5_0/{filename}"] } }, { "name": "sqlite", "unix": { - "filename": "sqlite-autoconf-3360000.tar.gz", - "hash": "sha256:bd90c3eb96bee996206b83be7065c9ce19aef38c3f4fb53073ada0d0b69bbce3", - "urls": ["https://www.sqlite.org/2021/{filename}"] + "filename": "sqlite-autoconf-3430000.tar.gz", + "hash": "sha256:49008dbf3afc04d4edc8ecfc34e4ead196973034293c997adad2f63f01762ae1", + "urls": ["https://www.sqlite.org/2023/{filename}"] } }, @@ -129,9 +142,9 @@ "name": "libffi", "os": "linux", "unix": { - "filename": "libffi-3.4.2.tar.gz", - "hash": 
"sha256:540fb721619a6aba3bdeef7d940d8e9e0e6d2c193595bc243241b77ff9e93620", - "urls": ["https://github.com/libffi/libffi/releases/download/v3.4.2/{filename}"] + "filename": "libffi-3.4.4.tar.gz", + "hash": "sha256:d66c56ad259a82cf2a9dfc408b32bf5da52371500b84745f7fb8b645712df676", + "urls": ["https://github.com/libffi/libffi/releases/download/v3.4.4/{filename}"] } }, @@ -147,8 +160,8 @@ { "name": "openssl", "unix": { - "filename": "openssl-1.1.1l.tar.gz", - "hash": "sha256:0b7a3e5e59c34827fe0c3a74b7ec8baef302b98fa80088d7f9153aa16fa76bd1", + "filename": "openssl-3.1.3.tar.gz", + "hash": "sha256:f0316a2ebd89e7f2352976445458689f80302093788c466692fb2a188b2eacf6", "urls": ["https://www.openssl.org/source/{filename}"] } }, @@ -157,9 +170,9 @@ "name": "ncurses", "os": "linux", "unix": { - "filename": "ncurses-6.3.tar.gz", - "hash": "sha256:97fc51ac2b085d4cde31ef4d2c3122c21abc217e9090a43a30fc5ec21684e059", - "urls": ["ftp://ftp.gnu.org/gnu/ncurses/{filename}"] + "filename": "ncurses-6.4.tar.gz", + "hash": "sha256:6931283d9ac87c5073f30b6290c4c75f21632bb4fc3603ac8100812bed248159", + "urls": ["https://ftp.gnu.org/gnu/ncurses/{filename}"] } }, @@ -167,32 +180,32 @@ "name": "readline", "os": "linux", "unix": { - "filename": "readline-8.1.tar.gz", - "hash": "sha256:f8ceb4ee131e3232226a17f51b164afc46cd0b9e6cef344be87c65962cb82b02", - "urls": ["http://ftp.gnu.org/gnu/readline/{filename}"] + "filename": "readline-8.2.tar.gz", + "hash": "sha256:3feb7171f16a84ee82ca18a36d7b9be109a52c04f492a053331d7d1095007c35", + "urls": ["https://ftp.gnu.org/gnu/readline/{filename}"] } }, { "name": "python", "unix": { - "filename": "Python-3.10.1.tar.xz", - "hash": "sha256:a7f1265b6e1a5de1ec5c3ec7019ab53413469934758311e9d240c46e5ae6e177", - "urls": ["https://www.python.org/ftp/python/3.10.1/{filename}"] + "filename": "Python-3.11.5.tar.xz", + "hash": "sha256:85cd12e9cf1d6d5a45f17f7afe1cebe7ee628d3282281c492e86adf636defa3f", + "urls": ["https://www.python.org/ftp/python/3.11.5/{filename}"] } }, { 
"name": "icu", "unix": { - "filename": "icu4c-70_1-src.tgz", - "hash": "sha256:8d205428c17bf13bb535300669ed28b338a157b1c01ae66d31d0d3e2d47c3fd5", - "urls": ["https://github.com/unicode-org/icu/releases/download/release-70-1/{filename}"] + "filename": "icu4c-73_2-src.tgz", + "hash": "sha256:818a80712ed3caacd9b652305e01afc7fa167e6f2e94996da44b90c2ab604ce1", + "urls": ["https://github.com/unicode-org/icu/releases/download/release-73-2/{filename}"] }, "windows": { - "filename": "icu4c-70_1-src.zip", - "hash": "sha256:0d41e13364af260e330fdc5d2d60531564108d6a1234209f5712f8d9693315a7", - "urls": ["https://github.com/unicode-org/icu/releases/download/release-70-1/{filename}"] + "filename": "icu4c-73_2-src.zip", + "hash": "sha256:2c4300315141942178502f85f1167c0a6a65847c9b683e95c79535b373c1e369", + "urls": ["https://github.com/unicode-org/icu/releases/download/release-73-2/{filename}"] } }, @@ -209,26 +222,45 @@ { "name": "libjpeg", "unix": { - "filename": "libjpeg-turbo-2.1.2.tar.gz", - "hash": "sha256:09b96cb8cbff9ea556a9c2d173485fd19488844d55276ed4f42240e1e2073ce5", - "urls": ["http://downloads.sourceforge.net/project/libjpeg-turbo/2.1.2/{filename}"] + "filename": "libjpeg-turbo-3.0.0.tar.gz", + "hash": "sha256:c77c65fcce3d33417b2e90432e7a0eb05f59a7fff884022a9d931775d583bfaa", + "urls": ["https://downloads.sourceforge.net/project/libjpeg-turbo/3.0.0/{filename}"] } }, { "name": "libpng", "unix": { - "filename": "libpng-1.6.37.tar.xz", - "hash": "sha256:505e70834d35383537b6491e7ae8641f1a4bed1876dbfe361201fc80868d88ca", - "urls": ["http://downloads.sourceforge.net/sourceforge/libpng/{filename}"] + "filename": "libpng-1.6.40.tar.xz", + "hash": "sha256:535b479b2467ff231a3ec6d92a525906fb8ef27978be4f66dbe05d3f3a01b3a1", + "urls": ["https://downloads.sourceforge.net/sourceforge/libpng/{filename}"] + } + }, + + { + "name": "libjbig", + "comment": "Needed for libtiff", + "unix": { + "filename": "jbigkit-2.1.tar.gz", + "hash": 
"sha256:de7106b6bfaf495d6865c7dd7ac6ca1381bd12e0d81405ea81e7f2167263d932", + "urls": ["https://www.cl.cam.ac.uk/~mgk25/jbigkit/download/{filename}"] + } + }, + + { + "name": "libtiff", + "unix": { + "filename": "tiff-4.6.0.tar.xz", + "hash": "sha256:e178649607d1e22b51cf361dd20a3753f244f022eefab1f2f218fc62ebaf87d2", + "urls": ["http://download.osgeo.org/libtiff/{filename}"] } }, { "name": "libwebp", "unix": { - "filename": "libwebp-1.2.1.tar.gz", - "hash": "sha256:808b98d2f5b84e9b27fdef6c5372dac769c3bda4502febbfa5031bd3c4d7d018", + "filename": "libwebp-1.3.2.tar.gz", + "hash": "sha256:2a499607df669e40258e53d0ade8035ba4ec0175244869d1025d460562aa09b4", "urls": ["http://downloads.webmproject.org/releases/webp/{filename}"] } }, @@ -246,8 +278,8 @@ "name": "freetype", "os": "macos,windows", "unix": { - "filename": "freetype-2.11.0.tar.xz", - "hash": "sha256:8bee39bd3968c4804b70614a0a3ad597299ad0e824bc8aad5ce8aaf48067bde7", + "filename": "freetype-2.13.2.tar.xz", + "hash": "sha256:12991c4e55c506dd7f9b765933e62fd2be2e06d421505d7950a132e4f1bb484d", "urls": ["https://download.savannah.gnu.org/releases/freetype/{filename}"] } }, @@ -262,46 +294,44 @@ } }, - { "name": "fontconfig", "os": "macos", "unix": { - "filename": "fontconfig-2.13.94.tar.xz", - "hash": "sha256:a5f052cb73fd479ffb7b697980510903b563bbb55b8f7a2b001fcfb94026003c", + "filename": "fontconfig-2.14.2.tar.xz", + "hash": "sha256:dba695b57bce15023d2ceedef82062c2b925e51f5d4cc4aef736cf13f60a468b", "urls": ["https://www.fontconfig.org/release/{filename}"] } }, - { "name": "iconv", "unix": { - "filename": "libiconv-1.16.tar.gz", - "hash": "sha256:e6a1b1b589654277ee790cce3734f07876ac4ccfaecbee8afa0b649cf529cc04", + "filename": "libiconv-1.17.tar.gz", + "hash": "sha256:8f74213b56238c85a50a5329f77e06198771e70dd9a739779f4c02f65d971313", "urls": ["https://ftp.gnu.org/pub/gnu/libiconv/{filename}"] }, "windows": { "filename": "libiconv-for-Windows-1.16.zip", - "hash": 
"sha256:d542e635cad954a62a2b451b5644e855b848398917e93adf46d8da4c9cc88b6d", - "urls": ["https://github.com/pffang/libiconv-for-Windows/archive/v1.16.zip"] + "hash": "sha256:61b4445e67a3855945f88908c3c86801ef328feac15ef558b7e471269eca49fb", + "urls": ["https://github.com/pffang/libiconv-for-Windows/archive/1353455a6c4e15c9db6865fd9c2bf7203b59c0ec.zip"] } }, { "name": "libxml2", "unix": { - "filename": "libxml2-2.9.14.tar.xz", - "hash": "sha256:60d74a257d1ccec0475e749cba2f21559e48139efba6ff28224357c7c798dfee", - "urls": ["https://download.gnome.org/sources/libxml2/2.9/{filename}"] + "filename": "libxml2-2.12.1.tar.xz", + "hash": "sha256:8982b9ccdf7f456e30d8f7012d50858c6623e495333b6191def455c7e95427eb", + "urls": ["https://download.gnome.org/sources/libxml2/2.12/{filename}"] } }, { "name": "libxslt", "unix": { - "filename": "libxslt-1.1.35.tar.xz", - "hash": "sha256:8247f33e9a872c6ac859aa45018bc4c4d00b97e2feac9eebc10c93ce1f34dd79", + "filename": "libxslt-1.1.39.tar.xz", + "hash": "sha256:2a20ad621148339b0759c4d4e96719362dee64c9a096dbba625ba053846349f0", "urls": ["https://download.gnome.org/sources/libxslt/1.1/{filename}"] } }, @@ -327,9 +357,9 @@ { "name": "mozjpeg", "unix": { - "filename": "mozjpeg-4.0.3.tar.gz", - "hash": "sha256:4f22731db2afa14531a5bf2633d8af79ca5cb697a550f678bf43f24e5e409ef0", - "urls": ["https://github.com/mozilla/mozjpeg/archive/v4.0.3.tar.gz"] + "filename": "mozjpeg-4.1.4.tar.gz", + "hash": "sha256:8a5018dc93c08a49fba3aff8d1a0be0a3e26460e315d1db45abb64402935e0db", + "urls": ["https://github.com/mozilla/mozjpeg/archive/v4.1.4.tar.gz"] } }, @@ -337,9 +367,9 @@ "name": "libusb", "os": "macos, linux", "unix": { - "filename": "libusb-1.0.24.tar.bz2", - "hash": "sha256:7efd2685f7b327326dcfb85cee426d9b871fd70e22caa15bb68d595ce2a2b12a", - "urls": ["https://github.com/libusb/libusb/releases/download/v1.0.24/{filename}"] + "filename": "libusb-1.0.26.tar.bz2", + "hash": "sha256:12ce7a61fc9854d1d2a1ffe095f7b5fac19ddba095c259e6067a46500381b5a5", + "urls": 
["https://github.com/libusb/libusb/releases/download/v1.0.26/{filename}"] } }, @@ -347,8 +377,8 @@ "name": "libmtp", "os": "macos, linux", "unix": { - "filename": "libmtp-1.1.20.tar.gz", - "hash": "sha256:c9191dac2f5744cf402e08641610b271f73ac21a3c802734ec2cedb2c6bc56d0", + "filename": "libmtp-1.1.21.tar.gz", + "hash": "sha256:c4ffa5ab8c8f48c91b0047f2e253c101c418d5696a5ed65c839922a4280872a7", "urls": ["https://downloads.sourceforge.net/libmtp/{filename}"] } }, @@ -356,17 +386,17 @@ { "name": "openjpeg", "unix": { - "filename": "openjpeg-2.4.0.tar.gz", - "hash": "sha256:8702ba68b442657f11aaeb2b338443ca8d5fb95b0d845757968a7be31ef7f16d", - "urls": ["https://github.com/uclouvain/openjpeg/archive/v2.4.0/{filename}"] + "filename": "openjpeg-2.5.0.tar.gz", + "hash": "sha256:0333806d6adecc6f7a91243b2b839ff4d2053823634d4f6ed7a59bc87409122a", + "urls": ["https://github.com/uclouvain/openjpeg/archive/v2.5.0/{filename}"] } }, { "name": "poppler", "unix": { - "filename": "poppler-21.11.0.tar.xz", - "hash": "sha256:31b76b5cac0a48612fdd154c02d9eca01fd38fb8eaa77c1196840ecdeb53a584", + "filename": "poppler-23.08.0.tar.xz", + "hash": "sha256:4a4bf7fc903b9f1a2ab7d04b7c5d8220db9bc6261cc73fdb9a826dc272f49aa8", "urls": ["https://poppler.freedesktop.org/{filename}"] } }, @@ -374,9 +404,9 @@ { "name": "podofo", "unix": { - "filename": "podofo-0.9.7.tar.gz", - "hash": "sha256:7cf2e716daaef89647c54ffcd08940492fd40c385ef040ce7529396bfadc1eb8", - "urls": ["https://downloads.sourceforge.net/podofo/{filename}"] + "filename": "podofo-0.10.3.tar.gz", + "hash": "sha256:61f4d2175b75726d2881ad027c0275b8f5814b53058625d7b3411a055e2dca79", + "urls": ["https://github.com/podofo/podofo/archive/f7797f620f151475d05c87c1fab3db20b2f00c0e.tar.gz"] } }, @@ -384,9 +414,9 @@ "name": "libgpg-error", "os": "linux", "unix": { - "filename": "libgpg-error-1.43.tar.bz2", - "hash": "sha256:a9ab83ca7acc442a5bd846a75b920285ff79bdb4e3d34aa382be88ed2c3aebaf", - "urls": ["ftp://ftp.gnupg.org/gcrypt/libgpg-error/{filename}"] 
+ "filename": "libgpg-error-1.47.tar.bz2", + "hash": "sha256:9e3c670966b96ecc746c28c2c419541e3bcb787d1a73930f5e5f5e1bcbbb9bdb", + "urls": ["https://gnupg.org/ftp/gcrypt/libgpg-error/{filename}"] } }, @@ -394,9 +424,9 @@ "name": "libgcrypt", "os": "linux", "unix": { - "filename": "libgcrypt-1.9.4.tar.bz2", - "hash": "sha256:ea849c83a72454e3ed4267697e8ca03390aee972ab421e7df69dfe42b65caaf7", - "urls": ["ftp://ftp.gnupg.org/gcrypt/libgcrypt/{filename}"] + "filename": "libgcrypt-1.10.2.tar.bz2", + "hash": "sha256:3b9c02a004b68c256add99701de00b383accccf37177e0d6c58289664cce0c03", + "urls": ["https://gnupg.org/ftp/gcrypt/libgcrypt/{filename}"] } }, @@ -404,9 +434,9 @@ "name": "glib", "os": "linux", "unix": { - "filename": "glib-2.70.1.tar.xz", - "hash": "sha256:f9b7bce7f51753a1f43853bbcaca8bf09e15e994268e29cfd7a76f65636263c0", - "urls": ["https://ftp.gnome.org/pub/GNOME/sources/glib/2.70/{filename}"] + "filename": "glib-2.78.0.tar.xz", + "hash": "sha256:44eaab8b720877ce303c5540b657b126f12dc94972d9880b52959f43fb537b30", + "urls": ["https://ftp.gnome.org/pub/GNOME/sources/glib/2.78/{filename}"] } }, @@ -414,8 +444,8 @@ "name": "dbus", "os": "linux", "unix": { - "filename": "dbus-1.12.20.tar.gz", - "hash": "md5:dfe8a71f412e0b53be26ed4fbfdc91c4", + "filename": "dbus-1.15.8.tar.xz", + "hash": "sha256:84fc597e6ec82f05dc18a7d12c17046f95bad7be99fc03c15bc254c4701ed204", "urls": ["https://dbus.freedesktop.org/releases/dbus/{filename}"] } }, @@ -443,14 +473,14 @@ { "name": "hunspell", "unix": { - "filename": "hunspell-1.7.0.tar.gz", - "hash": "sha256:57be4e03ae9dd62c3471f667a0d81a14513e314d4d92081292b90435944ff951", - "urls": ["https://github.com/hunspell/hunspell/files/2573619/{filename}"] + "filename": "hunspell-1.7.2.tar.gz", + "hash": "sha256:11ddfa39afe28c28539fe65fc4f1592d410c1e9b6dd7d8a91ca25d85e9ec65b8", + "urls": ["https://github.com/hunspell/hunspell/releases/download/v1.7.2/{filename}"] }, "windows": { - "filename": "hunspell-1.7.0.zip", - "hash": 
"sha256:7089cc01ffd9122c960e1f8c7930a75be989ca4e9468773cef1b0a655bfb4368", - "urls": ["https://github.com/hunspell/hunspell/archive/v1.7.0.zip"] + "filename": "hunspell-1.7.2.zip", + "hash": "sha256:1d6b3ea1ab9ad4603335f0a663f260e88814d5f35b0ab69f0f24f6c19c02a3e1", + "urls": ["https://github.com/hunspell/hunspell/archive/refs/tags/v1.7.2.zip"] } }, @@ -460,8 +490,8 @@ "os": "macos,windows", "unix": { "filename": "ninja-src.tar.gz", - "hash": "sha256:ce35865411f0490368a8fc383f29071de6690cbadc27704734978221f25e2bed", - "urls": ["https://github.com/ninja-build/ninja/archive/refs/tags/v1.10.2.tar.gz"] + "hash": "sha256:31747ae633213f1eda3842686f83c2aa1412e0f5691d1c14dbbcc67fe7400cea", + "urls": ["https://github.com/ninja-build/ninja/archive/refs/tags/v1.11.1.tar.gz"] } }, @@ -471,58 +501,58 @@ "os": "macos,linux", "unix": { "filename": "nodejs-src.tar.gz", - "hash": "sha256:394991ab9c259a68b0a72735d2fcee957a9c78138bf8d9e02cf8c04f6be5c673", - "urls": ["https://github.com/nodejs/node/archive/refs/tags/v12.22.8.tar.gz"] + "hash": "sha256:059ea8ab31a1a4fe090751e8c82fca16d2f7339f309e15b2c65820dff83b579a", + "urls": ["https://github.com/nodejs/node/archive/refs/tags/v20.7.0.tar.gz"] } }, { "name": "qt-base", - "version": "6.3.1", + "version": "6.5.3", "hashes": { - "unix": "sha256:0a64421d9c2469c2c48490a032ab91d547017c9cc171f3f8070bc31888f24e03" + "unix": "md5:9b3b8e45d0923bc6928833d185b29e40" } }, { "name": "qt-svg", "hashes": { - "unix": "sha256:7b19f418e6f7b8e23344082dd04440aacf5da23c5a73980ba22ae4eba4f87df7" + "unix": "md5:6e29e50c706d1ef870d232f260cf05c2" } }, { "name": "qt-shadertools", "hashes": { - "unix": "sha256:59b77176961528cc7b0c9325134655e273aa87b4cb386c0f4683d8f2852e435a" + "unix": "md5:e751ba53872f74546e01277b51e0218d" } }, { "name": "qt-declarative", "hashes": { - "unix": "sha256:03e7694123820fcca397f95ce312e0b7f3039493c8754c836da098a1a04346e8" + "unix": "md5:c6f3840960803d8c1654ba48f9742814" } }, { "name": "qt-imageformats", "hashes": { - "unix": 
"sha256:ad0312b8dfbbb67f729bfadbfcd47246ee4a128b717731ba158c41d01fde212f" + "unix": "md5:47838764e104e7cb2390038989ba91ea" } }, { "name": "qt-webchannel", "hashes": { - "unix": "sha256:aaa20ac23f86992721b7ee487c379a3fd68caa8cdcea0a77a37e0d8b47ff2668" + "unix": "md5:d92af561c15f5af61656eabcdbaf1d27" } }, { "name": "qt-positioning", "hashes": { - "unix": "sha256:06788e4ecae1920094b30e4046f0abd23c8189d8a51e9c939b02f0b6abe2e86c" + "unix": "md5:c1eb22c2cdd18a8228e433e4e5d1577a" } }, @@ -530,29 +560,89 @@ "name": "qt-wayland", "os": "linux", "hashes": { - "unix": "sha256:6f14fea2d172a5b4170be3efcb0e58535f6605b61bcd823f6d5c9d165bb8c0f0" + "unix": "md5:b73feecf0fef221ed748ea7c984f45be" } }, { "name": "qt-sensors", "hashes": { - "unix": "sha256:4b240b59edba9a42b4735758a25f279a26841b982864e7b38f6ef0b81e0d60cc" + "unix": "md5:c4a48b39a3aaba6534f7fa5fbc363b0f" } }, { "name": "qt-webengine", "hashes": { - "unix": "sha256:ad7a33b21a956deda37c587d50f821ca3816403ae31ba9b5d59d01561ad66e47" + "unix": "md5:b418c1b4e333a06609dac451c4484e23" + } + }, + + { + "name": "installer", + "comment": "Needed infrastructure for installing pure python packages (wheels)", + "unix": { + "filename": "installer-0.7.0-py3-none-any.whl", + "hash": "sha256:05d1933f0a5ba7d8d6296bb6d5018e7c94fa473ceb10cf198a92ccea19c27b53", + "urls": ["pypi"] + } + }, + + { + "name": "packaging", + "comment": "Needed for build which needed for setuptools based packages", + "unix": { + "filename": "packaging-23.1-py3-none-any.whl", + "hash": "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", + "urls": ["pypi"] + } + }, + + { + "name": "pyproject_hooks", + "comment": "Needed for build which needed for setuptools based packages", + "unix": { + "filename": "pyproject_hooks-1.0.0-py3-none-any.whl", + "hash": "sha256:283c11acd6b928d2f6a7c73fa0d01cb2bdc5f07c57a2eeb6e83d5e56b97976f8", + "urls": ["pypi"] + } + }, + + { + "name": "wheel", + "comment": "Needed for build which needed for setuptools based 
packages", + "unix": { + "filename": "wheel-0.41.2-py3-none-any.whl", + "hash": "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8", + "urls": ["pypi"] + } + }, + + { + "name": "build", + "comment": "Needed for setuptools based packages", + "unix": { + "filename": "build-1.0.3-py3-none-any.whl", + "hash": "sha256:589bf99a67df7c9cf07ec0ac0e5e2ea5d4b37ac63301c4986d1acb126aa83f8f", + "urls": ["pypi"] } }, { "name": "setuptools", "unix": { - "filename": "setuptools-57.4.0.zip", - "hash": "sha256:46bd862894ed22c2edff033c758c2dc026324788d758e96788e8f7c11f4e9707", + "filename": "setuptools-68.2.2-py3-none-any.whl", + "hash": "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a", + "urls": ["pypi"] + } + }, + + { + "name": "setuptools_scm", + "comment": "Some packages (xxhash/py7zr/etc) erroneously declare this as a dependency even though it is needed only for sdist not building wheels", + "unix": { + "filename": "setuptools_scm-8.0.3-py3-none-any.whl", + "hash": "sha256:813822234453438a13c78d05c8af29918fbc06f88efb33d38f065340bbb48c39", "urls": ["pypi"] } }, @@ -560,8 +650,8 @@ { "name": "six", "unix": { - "filename": "six-1.16.0.tar.gz", - "hash": "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "filename": "six-1.16.0-py2.py3-none-any.whl", + "hash": "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254", "urls": ["pypi"] } }, @@ -569,8 +659,8 @@ { "name": "unrardll", "unix": { - "filename": "unrardll-0.1.5.tar.gz", - "hash": "sha256:8bebb480b96cd49d4290d814914f39cff75cf0fa0514c4790bb32b1757227c78", + "filename": "unrardll-0.1.7.tar.gz", + "hash": "sha256:e1067fe27bb4de204ef8f3692f23d93c5d3b4292f78b292c6fc7dc4f75749f76", "urls": ["pypi"] } }, @@ -578,8 +668,8 @@ { "name": "lxml", "unix": { - "filename": "lxml-4.9.1.tar.gz", - "hash": "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f", + "filename": "lxml-4.9.3.tar.gz", + "hash": 
"sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c", "urls": ["pypi"] } }, @@ -596,8 +686,8 @@ { "name": "html5-parser", "unix": { - "filename": "html5-parser-0.4.10.tar.gz", - "hash": "sha256:f9294418c0da95c2d5facc19d3dc32941093a6b8e3b3e4b36cc7b5a1697fbca4", + "filename": "html5-parser-0.4.12.tar.gz", + "hash": "sha256:3d7f89841aa48b976311f43863178c34c141abcf1dd45b67a7339e61cffe5306", "urls": ["pypi"] } }, @@ -605,8 +695,8 @@ { "name": "css-parser", "unix": { - "filename": "css-parser-1.0.8.tar.gz", - "hash": "sha256:994d6a0115a8f4e2b6648e2b12733036d05fa164278aa234630c9f03e52c8c80", + "filename": "css_parser-1.0.10-py2.py3-none-any.whl", + "hash": "sha256:d2e955a114829f0a327cc5535c2e65fe2e40b883b892881017d419a3b6dd05b7", "urls": ["pypi"] } }, @@ -614,8 +704,8 @@ { "name": "dateutil", "unix": { - "filename": "python-dateutil-2.8.2.tar.gz", - "hash": "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "filename": "python_dateutil-2.8.2-py2.py3-none-any.whl", + "hash": "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9", "urls": ["pypi"] } }, @@ -624,8 +714,8 @@ "name": "jeepney", "os": "linux", "unix": { - "filename": "jeepney-0.7.1.tar.gz", - "hash": "sha256:fa9e232dfa0c498bd0b8a3a73b8d8a31978304dcef0515adc859d4e096f96f4f", + "filename": "jeepney-0.8.0-py3-none-any.whl", + "hash": "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755", "urls": ["pypi"] } }, @@ -633,8 +723,8 @@ { "name": "dnspython", "unix": { - "filename": "dnspython-2.1.0.zip", - "hash": "sha256:e4a87f0b573201a0f3727fa18a516b055fd1107e0e5477cded4a2de497df1dd4", + "filename": "dnspython-2.4.2-py3-none-any.whl", + "hash": "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8", "urls": ["pypi"] } }, @@ -642,8 +732,8 @@ { "name": "mechanize", "unix": { - "filename": "mechanize-0.4.7.tar.gz", - "hash": "sha256:1773a8f5818398e0010e781dc0f942cd88b107a57424c904d545cd827c216809", + "filename": 
"mechanize-0.4.8-py2.py3-none-any.whl", + "hash": "sha256:961fd171b5eb37a7578fce62ba81ba85803dff3c5ba4ac24f6f569ae27198439", "urls": ["pypi"] } }, @@ -651,8 +741,8 @@ { "name": "feedparser", "unix": { - "filename": "feedparser-6.0.8.tar.bz2", - "hash": "sha256:5ce0410a05ab248c8c7cfca3a0ea2203968ee9ff4486067379af4827a59f9661", + "filename": "feedparser-6.0.10-py3-none-any.whl", + "hash": "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f", "urls": ["pypi"] } }, @@ -669,8 +759,8 @@ { "name": "markdown", "unix": { - "filename": "Markdown-3.3.6.tar.gz", - "hash": "sha256:76df8ae32294ec39dcf89340382882dfa12975f87f45c3ed1ecdb1e8cefc7006", + "filename": "Markdown-3.4.4-py3-none-any.whl", + "hash": "sha256:a4c1b65c0957b4bd9e7d86ddc7b3c9868fb9670660f6f99f6d1bca8954d5a941", "urls": ["pypi"] } }, @@ -678,8 +768,8 @@ { "name": "html2text", "unix": { - "filename": "html2text-2020.1.16.tar.gz", - "hash": "sha256:e296318e16b059ddb97f7a8a1d6a5c1d7af4544049a01e261731d2d5cc277bbb", + "filename": "html2text-2020.1.16-py3-none-any.whl", + "hash": "sha256:c7c629882da0cf377d66f073329ccf34a12ed2adf0169b9285ae4e63ef54c82b", "urls": ["pypi"] } }, @@ -687,8 +777,8 @@ { "name": "soupsieve", "unix": { - "filename": "soupsieve-2.3.1.tar.gz", - "hash": "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9", + "filename": "soupsieve-2.5-py3-none-any.whl", + "hash": "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7", "urls": ["pypi"] } }, @@ -696,8 +786,8 @@ { "name": "beautifulsoup4", "unix": { - "filename": "beautifulsoup4-4.10.0.tar.gz", - "hash": "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891", + "filename": "beautifulsoup4-4.12.2-py3-none-any.whl", + "hash": "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a", "urls": ["pypi"] } }, @@ -705,8 +795,8 @@ { "name": "regex", "unix": { - "filename": "regex-2021.11.10.tar.gz", - "hash": 
"sha256:f341ee2df0999bfdf7a95e448075effe0db212a59387de1a70690e4acb03d4c6", + "filename": "regex-2023.8.8.tar.gz", + "hash": "sha256:fcbdc5f2b0f1cd0f6a56cdb46fe41d2cce1e644e3b68832f3eeebc5fb0f7712e", "urls": ["pypi"] } }, @@ -714,8 +804,8 @@ { "name": "chardet", "unix": { - "filename": "chardet-4.0.0.tar.gz", - "hash": "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", + "filename": "chardet-5.2.0-py3-none-any.whl", + "hash": "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", "urls": ["pypi"] } }, @@ -723,8 +813,8 @@ { "name": "uchardet", "unix": { - "filename": "uchardet-0.0.7.tar.xz", - "hash": "sha256:3fc79408ae1d84b406922fa9319ce005631c95ca0f34b205fad867e8b30e45b1", + "filename": "uchardet-0.0.8.tar.xz", + "hash": "sha256:e97a60cfc00a1c147a674b097bb1422abd9fa78a2d9ce3f3fdcc2e78a34ac5f0", "urls": ["https://www.freedesktop.org/software/uchardet/releases/{filename}"] } }, @@ -732,8 +822,8 @@ { "name": "msgpack", "unix": { - "filename": "msgpack-1.0.3.tar.gz", - "hash": "sha256:51fdc7fb93615286428ee7758cecc2f374d5ff363bdd884c7ea622a7a327a81e", + "filename": "msgpack-1.0.7.tar.gz", + "hash": "sha256:572efc93db7a4d27e404501975ca6d2d9775705c2d922390d878fcf768d92c87", "urls": ["pypi"] } }, @@ -741,8 +831,8 @@ { "name": "pygments", "unix": { - "filename": "Pygments-2.10.0.tar.gz", - "hash": "sha256:f398865f7eb6874156579fdf36bc840a03cab64d1cde9e93d68f46a425ec52c6", + "filename": "Pygments-2.16.1-py3-none-any.whl", + "hash": "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692", "urls": ["pypi"] } }, @@ -750,8 +840,8 @@ { "name": "pycryptodome", "unix": { - "filename": "pycryptodome-3.11.0.tar.gz", - "hash": "sha256:428096bbf7a77e207f418dfd4d7c284df8ade81d2dc80f010e92753a3e406ad0", + "filename": "pycryptodome-3.19.0.tar.gz", + "hash": "sha256:bc35d463222cdb4dbebd35e0784155c81e161b9284e567e7e933d722e533331e", "urls": ["pypi"] } }, @@ -759,17 +849,17 @@ { "name": "apsw", "unix": { - "filename": 
"apsw-3.36.0-r1.zip", - "hash": "sha256:17355c39a8cdb9b9cd75b76d883b624c0dd05f80a6677c693b67c343c2381871", - "urls": ["https://github.com/rogerbinns/apsw/releases/download/3.36.0-r1/{filename}"] + "filename": "apsw-3.43.0.0.zip", + "hash": "sha256:7440d5f976a369ab87477e064545a8a62a93e413ead7bfe0be4efc75b1f30bb0", + "urls": ["https://github.com/rogerbinns/apsw/releases/download/3.43.0.0/{filename}"] } }, { "name": "webencodings", "unix": { - "filename": "webencodings-0.5.1.tar.gz", - "hash": "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", + "filename": "webencodings-0.5.1-py2.py3-none-any.whl", + "hash": "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", "urls": ["pypi"] } }, @@ -777,8 +867,8 @@ { "name": "html5lib", "unix": { - "filename": "html5lib-1.1.tar.gz", - "hash": "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f", + "filename": "html5lib-1.1-py2.py3-none-any.whl", + "hash": "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d", "urls": ["pypi"] } }, @@ -786,8 +876,8 @@ { "name": "pillow", "unix": { - "filename": "Pillow-8.4.0.tar.gz", - "hash": "sha256:b8e2f83c56e141920c39464b852de3719dfbfb6e3c99a2d8da0edf4fb33176ed", + "filename": "Pillow-10.0.1.tar.gz", + "hash": "sha256:d72967b06be9300fed5cfbc8b5bafceec48bf7cdc7dab66b1d2549035287191d", "urls": ["pypi"] } }, @@ -804,8 +894,8 @@ { "name": "psutil", "unix": { - "filename": "psutil-5.8.0.tar.gz", - "hash": "sha256:0c9ccb99ab76025f2f0bbecf341d4656e9c1351db8cc8a03ccd62e318ab4b5c6", + "filename": "psutil-5.9.5.tar.gz", + "hash": "sha256:5410638e4df39c54d957fc51ce03048acd8e6d60abc0f5107af51e5fb566eb3c", "urls": ["pypi"] } }, @@ -813,8 +903,8 @@ { "name": "ifaddr", "unix": { - "filename": "ifaddr-0.1.7.tar.gz", - "hash": "sha256:1f9e8a6ca6f16db5a37d3356f07b6e52344f6f9f7e806d618537731669eb1a94", + "filename": "ifaddr-0.2.0.tar.gz", + "hash": "sha256:cc0cbfcaabf765d44595825fb96a99bb12c79716b73b44330ea38ee2b0c4aed4", 
"urls": ["pypi"] } }, @@ -823,8 +913,8 @@ "name": "texttable", "comment": "needed for py7zr", "unix": { - "filename": "texttable-1.6.4.tar.gz", - "hash": "sha256:42ee7b9e15f7b225747c3fa08f43c5d6c83bc899f80ff9bae9319334824076e9", + "filename": "texttable-1.6.7-py2.py3-none-any.whl", + "hash": "sha256:b7b68139aa8a6339d2c320ca8b1dc42d13a7831a346b446cb9eb385f0c76310c", "urls": ["pypi"] } }, @@ -833,8 +923,8 @@ "name": "multivolumefile", "comment": "Needed by py7zr", "unix": { - "filename": "multivolumefile-0.2.3.tar.gz", - "hash": "sha256:a0648d0aafbc96e59198d5c17e9acad7eb531abea51035d08ce8060dcad709d6", + "filename": "multivolumefile-0.2.3-py3-none-any.whl", + "hash": "sha256:237f4353b60af1703087cf7725755a1f6fcaeeea48421e1896940cd1c920d678", "urls": ["pypi"] } }, @@ -843,8 +933,8 @@ "name": "python-brotli", "comment": "Needed by py7zr", "unix": { - "filename": "brotli-1.0.9.zip", - "hash": "sha256:4d1b810aa0ed773f81dceda2cc7b403d01057458730e309856356d4ef4188438", + "filename": "Brotli-1.1.0.tar.gz", + "hash": "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724", "urls": ["pypi"] } }, @@ -853,29 +943,57 @@ "name": "pyzstd", "comment": "Needed by py7zr", "unix": { - "filename": "pyzstd-0.15.0.tar.gz", - "hash": "sha256:bf15a39cb3c9b662775e22ffa8c4da09fdde6a15ece5e0ed710b6d3b4329cf36", + "filename": "pyzstd-0.15.9.tar.gz", + "hash": "sha256:cbfdde6c5768ffa5d2f14127bbc1d7c3c2d03c0ceaeb0736946197e06275ccc7", "urls": ["pypi"] } }, - { "name": "pyppmd", "comment": "Needed by py7zr", "unix": { - "filename": "pyppmd-0.17.3.tar.gz", - "hash": "sha256:05a4765833623d5cc1c9afc27d9b05fb64f0319711beffe0b44d9695eb1beb0a", + "filename": "pyppmd-1.0.0.tar.gz", + "hash": "sha256:075c9bd297e3b0a87dd7aeabca7fee668218acbe69ecc1c6511064558de8840f", "urls": ["pypi"] } }, + { + "name": "pybcj", + "comment": "Needed by py7zr", + "unix": { + "filename": "pybcj-1.0.1.tar.gz", + "hash": "sha256:8b682ed08caabfb7c042d4be083e28ddc692afb1deff5567111f8855071b75c3", + "urls": 
["pypi"] + } + }, + + { + "name": "inflate64", + "comment": "Needed by py7zr", + "unix": { + "filename": "inflate64-0.3.1.tar.gz", + "hash": "sha256:b52dd8fefd2ba179e5dfa18d6eca7e2fc822584616271c039d5ef1f9ca90c71c", + "urls": ["pypi"] + } + }, { "name": "py7zr", "unix": { - "filename": "py7zr-0.16.3.tar.gz", - "hash": "sha256:9ae93b801470c554dc1546a297aafb1dd4dfa799224aa929ad02c443607b9844", + "filename": "py7zr-0.20.6-py3-none-any.whl", + "hash": "sha256:c7cfb7183fb8f48038f1036a116ca89dc8bd57979d05b75567f00e88a5afe698", + "urls": ["pypi"] + } + }, + + { + "name": "poetry_core", + "comment": "Needed for building zeroconf", + "unix": { + "filename": "poetry_core-1.7.0-py3-none-any.whl", + "hash": "sha256:38e174cdb00a84ee4a1cab66a378b435747f72414f5573bc18cfc3850a94df38", "urls": ["pypi"] } }, @@ -883,28 +1001,17 @@ { "name": "zeroconf", "unix": { - "filename": "zeroconf-0.37.0.tar.gz", - "hash": "sha256:f901eda390160bc270aeba95ef2d6aa0a736503301dac393e7d5fd95fa17043a", + "filename": "zeroconf-0.115.0.tar.gz", + "hash": "sha256:da8c12f0a4b15578c22bf274f479fffd90a944407ebad904c891bf771aad83e7", "urls": ["pypi"] } }, { - "name": "toml", - "comment": "Needed for sip (build time dependency)", + "name": "fonttools", "unix": { - "filename": "toml-0.10.2.tar.gz", - "hash": "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", - "urls": ["pypi"] - } - }, - - { - "name": "pyparsing", - "comment": "Needed for packaging (build time dependency)", - "unix": { - "filename": "pyparsing-3.0.6.tar.gz", - "hash": "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81", + "filename": "fonttools-4.47.0-py3-none-any.whl", + "hash": "sha256:d6477ba902dd2d7adda7f0fd3bfaeb92885d45993c9e1928c9f28fc3961415f7", "urls": ["pypi"] } }, @@ -913,18 +1020,8 @@ "name": "ply", "comment": "Needed for sip (build time dependency)", "unix": { - "filename": "ply-3.11.tar.gz", - "hash": "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", - "urls": 
["pypi"] - } - }, - - { - "name": "packaging", - "comment": "Needed for sip (build time dependency)", - "unix": { - "filename": "packaging-21.3.tar.gz", - "hash": "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", + "filename": "ply-3.11-py2.py3-none-any.whl", + "hash": "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", "urls": ["pypi"] } }, @@ -933,8 +1030,8 @@ "name": "sip", "comment": "build time dependency", "unix": { - "filename": "sip-6.6.2.tar.gz", - "hash": "sha256:0e3efac1c5dfd8e525ae57140927df26993e13f58b89d1577c314f4105bfd90d", + "filename": "sip-6.7.11.tar.gz", + "hash": "sha256:f0dc3287a0b172e5664931c87847750d47e4fdcda4fe362b514af8edd655b469", "urls": ["pypi"] } }, @@ -943,8 +1040,8 @@ "name": "pyqt-builder", "comment": "build time dependency", "unix": { - "filename": "PyQt-builder-1.13.0.tar.gz", - "hash": "sha256:4877580c38ceb5320e129b381d083b0a8601c68166d8b99707f08fa0a1689eef", + "filename": "PyQt_builder-1.15.2-py3-none-any.whl", + "hash": "sha256:9e5acbe6ddffc2b8e7a619c5053d1f7da53f5db0faff8ce2b573f0e6604ad981", "urls": ["pypi"] } }, @@ -953,8 +1050,8 @@ "name": "pyqt-sip", "comment": "runtime sip module for PyQt", "unix": { - "filename": "PyQt6_sip-13.4.0.tar.gz", - "hash": "sha256:6d87a3ee5872d7511b76957d68a32109352caf3b7a42a01d9ee20032b350d979", + "filename": "PyQt6_sip-13.5.2.tar.gz", + "hash": "sha256:ebf6264b6feda01ba37d3b60a4bb87493bdb87be70f7b2a5384a7acd4902d88d", "urls": ["pypi"] } }, @@ -962,8 +1059,8 @@ { "name": "pyqt", "unix": { - "filename": "PyQt6-6.3.1.tar.gz", - "hash": "sha256:8cc6e21dbaf7047d1fc897e396ccd9710a12f2ef976563dad65f06017d2c9757", + "filename": "PyQt6-6.5.2.tar.gz", + "hash": "sha256:1487ee7350f9ffb66d60ab4176519252c2b371762cbe8f8340fd951f63801280", "urls": ["pypi"] } }, @@ -971,20 +1068,19 @@ { "name": "pyqt-webengine", "unix": { - "filename": "PyQt6_WebEngine-6.3.1.tar.gz", - "hash": "sha256:c3d1f5527b4b15f44102d617c59b1d74d9af50f821629e9335f13df47de8f007", + 
"filename": "PyQt6_WebEngine-6.5.0.tar.gz", + "hash": "sha256:8ba9db56c4c181a2a2fab1673ca35e5b63dc69113f085027ddc43c710b6d6ee9", "urls": ["pypi"] } }, - { "name": "speech-dispatcher-client", "os": "linux", "unix": { - "filename": "speech-dispatcher-0.11.1.tar.gz", - "hash": "sha256:d1da12ed3dac84f13799b6a2ec39ba2d3ca21a8493f44dc1855bd0ba96c2ddc6", - "urls": ["https://github.com/brailcom/speechd/releases/download/0.11.1/{filename}"] + "filename": "speech-dispatcher-0.11.5.tar.gz", + "hash": "sha256:1ce4759ffabbaf1aeb433a5ec0739be0676e9bdfbae9444a7b3be1b2af3ec12b", + "urls": ["https://github.com/brailcom/speechd/releases/download/0.11.5/{filename}"] } }, @@ -993,8 +1089,16 @@ "name": "macfsevents", "os": "macos", "unix": { - "filename": "MacFSEvents-0.8.1.tar.gz", - "hash": "sha256:1324b66b356051de662ba87d84f73ada062acd42b047ed1246e60a449f833e10", + "filename": "MacFSEvents-0.8.4.tar.gz", + "hash": "sha256:bf7283f1d517764ccdc8195b21631dbbac1c506b920bf9a8ea2956b3127651cb", + "urls": ["pypi"] + } + }, + { + "name": "xxhash", + "unix": { + "filename": "xxhash-3.3.0.tar.gz", + "hash": "sha256:c3f9e322b1ebeebd44e3d9d2d9b124e0c550c1ef41bd552afdcdd719516ee41a", "urls": ["pypi"] } } diff --git a/bypy/windows.conf b/bypy/windows.conf index 3134411853..2ca18428e2 100644 --- a/bypy/windows.conf +++ b/bypy/windows.conf @@ -1,17 +1,16 @@ -# Requires installation of Visual Studio 2019 Community Edition, WiX Toolset, Git, Ruby, NodeJS, Python (2 and 3) and Perl -# git.exe must be in PATH. Must have ~120GB available disk space and 8GB RAM +# Requires installation of Visual Studio 2022 Community Edition, .NET SDK, WiX Toolset, Git, Ruby (without devkit), NodeJS, Python and Perl +# git.exe must be in PATH. 
Must have ~120GB available disk space and 24GB RAM to build Qt WebEngine # Install certifi in python 3 with: # py.exe -m pip install certifi -# Note that python2 is needed to build Qt WebEngine # Copy opengl32sw.dll from # https://download.qt.io/development_releases/prebuilt/llvmpipe/windows/ -# to C:/mesa/32 and C:/mesa/64 +# to C:/mesa/64 vm_name 'windows-calibre' root 'C:/r' python 'py.exe' python2 'C:/Python27/python.exe' perl 'C:/Strawberry/perl/bin/perl.exe' -ruby 'C:/Ruby30-x64/bin/ruby.exe' -nodejs 'C:/nodejs/node.exe' +ruby 'C:/Ruby32-x64/bin/ruby.exe' +nodejs 'C:/Program Files/nodejs/node.exe' mesa 'C:/mesa' diff --git a/bypy/windows/__main__.py b/bypy/windows/__main__.py index fc62b117b6..ba6aaad858 100644 --- a/bypy/windows/__main__.py +++ b/bypy/windows/__main__.py @@ -128,17 +128,21 @@ def freeze(env, ext_dir, incdir): printf('\tAdding misc binary deps') - def copybin(x): - shutil.copy2(x, env.dll_dir) + def copybin(x, dest=env.dll_dir): + shutil.copy2(x, dest) with contextlib.suppress(FileNotFoundError): - shutil.copy2(x + '.manifest', env.dll_dir) + shutil.copy2(x + '.manifest', dest) bindir = os.path.join(PREFIX, 'bin') - for x in ('pdftohtml', 'pdfinfo', 'pdftoppm', 'pdftotext', 'jpegtran-calibre', 'cjpeg-calibre', 'optipng-calibre', 'JXRDecApp-calibre'): + for x in ('pdftohtml', 'pdfinfo', 'pdftoppm', 'pdftotext', 'jpegtran-calibre', 'cjpeg-calibre', 'optipng-calibre', 'cwebp-calibre', 'JXRDecApp-calibre'): copybin(os.path.join(bindir, x + '.exe')) for f in glob.glob(os.path.join(bindir, '*.dll')): if re.search(r'(easylzma|icutest)', f.lower()) is None: copybin(f) + ossm = os.path.join(env.dll_dir, 'ossl-modules') + os.mkdir(ossm) + for f in glob.glob(os.path.join(PREFIX, 'lib', 'ossl-modules', '*.dll')): + copybin(f, ossm) copybin(os.path.join(env.python_base, 'python%s.dll' % env.py_ver.replace('.', ''))) copybin(os.path.join(env.python_base, 'python%s.dll' % env.py_ver[0])) @@ -315,6 +319,7 @@ def build_portable_installer(env): 'Ole32.lib', 
'Shlwapi.lib', 'Kernel32.lib', 'Psapi.lib'] run(*cmd) os.remove(zf) + os.remove(manifest) def build_portable(env): @@ -496,6 +501,7 @@ def build_launchers(env, incdir, debug=False): '/LIBPATH:' + env.obj_dir, '/SUBSYSTEM:' + subsys, '/LIBPATH:%s/libs' % env.python_base, '/RELEASE', '/MANIFEST:EMBED', '/MANIFESTINPUT:' + mf, + '/STACK:2097152', # Set stack size to 2MB which is what python expects. Default on windows is 1MB 'user32.lib', 'kernel32.lib', '/OUT:' + exe] + u32 + dlflags + [embed_resources(env, exe), dest, lib] run(*cmd) @@ -570,7 +576,7 @@ def main(): run_tests(os.path.join(env.base, 'calibre-debug.exe'), env.base) if args.sign_installers: sign_executables(env) - create_installer(env) + create_installer(env, args.compression_level) build_portable(env) build_portable_installer(env) if args.sign_installers: diff --git a/bypy/windows/en-us.xml b/bypy/windows/en-us.xml index 9c47b99dbe..2a2b88d0a5 100644 --- a/bypy/windows/en-us.xml +++ b/bypy/windows/en-us.xml @@ -1,9 +1,7 @@ - - - Click Advanced to change installation settings. - Computing space requirements, this may take up to five minutes... - Computing space requirements, this may take up to five minutes... - Computing space requirements, this may take up to five minutes... - Please wait while the installer finishes determining your disk space requirements, this may take up to five minutes... 
+ + + + + + - diff --git a/bypy/windows/site.py b/bypy/windows/site.py index 146027bcdf..1563c330f2 100644 --- a/bypy/windows/site.py +++ b/bypy/windows/site.py @@ -60,6 +60,16 @@ def set_quit(): builtins.exit = _sitebuiltins.Quitter('exit', eof) +def workaround_lxml_bug(): + # Without calling xmlInitParser() import lxml causes a segfault + import ctypes + x = ctypes.WinDLL('libxml2.dll') + x.xmlInitParser() + workaround_lxml_bug.libxml2 = x + from lxml import etree + del etree + + def main(): sys.meta_path.insert(0, PydImporter()) os.add_dll_directory(os.path.abspath(os.path.join(sys.app_dir, 'app', 'bin'))) @@ -75,6 +85,8 @@ def main(): set_helper() set_quit() + workaround_lxml_bug() + return run_entry_point() diff --git a/bypy/windows/util.c b/bypy/windows/util.c index 70f2abbe43..db889dd0d2 100644 --- a/bypy/windows/util.c +++ b/bypy/windows/util.c @@ -43,6 +43,8 @@ get_app_dirw(void) { static void get_install_locations(void) { get_app_dirw(); + _snwprintf_s(qt_prefix_dir, MAX_PATH-1, _TRUNCATE, L"%ls\\ossl-modules", interpreter_data.executables_path); + _wputenv_s(L"OPENSSL_MODULES", qt_prefix_dir); _snwprintf_s(qt_prefix_dir, MAX_PATH-1, _TRUNCATE, L"%ls\\app", interpreter_data.app_dir); _wputenv_s(L"CALIBRE_QT_PREFIX", qt_prefix_dir); } diff --git a/bypy/windows/wix-template.xml b/bypy/windows/wix-template.xml index 69a80b9e11..178b19f2ac 100644 --- a/bypy/windows/wix-template.xml +++ b/bypy/windows/wix-template.xml @@ -1,13 +1,8 @@ - - + - - - + + + - + @@ -32,64 +27,45 @@ - - + + - + - + - + - - - - - - - - - - + - + + + + + + + + + - {app_components} - - - + {app_components} + + + - - + + - + - - + + @@ -101,136 +77,103 @@ - - - - - - - + + + + + + + - - + + - - - - + + + + - + - + - - + + - - + + - - + + - - + + - - + + - - + + - - - = 10240)]]> - + + + - + - NEWPRODUCTFOUND - {fix_wix} + + - NEWPRODUCTFOUND - {fix_wix} + + - + - WIXUI_EXITDIALOGOPTIONALCHECKBOX = 1 and NOT Installed + - - + - + - + + + + + + diff --git a/bypy/windows/wix.py 
b/bypy/windows/wix.py index 46cb1672ec..b404bed6d9 100644 --- a/bypy/windows/wix.py +++ b/bypy/windows/wix.py @@ -9,19 +9,22 @@ import shutil from bypy.constants import is64bit from bypy.utils import run -WIXP = r'C:\Program Files (x86)\WiX Toolset v3.14' +WIX = os.path.expanduser('~/.dotnet/tools/wix.exe') if is64bit: UPGRADE_CODE = '5DD881FF-756B-4097-9D82-8C0F11D521EA' else: UPGRADE_CODE = 'BEB2A80D-E902-4DAD-ADF9-8BD2DA42CFE1' calibre_constants = globals()['calibre_constants'] -CANDLE = WIXP + r'\bin\candle.exe' -LIGHT = WIXP + r'\bin\light.exe' j, d, a, b = os.path.join, os.path.dirname, os.path.abspath, os.path.basename -def create_installer(env): +def create_installer(env, compression_level='9'): + cl = int(compression_level) + if cl > 4: + dcl = 'high' + else: + dcl = {1: 'none', 2: 'low', 3: 'medium', 4: 'mszip'}[cl] if os.path.exists(env.installer_dir): shutil.rmtree(env.installer_dir) os.makedirs(env.installer_dir) @@ -32,12 +35,9 @@ def create_installer(env): components, smap = get_components_from_files(env) wxs = template.format( app=calibre_constants['appname'], - appfolder='Calibre2' if is64bit else 'Calibre', version=calibre_constants['version'], upgrade_code=UPGRADE_CODE, - ProgramFilesFolder='ProgramFiles64Folder' if is64bit else 'ProgramFilesFolder', x64=' 64bit' if is64bit else '', - fix_wix='' if is64bit else '', compression='high', app_components=components, exe_map=smap, @@ -45,6 +45,9 @@ def create_installer(env): viewer_icon=j(env.src_root, 'icons', 'viewer.ico'), editor_icon=j(env.src_root, 'icons', 'ebook-edit.ico'), web_icon=j(env.src_root, 'icons', 'web.ico'), + license=j(env.src_root, 'LICENSE.rtf'), + banner=j(env.src_root, 'icons', 'wix-banner.bmp'), + dialog=j(env.src_root, 'icons', 'wix-dialog.bmp'), ) with open(j(d(__file__), 'en-us.xml'), 'rb') as f: template = f.read().decode('utf-8') @@ -56,30 +59,16 @@ def create_installer(env): f.write(wxs.encode('utf-8')) with open(enusf, 'wb') as f: f.write(enus.encode('utf-8')) - wixobj = 
j(env.installer_dir, calibre_constants['appname'] + '.wixobj') arch = 'x64' if is64bit else 'x86' - cmd = [CANDLE, '-nologo', '-arch', arch, '-ext', 'WiXUtilExtension', '-o', wixobj, wxsf] - run(*cmd) installer = j(env.dist, '%s%s-%s.msi' % ( calibre_constants['appname'], ('-64bit' if is64bit else ''), calibre_constants['version'])) - license = j(env.src_root, 'LICENSE.rtf') - banner = j(env.src_root, 'icons', 'wix-banner.bmp') - dialog = j(env.src_root, 'icons', 'wix-dialog.bmp') - cmd = [LIGHT, '-nologo', '-ext', 'WixUIExtension', - '-cultures:en-us', '-loc', enusf, wixobj, - '-ext', 'WixUtilExtension', - '-o', installer, - '-dWixUILicenseRtf=' + license, - '-dWixUIBannerBmp=' + banner, - '-dWixUIDialogBmp=' + dialog] - cmd.extend([ - '-sice:ICE60', # No language in dlls warning - '-sice:ICE61', # Allow upgrading with same version number - '-sice:ICE40', # Re-install mode overridden - '-sice:ICE69', # Shortcut components are part of a different feature than the files they point to - ]) - cmd.append('-sval') # Disable all checks since they fail when running under ssh + run(WIX, 'extension', 'add', '-g', 'WixToolset.Util.wixext') + run(WIX, 'extension', 'add', '-g', 'WixToolset.UI.wixext') + cmd = [WIX, 'build', '-arch', arch, '-culture', 'en-us', '-loc', enusf, '-dcl', dcl, + '-ext', 'WixToolset.Util.wixext', '-ext', 'WixToolset.UI.wixext', '-o', installer, wxsf] run(*cmd) + pdb = installer.rpartition('.')[0] + '.wixpdb' + os.remove(pdb) def get_components_from_files(env): diff --git a/imgsrc/context_menu.svg b/imgsrc/context_menu.svg index 7dc82b32c9..d555595801 100644 --- a/imgsrc/context_menu.svg +++ b/imgsrc/context_menu.svg @@ -1,11 +1 @@ - - - - - - - - - - - + \ No newline at end of file diff --git a/imgsrc/external-link-for-dark-theme.svg b/imgsrc/external-link-for-dark-theme.svg new file mode 100644 index 0000000000..eb3baed22b --- /dev/null +++ b/imgsrc/external-link-for-dark-theme.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git 
a/imgsrc/external-link.svg b/imgsrc/external-link.svg new file mode 100644 index 0000000000..df1f6fa494 --- /dev/null +++ b/imgsrc/external-link.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/imgsrc/folder_saved_search.svg b/imgsrc/folder_saved_search.svg index dbe20e41a9..d7d1a34b6e 100644 --- a/imgsrc/folder_saved_search.svg +++ b/imgsrc/folder_saved_search.svg @@ -1,8 +1 @@ - - - - - - - - \ No newline at end of file + \ No newline at end of file diff --git a/imgsrc/layout.svg b/imgsrc/layout.svg new file mode 100644 index 0000000000..ee7953c673 --- /dev/null +++ b/imgsrc/layout.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/imgsrc/notes-for-dark-theme.svg b/imgsrc/notes-for-dark-theme.svg new file mode 100644 index 0000000000..1a8a826615 --- /dev/null +++ b/imgsrc/notes-for-dark-theme.svg @@ -0,0 +1 @@ + diff --git a/imgsrc/notes.svg b/imgsrc/notes.svg new file mode 100644 index 0000000000..807b056aa1 --- /dev/null +++ b/imgsrc/notes.svg @@ -0,0 +1 @@ + diff --git a/imgsrc/plugins/plugin_upgrade_ok.svg b/imgsrc/plugins/plugin_upgrade_ok.svg new file mode 100644 index 0000000000..88747145d5 --- /dev/null +++ b/imgsrc/plugins/plugin_upgrade_ok.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/imgsrc/srv/external-link.svg b/imgsrc/srv/external-link.svg new file mode 100644 index 0000000000..0ce4f1bb35 --- /dev/null +++ b/imgsrc/srv/external-link.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/imgsrc/srv/fit-to-screen.svg b/imgsrc/srv/fit-to-screen.svg new file mode 100644 index 0000000000..16522c4228 --- /dev/null +++ b/imgsrc/srv/fit-to-screen.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/imgsrc/srv/fts.svg b/imgsrc/srv/fts.svg index 39f49bdc71..7bf2900ce9 100644 --- a/imgsrc/srv/fts.svg +++ b/imgsrc/srv/fts.svg @@ -1,26 +1 @@ - - - - - - - - - - - - + \ No newline at end of file diff --git a/imgsrc/srv/off.svg b/imgsrc/srv/off.svg new file mode 100644 index 0000000000..249281985f --- /dev/null 
+++ b/imgsrc/srv/off.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/imgsrc/width.svg b/imgsrc/width.svg index e82af93610..12aed74c37 100644 --- a/imgsrc/width.svg +++ b/imgsrc/width.svg @@ -1 +1 @@ - + \ No newline at end of file diff --git a/manual/creating_plugins.rst b/manual/creating_plugins.rst index 5033ef1127..38689a1b10 100644 --- a/manual/creating_plugins.rst +++ b/manual/creating_plugins.rst @@ -144,8 +144,8 @@ calibre's plugin loading system defines a couple of built-in functions that allo a forward slash as the path separator, even on Windows. When you pass in a single name, the function will return the raw bytes of that file or None if the name was not found in the ZIP file. If you pass in more - than one name then it returns a dict mapping the names to bytes. If a - name is not found, it will not be present in the returned dict. + than one name then it returns a dictionary mapping the names to bytes. If a + name is not found, it will not be present in the returned dictionary. **get_icons(name_or_list_of_names, plugin_name='')** A wrapper for get_resources() that creates QIcon objects @@ -292,6 +292,11 @@ with the ``CALIBRE_OVERRIDE_LANG`` environment variable set. For example:: Replace ``de`` with the language code of the language you want to test. +For translations with plurals, use the ``ngettext()`` function instead of +``_()``. For example:: + + ngettext('Delete a book', 'Delete {} books', num_books).format(num_books) + The plugin API -------------------------------- diff --git a/manual/develop.rst b/manual/develop.rst index 3a78dc90aa..f2d00a6ad8 100644 --- a/manual/develop.rst +++ b/manual/develop.rst @@ -179,7 +179,7 @@ Setting this environment variable means that calibre will now load all its Pytho That's it! You are now ready to start hacking on the calibre code. 
For example, open the file :file:`src\\calibre\\__init__.py` in your favorite editor and add the line:: - print ("Hello, world!") + print("Hello, world!") near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``. @@ -259,7 +259,7 @@ Setting this environment variable means that calibre will now load all its Pytho That's it! You are now ready to start hacking on the calibre code. For example, open the file :file:`src/calibre/__init__.py` in your favorite editor and add the line:: - print ("Hello, world!") + print("Hello, world!") near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``. @@ -302,9 +302,9 @@ Similarly, you can start the E-book viewer as:: calibre-debug -w /path/to/file/to/be/viewed -The e-book-editor can be started as:: +The e-book editor can be started as:: - calibre-debug -t /path/to/be/edited + calibre-debug --edit-book /path/to/be/edited Using an interactive Python interpreter ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -406,7 +406,7 @@ you can also directly import calibre, as follows:: import init_calibre import calibre - print calibre.__version__ + print(calibre.__version__) It is essential that you import the init_calibre module before any other calibre modules/packages as it sets up the interpreter to run calibre code. diff --git a/manual/edit.rst b/manual/edit.rst index a15be4b97d..18d070c97b 100644 --- a/manual/edit.rst +++ b/manual/edit.rst @@ -133,11 +133,14 @@ Changing text file order ^^^^^^^^^^^^^^^^^^^^^^^^^^ You can re-arrange the order in which text (HTML) files are opened when reading -the book by simply dragging and dropping them in the Files browser. For the -technically inclined, this is called re-ordering the book spine. Note that you -have to drop the items *between* other items, not on top of them, this can be a -little fiddly until you get used to it. 
Dropping on top of another file will -cause the files to be merged. +the book by simply dragging and dropping them in the :guilabel:`File browser` or clicking +on the file to move and then pressing the :kbd:`Ctrl+Shift` modifiers with the +:kbd:`Up`, :kbd:`Down`, :kbd:`Home` or :kbd:`End` keys. For the technically +inclined, this is called re-ordering the book spine. + +Note that you have to drop the items *between* other items, not on top of them, +this can be a little fiddly until you get used to it. Dropping on top of +another file will cause the files to be merged. Marking the cover ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/manual/faq.rst b/manual/faq.rst index 2e95051bc8..b2d4c620ed 100644 --- a/manual/faq.rst +++ b/manual/faq.rst @@ -252,7 +252,7 @@ In such apps you can go to the online catalog screen and add the IP address of the calibre server to browse and download books from your calibre library within the app. -How do I use calibre with my Android phone/tablet or Kindle Fire HD? +How do I use calibre with my Android phone/tablet or Kindle Fire? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are two ways that you can connect your Android device to calibre. Using a USB cable -- or wirelessly, over the air. @@ -792,6 +792,10 @@ calibre is not starting on Windows? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There can be several causes for this: + * If you get no errors but the calibre window does not appear, it has + probably just appeared off screen. You can gather all windows onto the + current screen using one of the techniques described `here `__. + * If you get an error about calibre not being able to open a file because it is in use by another program, do the following: * Uninstall calibre @@ -880,7 +884,7 @@ incompatibility with your system's GPU (graphics) drivers. Try updating these first, and reboot. 
If that does not fix it, you can set the ``QTWEBENGINE_CHROMIUM_FLAGS`` environment variable to the value ``--disable-gpu`` to turn off hardware acceleration. See -`this page `_ for details. +`this page `_ for details. Using the viewer or doing any conversions results in a permission denied error on Windows @@ -1085,7 +1089,7 @@ a modern Linux distribution, you should have no problems installing calibre onto because of Qt, which is used for various image processing tasks, and links against these libraries. If you get an ImportError about some Qt modules, you are likely missing some X libraries. Typical candidates are: - ``libxcb-xinerama0``, ``libegl1``, ``libopengl0``. + ``libxcb-cursor0``, ``libxcb-xinerama0``, ``libegl1``, ``libopengl0``. You can run the calibre server via the command:: diff --git a/manual/function_mode.rst b/manual/function_mode.rst index f2852d4c00..41b6cd5290 100644 --- a/manual/function_mode.rst +++ b/manual/function_mode.rst @@ -176,9 +176,9 @@ Contents based on these headings. Create the custom function below: parent = root.children[-1] if tag_name == 'h2' and root.children else root parent.add(text, file_name, anchor) toc = toc_to_html(root, current_container(), 'toc.html', 'Table of Contents for ' + metadata.title, metadata.language) - print (xml2str(toc)) + print(xml2str(toc)) else: - print ('No headings to build ToC from found') + print('No headings to build ToC from found') else: # Add an entry corresponding to this match to the Table of Contents if 'toc' not in data: @@ -207,7 +207,7 @@ HTML Table of Contents, ready to be pasted into :file:`toc.html`. The function above is heavily commented, so it should be easy to follow. The key new feature is the use of another useful extra argument to the ``replace()`` function, the ``data`` object. 
The ``data`` object is a Python -*dict* that persists between all successive invocations of ``replace()`` during +*dictionary* that persists between all successive invocations of ``replace()`` during a single :guilabel:`Replace All` operation. Another new feature is the use of ``call_after_last_match`` -- setting that to @@ -278,9 +278,9 @@ for the current book's language. The ``data`` argument ^^^^^^^^^^^^^^^^^^^^^ -This a simple Python ``dict``. When you run +This is a simple Python ``dictionary``. When you run :guilabel:`Replace all`, every successive match will cause ``replace()`` to be -called with the same ``dict`` as data. You can thus use it to store arbitrary +called with the same ``dictionary`` as data. You can thus use it to store arbitrary data between invocations of ``replace()`` during a :guilabel:`Replace all` operation. diff --git a/manual/gui.rst b/manual/gui.rst index 96983a58b3..d69370bb43 100644 --- a/manual/gui.rst +++ b/manual/gui.rst @@ -57,7 +57,9 @@ Add books 6. **Add files to selected book records**: Allows you to add or update the files associated with an existing book in your library. - 7. **Add an empty file to selected book records**: Allows you to add an empty file of the specified format to the selected book records. + 7. **Add data files to selected book records**: Allows you to add any number of extra files that will be stored in a :file:`data` sub-directory in the book directory. See :ref:`data_files` for details. + + 8. **Add an empty file to selected book records**: Allows you to add an empty file of the specified format to the selected book records. The :guilabel:`Add books` action can read metadata from a wide variety of e-book formats. In addition, it tries to guess metadata from the filename. See the :ref:`config_filename_metadata` section, to learn how to configure this. @@ -83,6 +85,7 @@ Edit metadata 2. **Edit metadata in bulk**: Allows you to edit common metadata fields for large numbers of books simultaneously.
It operates on all the books you have selected in the :ref:`Library view `. 3. **Download metadata and covers**: Downloads metadata and covers (if available) for the books that are selected in the book list. 4. **Merge book records**: Gives you the capability of merging the metadata and formats of two or more book records. You can choose to either delete or keep the records that were not clicked first. + 5. **Manage data files**: Manage the extra data files associated with the selected books. For more details, see :ref:`metadata`. @@ -270,8 +273,10 @@ Remove books 6. **Remove matching books from device**: Allows you to remove e-book files from a connected device that match the books that are selected in the book list. + 7. **Restore recently deleted**: Allows you to undo the removal of books or formats. + .. note:: - Note that when you use :guilabel:`Remove books` to delete books from your calibre library, the book record is permanently deleted, but the files are placed into the :guilabel:`Recycle Bin/Trash`. This allows you to recover the files if you change your mind. + Note that when you use :guilabel:`Remove books` to delete books from your calibre library, the book record is deleted, but the books are temporarily stored, for a few days, in a trash folder. You can undo the delete by right clicking the :guilabel:`Remove books` button and choosing to :guilabel:`Restore recently deleted` books. .. _configuration: @@ -311,7 +316,10 @@ Search & sort The Search & Sort section allows you to perform several powerful actions on your book collections. - * You can sort them by title, author, date, rating, etc. by clicking on the column titles. You can also sub-sort, i.e. sort on multiple columns. For example, if you click on the title column and then the author column, the book will be sorted by author and then all the entries for the same author will be sorted by title. + * You can sort them by title, author, date, rating, etc. by clicking on the column titles. 
+ You can also sub-sort, i.e. sort on multiple columns. + For example, if you click on the title column and then the author column, the book will be sorted by + author and then all the entries for the same author will be sorted by title. * You can search for a particular book or set of books using the Search bar. More on that below. @@ -325,6 +333,9 @@ The Search & Sort section allows you to perform several powerful actions on your * You can configure which fields you want displayed by using the :ref:`configuration` dialog. + * To perform complex multiple column based sub-sorting add the :guilabel:`Sort by` + tool to a toolbar via :guilabel:`Preferences->Toolbars & menus`. + .. _search_interface: The search interface @@ -489,11 +500,11 @@ search. More about saving searches below. *Virtual libraries* -The special field ``vl`` is used to search for books in a virtual library. For -example, ``vl:Read`` will find all the books in the *Read* virtual library. The search +The special field ``vl`` is used to search for books in a Virtual library. For +example, ``vl:Read`` will find all the books in the *Read* Virtual library. The search ``vl:Read and vl:"Science Fiction"`` will find all the books that are in both the *Read* and *Science Fiction* virtual libraries. The value following ``vl:`` must be the name of a -virtual library. If the virtual library name contains spaces then surround it with quotes. +Virtual library. If the Virtual library name contains spaces then surround it with quotes. *Whether a field has a value* @@ -611,7 +622,7 @@ learn how to create and use Virtual libraries, see the tutorial: Temporarily marking books ---------------------------- -You can temporarily mark arbitrary sets of books. Marked books will have a pin on them and can be found with the search ``marked:true``. To mark a book press :kbd:`Ctrl+m` or go to :guilabel:`Preferences->Toolbars & menus` and add the :guilabel:`Mark books` button to the main toolbar. 
+You can temporarily mark arbitrary sets of books. Marked books will have a pin on them and can be found with the search ``marked:true``. To mark a book press :kbd:`Ctrl+M` or go to :guilabel:`Preferences->Toolbars & menus` and add the :guilabel:`Mark books` button to the main toolbar. You can mark books with a specific text label by right clicking the :guilabel:`Mark books` button and choosing :guilabel:`Mark books with text label`. Books marked with text labels can later be found using the search ``marked:"=the-text-you-entered"``. @@ -745,6 +756,31 @@ corner of the main window. In :guilabel:`Preferences->Interface->Look & feel->Co browser` you can change the number of covers displayed, and even have the :guilabel:`Cover browser` display itself in a separate popup window. +Adding notes for authors, series, etc. +------------------------------------------ + +.. image:: images/notes.png + :class: float-left-img + +You can add notes for an author/series/tag/publisher/etc. to your calibre +library. To do so right click on the author name in the :guilabel:`Tag browser` on the left +or the :guilabel:`Book details` panel on the right and choose :guilabel:`Create note` +or :guilabel:`Edit note`. + +A simple popup window will allow you to enter your notes using basic +formatting and supporting links and images. Once a note for an author is +created, it can be viewed easily from the :guilabel:`Book details` panel by +clicking the little pencil icon next to the author name. + +You can search through all the notes in your library using the +:guilabel:`Browse notes` tool by pressing :kbd:`Ctrl+Shift+N` or adding +it to the toolbar via :guilabel:`Preferences->Toolbars & menus`. + +.. raw:: html epub + +
    + + Quickview ---------- @@ -800,7 +836,7 @@ The Jobs panel shows the number of currently running jobs. Jobs are tasks that r Keyboard shortcuts --------------------- -calibre has several keyboard shortcuts to save you time and mouse movement. These shortcuts are active in the book list view (when you're not editing the details of a particular book), and most of them affect the title you have selected. The calibre E-book viewer has its own shortcuts which can be customised by clicking the :guilabel:`Preferences` button in the viewer. +calibre has several keyboard shortcuts to save you time and mouse movement. These shortcuts are active in the book list view (when you're not editing the details of a particular book), and most of them affect the title you have selected. The calibre E-book viewer :ref:`has its own shortcuts ` which can be customised in the viewer :guilabel:`Preferences`. .. note:: @@ -919,3 +955,9 @@ calibre has several keyboard shortcuts to save you time and mouse movement. Thes * - :kbd:`Ctrl+Alt+Shift+F` - Restrict the displayed books to only those books that are in a category currently displayed in the :guilabel:`Tag browser` + * - :kbd:`B` + - Browse annotations (highlights and bookmarks) made in the calibre viewer for all books in the library + * - :kbd:`Ctrl+Shift+N` + - Browse notes associated with authors/series/tags/etc. 
+ * - :kbd:`Alt+Shift+L` + - Toggle the layout between wide and narrow views diff --git a/manual/images/added_books.png b/manual/images/added_books.png index ce5ba25a9b..9819374b3e 100644 Binary files a/manual/images/added_books.png and b/manual/images/added_books.png differ diff --git a/manual/images/bbc_advanced.png b/manual/images/bbc_advanced.png index 174e6c681e..ba1da94643 100644 Binary files a/manual/images/bbc_advanced.png and b/manual/images/bbc_advanced.png differ diff --git a/manual/images/bbc_altered.png b/manual/images/bbc_altered.png index e1a2bbb30a..f918a3ac86 100644 Binary files a/manual/images/bbc_altered.png and b/manual/images/bbc_altered.png differ diff --git a/manual/images/bbc_altered1.png b/manual/images/bbc_altered1.png index f93b5297d4..e2a347cb18 100644 Binary files a/manual/images/bbc_altered1.png and b/manual/images/bbc_altered1.png differ diff --git a/manual/images/catalog_options.png b/manual/images/catalog_options.png index 94fc481131..3db5e05298 100644 Binary files a/manual/images/catalog_options.png and b/manual/images/catalog_options.png differ diff --git a/manual/images/check-book.png b/manual/images/check-book.png index 12f2680121..4eca861996 100644 Binary files a/manual/images/check-book.png and b/manual/images/check-book.png differ diff --git a/manual/images/cli.png b/manual/images/cli.png index d99f043294..70b308367d 100644 Binary files a/manual/images/cli.png and b/manual/images/cli.png differ diff --git a/manual/images/convert_ebooks.png b/manual/images/convert_ebooks.png index 0a72f2f37d..be5432d3f8 100644 Binary files a/manual/images/convert_ebooks.png and b/manual/images/convert_ebooks.png differ diff --git a/manual/images/cover_browser.png b/manual/images/cover_browser.png index 71b693537b..6d475dc718 100644 Binary files a/manual/images/cover_browser.png and b/manual/images/cover_browser.png differ diff --git a/manual/images/cover_grid.png b/manual/images/cover_grid.png index 6916ab4d0a..c74c8e1a68 100644 Binary files 
a/manual/images/cover_grid.png and b/manual/images/cover_grid.png differ diff --git a/manual/images/custom_cover.png b/manual/images/custom_cover.png index 45c3214cac..8c40e17a37 100644 Binary files a/manual/images/custom_cover.png and b/manual/images/custom_cover.png differ diff --git a/manual/images/custom_news.png b/manual/images/custom_news.png index 481bd72c02..18d5463605 100644 Binary files a/manual/images/custom_news.png and b/manual/images/custom_news.png differ diff --git a/manual/images/debug.png b/manual/images/debug.png index 100383cbdb..523e2ee633 100644 Binary files a/manual/images/debug.png and b/manual/images/debug.png differ diff --git a/manual/images/diff.png b/manual/images/diff.png index 7dc1692009..9856dea281 100644 Binary files a/manual/images/diff.png and b/manual/images/diff.png differ diff --git a/manual/images/edit-book-spell.png b/manual/images/edit-book-spell.png index 173dceb4eb..ffa73a28cc 100644 Binary files a/manual/images/edit-book-spell.png and b/manual/images/edit-book-spell.png differ diff --git a/manual/images/edit-book.png b/manual/images/edit-book.png index 1a16c3f5d6..2943057cac 100644 Binary files a/manual/images/edit-book.png and b/manual/images/edit-book.png differ diff --git a/manual/images/edit_meta_information.png b/manual/images/edit_meta_information.png index 6bbb1a2708..53f5408a33 100644 Binary files a/manual/images/edit_meta_information.png and b/manual/images/edit_meta_information.png differ diff --git a/manual/images/fetch_news.png b/manual/images/fetch_news.png index 468571ba32..50503cfa55 100644 Binary files a/manual/images/fetch_news.png and b/manual/images/fetch_news.png differ diff --git a/manual/images/files_browser.png b/manual/images/files_browser.png index 9c2ee4c0db..0d31946d8c 100644 Binary files a/manual/images/files_browser.png and b/manual/images/files_browser.png differ diff --git a/manual/images/fts-button.png b/manual/images/fts-button.png index c32eb109bf..7b9c65d0ef 100644 Binary files 
a/manual/images/fts-button.png and b/manual/images/fts-button.png differ diff --git a/manual/images/function_replace.png b/manual/images/function_replace.png index 8bec8a658f..4e5755ae6c 100644 Binary files a/manual/images/function_replace.png and b/manual/images/function_replace.png differ diff --git a/manual/images/live-preview.png b/manual/images/live-preview.png index 79fb86a5a8..65b0072ffb 100644 Binary files a/manual/images/live-preview.png and b/manual/images/live-preview.png differ diff --git a/manual/images/live_css.png b/manual/images/live_css.png index c3dcffad40..1f579133e4 100644 Binary files a/manual/images/live_css.png and b/manual/images/live_css.png differ diff --git a/manual/images/notes.png b/manual/images/notes.png new file mode 100644 index 0000000000..c0414e4db9 Binary files /dev/null and b/manual/images/notes.png differ diff --git a/manual/images/other_options.png b/manual/images/other_options.png index 2a817cfc90..5812f9a293 100644 Binary files a/manual/images/other_options.png and b/manual/images/other_options.png differ diff --git a/manual/images/pipeline.png b/manual/images/pipeline.png index 1db72efae3..4a61b96ce6 100644 Binary files a/manual/images/pipeline.png and b/manual/images/pipeline.png differ diff --git a/manual/images/preferences.png b/manual/images/preferences.png index 1962bff278..38c463a266 100644 Binary files a/manual/images/preferences.png and b/manual/images/preferences.png differ diff --git a/manual/images/python_template_example.png b/manual/images/python_template_example.png index 90aaf33762..9aa4fd5539 100644 Binary files a/manual/images/python_template_example.png and b/manual/images/python_template_example.png differ diff --git a/manual/images/remove_books.png b/manual/images/remove_books.png index 87813d4656..e955924b70 100644 Binary files a/manual/images/remove_books.png and b/manual/images/remove_books.png differ diff --git a/manual/images/reports-ss.png b/manual/images/reports-ss.png index a3aaa0053c..e8819c47de 
100644 Binary files a/manual/images/reports-ss.png and b/manual/images/reports-ss.png differ diff --git a/manual/images/save_to_disk.png b/manual/images/save_to_disk.png index 50a98ffb9e..2eb8d480a5 100644 Binary files a/manual/images/save_to_disk.png and b/manual/images/save_to_disk.png differ diff --git a/manual/images/search.png b/manual/images/search.png index d3d48ca74d..61a006e26b 100644 Binary files a/manual/images/search.png and b/manual/images/search.png differ diff --git a/manual/images/search_button.png b/manual/images/search_button.png index fb1bd298ff..501b8c9556 100644 Binary files a/manual/images/search_button.png and b/manual/images/search_button.png differ diff --git a/manual/images/search_sort.png b/manual/images/search_sort.png index f9c44da876..ad2df9d1fa 100644 Binary files a/manual/images/search_sort.png and b/manual/images/search_sort.png differ diff --git a/manual/images/sg_pref.png b/manual/images/sg_pref.png index d695377832..f1ba9d44b6 100644 Binary files a/manual/images/sg_pref.png and b/manual/images/sg_pref.png differ diff --git a/manual/images/snippets-editor.png b/manual/images/snippets-editor.png index 2fb690f143..6d625f9d79 100644 Binary files a/manual/images/snippets-editor.png and b/manual/images/snippets-editor.png differ diff --git a/manual/images/sr.png b/manual/images/sr.png index c7bfbc75ea..941186f039 100644 Binary files a/manual/images/sr.png and b/manual/images/sr.png differ diff --git a/manual/images/tag_browser.png b/manual/images/tag_browser.png index 86fd6cd942..dd80561934 100644 Binary files a/manual/images/tag_browser.png and b/manual/images/tag_browser.png differ diff --git a/manual/images/tocedit-location.png b/manual/images/tocedit-location.png index 6e6cfa6b15..a64a63f4fb 100644 Binary files a/manual/images/tocedit-location.png and b/manual/images/tocedit-location.png differ diff --git a/manual/images/tocedit.png b/manual/images/tocedit.png index d89f236338..3b222cb443 100644 Binary files 
a/manual/images/tocedit.png and b/manual/images/tocedit.png differ diff --git a/manual/images/view.png b/manual/images/view.png index 032abc59e7..246bd480aa 100644 Binary files a/manual/images/view.png and b/manual/images/view.png differ diff --git a/manual/images/virtual_library_button.png b/manual/images/virtual_library_button.png index 9e4aa80e1d..217fb840da 100644 Binary files a/manual/images/virtual_library_button.png and b/manual/images/virtual_library_button.png differ diff --git a/manual/images/vl_by_author.png b/manual/images/vl_by_author.png index bce6437026..89be2f603e 100644 Binary files a/manual/images/vl_by_author.png and b/manual/images/vl_by_author.png differ diff --git a/manual/metadata.rst b/manual/metadata.rst index dce9b0a1f0..cef7452a0c 100644 --- a/manual/metadata.rst +++ b/manual/metadata.rst @@ -82,3 +82,19 @@ Bulk downloading of metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you want to download the metadata for multiple books at once, right-click the :guilabel:`Edit metadata` button and select :guilabel:`Download metadata`. You can choose to download only metadata, only covers, or both. + + +.. _data_files: + +Adding extra data files to a book +-------------------------------------- + +calibre can store any number of extra data files associated with a book. These +can be alternate covers, supplementary material, etc. They cannot be viewed +directly or used as conversion sources. Nor are they indexed by the Full text +search engine in calibre. To view/add/delete them select the book and right +click the :guilabel:`Edit metadata` button and choose :guilabel:`Manage data +files`. This will pop up a window where you can perform operations on these +files. Alternately, you can right click the :guilabel:`Add books` button and +choose :guilabel:`Add data files to selected book records` to more quickly add +data files.
diff --git a/manual/plugin_examples/editor_demo/images/icon.png b/manual/plugin_examples/editor_demo/images/icon.png index 26db5241af..ed08be7eb4 100644 Binary files a/manual/plugin_examples/editor_demo/images/icon.png and b/manual/plugin_examples/editor_demo/images/icon.png differ diff --git a/manual/plugin_examples/interface_demo/images/icon.png b/manual/plugin_examples/interface_demo/images/icon.png index 26db5241af..ed08be7eb4 100644 Binary files a/manual/plugin_examples/interface_demo/images/icon.png and b/manual/plugin_examples/interface_demo/images/icon.png differ diff --git a/manual/resources/logo.png b/manual/resources/logo.png index 81adc85c63..a13dc3bdfb 100644 Binary files a/manual/resources/logo.png and b/manual/resources/logo.png differ diff --git a/manual/simple_index.rst b/manual/simple_index.rst index a9b9d12dbb..824155b46a 100644 --- a/manual/simple_index.rst +++ b/manual/simple_index.rst @@ -21,7 +21,7 @@ available `. .. only:: online - **An e-book version of this User Manual is available in** `EPUB format `_, `AZW3 (Kindle Fire) format `_ and `PDF format `_. + **An e-book version of this User Manual is available in** `EPUB format `_, `AZW3 (Kindle) format `_ and `PDF format `_. .. rubric:: Sections diff --git a/manual/template_lang.rst b/manual/template_lang.rst index 84a1051ce0..7650afbf68 100644 --- a/manual/template_lang.rst +++ b/manual/template_lang.rst @@ -116,6 +116,32 @@ Composite columns can use any template option, including formatting. Note: You cannot edit the data displayed in a composite column. Instead you edit the source columns. If you edit a composite column, for example by double-clicking it, calibre will open the template for editing, not the underlying data. + +Templates and plugboards +--------------------------- + +Plugboards are used for changing the metadata written into books during send-to-device and save-to-disk operations. 
A plugboard permits you to specify a template to provide the data to write into the book's metadata. You can use plugboards to modify the following fields: authors, author_sort, language, publisher, tags, title, title_sort. This feature helps people who want to use different metadata in books on devices to solve sorting or display issues. + +When you create a plugboard, you specify the format and device for which the plugboard is to be used. A special device is provided, ``save_to_disk``, that is used when saving formats (as opposed to sending them to a device). Once you have chosen the format and device, you choose the metadata fields to change, providing templates to supply the new values. These templates are `connected` to their destination fields, hence the name `plugboards`. You can of course use composite columns in these templates. + +Plugboards are quite flexible and can be written in Single Function Mode, Template Program Mode, General Program Mode, or Python Template mode. + +When a plugboard might apply (Content server, save to disk, or send to device), calibre searches the +defined plugboards to choose the correct one for the given format and device. For example, to find the appropriate plugboard for an EPUB book being sent to an ANDROID device, calibre searches +the plugboards using the following search order: + +* a plugboard with an exact match on format and device, e.g., ``EPUB`` and ``ANDROID`` +* a plugboard with an exact match on format and the special ``any device`` choice, e.g., ``EPUB`` and ``any device`` +* a plugboard with the special ``any format`` choice and an exact match on device, e.g., ``any format`` and ``ANDROID`` +* a plugboard with ``any format`` and ``any device`` + +The tags and authors fields have special treatment, because both of these fields can hold more than one item. A book can have many tags and many authors. 
When you specify that one of these two fields is to be changed, the template's result is examined to see if more than one item is there. For tags, the result is cut apart wherever calibre finds a comma. For example, if the template produces +the value ``Thriller, Horror``, then the result will be two tags, ``Thriller`` and ``Horror``. There is no way to put a comma in the middle of a tag. + +The same thing happens for authors, but using a different character for the cut, a `&` (ampersand) instead of a comma. For example, if the template produces the value ``Blogs, Joe&Posts, Susan``, then the book will end up with two authors, ``Blogs, Joe`` and ``Posts, Susan``. If the template produces the value ``Blogs, Joe;Posts, Susan``, then the book will have one author with a rather strange name. + +Plugboards affect the metadata written into the book when it is saved to disk or written to the device. Plugboards do not affect the metadata used by ``save to disk`` and ``send to device`` to create the file names. Instead, file names are constructed using the templates entered on the appropriate preferences window. + .. _single_mode: Using functions in templates - Single Function Mode @@ -216,7 +242,7 @@ General Program Mode top_expression ::= or_expression or_expression ::= and_expression [ '||' and_expression ]* and_expression ::= not_expression [ '&&' not_expression ]* - not_expression ::= [ '!' not_expression ]* | compare_exp + not_expression ::= [ '!' not_expression ]* | concatenate_expr concatenate_expr::= compare_expr [ '&' compare_expr ]* compare_expr ::= add_sub_expr [ compare_op add_sub_expr ] compare_op ::= '==' | '!=' | '>=' | '>' | '<=' | '<' | 'in' | 'inlist' | @@ -389,7 +415,7 @@ Examples: * ``program: field('series') == 'foo'`` returns ``'1'`` if the book's series is 'foo', otherwise ``''``. * ``program: 'f.o' in field('series')`` returns ``'1'`` if the book's series matches the regular expression ``f.o`` (e.g., `foo`, `Off Onyx`, etc.), otherwise ``''``. 
- * ``program: 'science' inlist field('#genre')`` returns ``'1'`` if any of the book's genres match the regular expression ``science``, e.g., `Science`, `History of Science`, `Science Fiction` etc.), otherwise ``''``. + * ``program: 'science' inlist field('#genre')`` returns ``'1'`` if any of the book's genres match the regular expression ``science``, e.g., `Science`, `History of Science`, `Science Fiction` etc., otherwise ``''``. * ``program: '^science$' inlist field('#genre')`` returns ``'1'`` if any of the book's genres exactly match the regular expression ``^science$``, e.g., `Science`. The genres `History of Science` and `Science Fiction` don't match. If there isn't a match then returns ``''``. * ``program: if field('series') != 'foo' then 'bar' else 'mumble' fi`` returns ``'bar'`` if the book's series is not ``foo``. Otherwise it returns ``'mumble'``. * ``program: if field('series') == 'foo' || field('series') == '1632' then 'yes' else 'no' fi`` returns ``'yes'`` if series is either ``'foo'`` or ``'1632'``, otherwise ``'no'``. @@ -443,7 +469,7 @@ In `GPM` the functions described in `Single Function Mode` all require an additi * ``booksize()`` -- returns the value of the calibre 'size' field. Returns '' if there are no formats. * ``check_yes_no(field_name, is_undefined, is_false, is_true)`` -- checks if the value of the yes/no field named by the lookup name ``field_name`` is one of the values specified by the parameters, returning ``'yes'`` if a match is found otherwise returning the empty string. Set the parameter ``is_undefined``, ``is_false``, or ``is_true`` to 1 (the number) to check that condition, otherwise set it to 0. Example: - ``check_yes_no("#bool", 1, 0, 1)`` returns ``'yes'`` if the yes/no field ``#bool`` is either True or undefined (neither True nor False). + ``check_yes_no("#bool", 1, 0, 1)`` returns ``'Yes'`` if the yes/no field ``#bool`` is either True or undefined (neither True nor False). 
More than one of ``is_undefined``, ``is_false``, or ``is_true`` can be set to 1. * ``ceiling(x)`` -- returns the smallest integer greater than or equal to ``x``. Throws an exception if ``x`` is not a number. @@ -467,6 +493,9 @@ In `GPM` the functions described in `Single Function Mode` all require an additi * ``days_between(date1, date2)`` -- return the number of days between ``date1`` and ``date2``. The number is positive if ``date1`` is greater than ``date2``, otherwise negative. If either ``date1`` or ``date2`` are not dates, the function returns the empty string. * ``divide(x, y)`` -- returns ``x / y``. Throws an exception if either ``x`` or ``y`` are not numbers. This function can usually be replaced by the ``/`` operator. * ``eval(string)`` -- evaluates the string as a program, passing the local variables. This permits using the template processor to construct complex results from local variables. In :ref:`Template Program Mode `, because the `{` and `}` characters are interpreted before the template is evaluated you must use `[[` for the `{` character and `]]` for the ``}`` character. They are converted automatically. Note also that prefixes and suffixes (the `|prefix|suffix` syntax) cannot be used in the argument to this function when using :ref:`Template Program Mode `. +* ``extra_file_size(file_name)`` -- returns the size in bytes of the extra file ``file_name`` in the book's ``data/`` folder if it exists, otherwise ``-1``. See also the functions ``has_extra_files()``, ``extra_file_names()`` and ``extra_file_modtime()``. This function can be used only in the GUI. +* ``extra_file_modtime(file_name, format_string)`` -- returns the modification time of the extra file ``file_name`` in the book's ``data/`` folder if it exists, otherwise ``-1``. The modtime is formatted according to ``format_string`` (see ``format_date()`` for details). If ``format_string`` is the empty string, returns the modtime as the floating point number of seconds since the epoch. 
See also the functions ``has_extra_files()``, ``extra_file_names()`` and ``extra_file_size()``. The epoch is OS dependent. This function can be used only in the GUI. +* ``extra_file_names(sep [, pattern])`` -- returns a ``sep``-separated list of extra files in the book's ``data/`` folder. If the optional parameter ``pattern``, a regular expression, is supplied then the list is filtered to files that match ``pattern``. The pattern match is case insensitive. See also the functions ``has_extra_files()``, ``extra_file_modtime()`` and ``extra_file_size()``. This function can be used only in the GUI. * ``field(lookup_name)`` -- returns the value of the metadata field with lookup name ``lookup_name``. * ``field_exists(field_name)`` -- checks if a field (column) with the lookup name ``field_name`` exists, returning ``'1'`` if so and the empty string if not. * ``finish_formatting(val, fmt, prefix, suffix)`` -- apply the format, prefix, and suffix to a value in the same way as done in a template like ``{series_index:05.2f| - |- }``. This function is provided to ease conversion of complex single-function- or template-program-mode templates to `GPM` Templates. For example, the following program produces the same output as the above template:: @@ -523,11 +552,36 @@ In `GPM` the functions described in `Single Function Mode` all require an additi format_date(raw_field('pubdate'), 'yyyy') +* ``format_date_field(field_name, format_string)`` -- format the value in the field ``field_name``, which must be the lookup name of a date field, either standard or custom. See ``format_date()`` for the formatting codes. This function is much faster than ``format_date()`` and should be used when you are formatting the value in a field (column). It can't be used for computed dates or dates in string variables.
Examples:: + + format_date_field('pubdate', 'yyyy.MM.dd') + format_date_field('#date_read', 'MMM dd, yyyy') + * ``formats_modtimes(date_format_string)`` -- return a comma-separated list of colon-separated items ``FMT:DATE`` representing modification times for the formats of a book. The ``date_format_string`` parameter specifies how the date is to be formatted. See the ``format_date()`` function for details. You can use the ``select`` function to get the modification time for a specific format. Note that format names are always uppercase, as in EPUB. * ``formats_paths()`` -- return a comma-separated list of colon-separated items ``FMT:PATH`` giving the full path to the formats of a book. You can use the select function to get the path for a specific format. Note that format names are always uppercase, as in EPUB. * ``formats_sizes()`` -- return a comma-separated list of colon-separated ``FMT:SIZE`` items giving the sizes in bytes of the formats of a book. You can use the select function to get the size for a specific format. Note that format names are always uppercase, as in EPUB. * ``fractional_part(x)`` -- returns the value after the decimal point. For example, ``fractional_part(3.14)`` returns ``0.14``. Throws an exception if ``x`` is not a number. +* ``get_link(field_name, field_value)`` -- fetch the link for field ``field_name`` with value ``field_value``. If there is no attached link, return the empty string. Examples: + + * The following returns the link attached to the tag ``Fiction``:: + + get_link('tags', 'Fiction') + + * This template makes a list of the links for all the tags associated with a book in the form ``value:link, ...``:: + + program: + ans = ''; + for t in $tags: + l = get_link('tags', t); + if l then + ans = list_join(', ', ans, ',', t & ':' & get_link('tags', t), ',') + fi + rof; + ans + * ``has_cover()`` -- return ``'Yes'`` if the book has a cover, otherwise the empty string. 
+* ``has_extra_files([pattern])`` -- returns the count of extra files, otherwise '' (the empty string). If the optional parameter ``pattern`` (a regular expression) is supplied then the list is filtered to files that match ``pattern`` before the files are counted. The pattern match is case insensitive. See also the functions ``extra_file_names()``, ``extra_file_size()`` and ``extra_file_modtime()``. This function can be used only in the GUI. +* ``identifier_in_list(val, id_name [, found_val, not_found_val])`` -- treat ``val`` as a list of identifiers separated by commas. An identifier has the format ``id_name:value``. The ``id_name`` parameter is the id_name text to search for, either ``id_name`` or ``id_name:regexp``. The first case matches if there is any identifier matching that id_name. The second case matches if id_name matches an identifier and the regexp matches the identifier's value. If ``found_val`` and ``not_found_val`` are provided then if there is a match then return ``found_val``, otherwise return ``not_found_val``. If ``found_val`` and ``not_found_val`` are not provided then if there is a match then return the ``identifier:value`` pair, otherwise the empty string (``''``). * ``is_marked()`` -- check whether the book is `marked` in calibre. If it is then return the value of the mark, either ``'true'`` (lower case) or a comma-separated list of named marks. Returns ``''`` (the empty string) if the book is not marked. This function works only in the GUI. * ``language_codes(lang_strings)`` -- return the `language codes `_ for the language names passed in `lang_strings`. The strings must be in the language of the current locale. ``Lang_strings`` is a comma-separated list. * ``list_contains(value, separator, [ pattern, found_val, ]* not_found_val)`` -- (Alias of ``in_list``) Interpreting the value as a list of items separated by ``separator``, evaluate the ``pattern`` against each value in the list. 
If the ``pattern`` matches any value then return ``found_val``, otherwise return ``not_found_val``. The ``pattern`` and ``found_value`` can be repeated as many times as desired, permitting returning different values depending on the search. The patterns are checked in order. The first match is returned. Aliases: ``in_list()``, ``list_contains()`` @@ -610,6 +664,7 @@ In `GPM` the functions described in `Single Function Mode` all require an additi * ``strlen(value)`` -- Returns the length of the string ``value``. * ``substr(str, start, end)`` -- returns the ``start``'th through the ``end``'th characters of ``str``. The first character in ``str`` is the zero'th character. If ``end`` is negative, then it indicates that many characters counting from the right. If ``end`` is zero, then it indicates the last character. For example, ``substr('12345', 1, 0)`` returns ``'2345'``, and ``substr('12345', 1, -1)`` returns ``'234'``. * ``subtract(x, y)`` -- returns ``x - y``. Throws an exception if either ``x`` or ``y`` are not numbers. This function can usually be replaced by the ``-`` operator. +* ``switch_if([test_expression, value_expression,]+ else_expression)`` -- for each ``test_expression, value_expression`` pair, checks if ``test_expression`` is True (non-empty) and if so returns the result of ``value_expression``. If no ``test_expression`` is True then the result of ``else_expression`` is returned. You can have as many ``test_expression, value_expression`` pairs as you want. * ``today()`` -- return a date+time string for today (now). This value is designed for use in `format_date` or `days_between`, but can be manipulated like any other string. The date is in `ISO `_ date/time format. * ``template(x)`` -- evaluates ``x`` as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. * ``to_hex(val)`` -- returns the string ``val`` encoded in hex. This is useful when constructing calibre URLs.
@@ -736,9 +791,9 @@ A developer can choose to pass additional information to the template processor, **Developer: how to pass additional information** -The additional information is a Python dictionary containing pairs ``variable_name: variable_value`` where the values must be strings. The template can access the dict, creating template local variables named ``variable_name`` containing the value ``variable_value``. The user cannot change the name so it is best to use names that won't collide with other template local variables, for example by prefixing the name with an underscore. +The additional information is a Python dictionary containing pairs ``variable_name: variable_value`` where the values must be strings. The template can access the dictionary, creating template local variables named ``variable_name`` containing the value ``variable_value``. The user cannot change the name so it is best to use names that won't collide with other template local variables, for example by prefixing the name with an underscore. -This dict is passed to the template processor (the ``formatter``) using the named parameter ``global_vars=your_dict``. The full method signature is:: +This dictionary is passed to the template processor (the ``formatter``) using the named parameter ``global_vars=your_dict``. The full method signature is:: def safe_format(self, fmt, kwargs, error_value, book, column_name=None, template_cache=None, @@ -748,17 +803,17 @@ This dict is passed to the template processor (the ``formatter``) using the name **Template writer: how to access the additional information** -You access the additional information (the ``globals`` dict) in a template using the template function:: +You access the additional information (the ``globals`` dictionary) in a template using the template function:: globals(id[=expression] [, id[=expression]]*) where ``id`` is any legal variable name. 
This function checks whether the additional information provided by the developer contains the name. If it does then the function assigns the provided value to a template local variable with that name. If the name is not in the additional information and if an ``expression`` is provided, the ``expression`` is evaluated and the result is assigned to the local variable. If neither a value nor an expression is provided, the function assigns the empty string (``''``) to the local variable. -A template can set a value in the ``globals`` dict using the template function:: +A template can set a value in the ``globals`` dictionary using the template function:: set_globals(id[=expression] [, id[=expression]]*) -This function sets the ``globals`` dict key:value pair ``id:value`` where ``value`` is the value of the template local variable ``id``. If that local variable doesn't exist then ``value`` is set to the result of evaluating ``expression``. +This function sets the ``globals`` dictionary key:value pair ``id:value`` where ``value`` is the value of the template local variable ``id``. If that local variable doesn't exist then ``value`` is set to the result of evaluating ``expression``. Notes on the difference between modes ----------------------------------------- @@ -820,30 +875,7 @@ To accomplish this, we: 1. Create a composite field (give it lookup name #aa) containing ``{series}/{series_index} - {title}``. If the series is not empty, then this template will produce `series/series_index - title`. 2. Create a composite field (give it lookup name #bb) containing ``{#genre:ifempty(Unknown)}/{author_sort}/{title}``. This template produces `genre/author_sort/title`, where an empty genre is replaced with `Unknown`. -3. Set the save template to ``{series:lookup(.,#aa,#bb}``. This template chooses composite field ``#aa`` if series is not empty and composite field ``#bb`` if series is empty. 
We therefore have two completely different save paths, depending on whether or not `series` is empty. - -Templates and plugboards ---------------------------- - -Plugboards are used for changing the metadata written into books during send-to-device and save-to-disk operations. A plugboard permits you to specify a template to provide the data to write into the book's metadata. You can use plugboards to modify the following fields: authors, author_sort, language, publisher, tags, title, title_sort. This feature helps people who want to use different metadata in books on devices to solve sorting or display issues. - -When you create a plugboard, you specify the format and device for which the plugboard is to be used. A special device is provided, ``save_to_disk``, that is used when saving formats (as opposed to sending them to a device). Once you have chosen the format and device, you choose the metadata fields to change, providing templates to supply the new values. These templates are `connected` to their destination fields, hence the name `plugboards`. You can of course use composite columns in these templates. - -When a plugboard might apply (Content server, save to disk, or send to device), calibre searches the -defined plugboards to choose the correct one for the given format and device. For example, to find the appropriate plugboard for an EPUB book being sent to an ANDROID device, calibre searches -the plugboards using the following search order: - -* a plugboard with an exact match on format and device, e.g., ``EPUB`` and ``ANDROID`` -* a plugboard with an exact match on format and the special ``any device`` choice, e.g., ``EPUB`` and ``any device`` -* a plugboard with the special ``any format`` choice and an exact match on device, e.g., ``any format`` and ``ANDROID`` -* a plugboard with ``any format`` and ``any device`` - -The tags and authors fields have special treatment, because both of these fields can hold more than one item. 
A book can have many tags and many authors. When you specify that one of these two fields is to be changed, the template's result is examined to see if more than one item is there. For tags, the result is cut apart wherever calibre finds a comma. For example, if the template produces -the value ``Thriller, Horror``, then the result will be two tags, ``Thriller`` and ``Horror``. There is no way to put a comma in the middle of a tag. - -The same thing happens for authors, but using a different character for the cut, a `&` (ampersand) instead of a comma. For example, if the template produces the value ``Blogs, Joe&Posts, Susan``, then the book will end up with two authors, ``Blogs, Joe`` and ``Posts, Susan``. If the template produces the value ``Blogs, Joe;Posts, Susan``, then the book will have one author with a rather strange name. - -Plugboards affect the metadata written into the book when it is saved to disk or written to the device. Plugboards do not affect the metadata used by ``save to disk`` and ``send to device`` to create the file names. Instead, file names are constructed using the templates entered on the appropriate preferences window. +3. Set the save template to ``{series:lookup(.,#aa,#bb)}``. This template chooses composite field ``#aa`` if series is not empty and composite field ``#bb`` if series is empty. We therefore have two completely different save paths, depending on whether or not `series` is empty. 
Tips ----- diff --git a/manual/template_ref_generate.py b/manual/template_ref_generate.py index 6ce7138991..07c84194cb 100644 --- a/manual/template_ref_generate.py +++ b/manual/template_ref_generate.py @@ -76,7 +76,7 @@ def generate_template_language_help(language): func_sig = getattr(func, 'doc') m = pat.search(func_sig) if m is None: - print ('No signature for template function ', class_name) + print('No signature for template function ', class_name) continue func_sig = func_sig[:m.start()+1].strip('`') func_cat = getattr(func, 'category') diff --git a/manual/url_scheme.rst b/manual/url_scheme.rst index 7ec7bae5cb..cfa20f2298 100644 --- a/manual/url_scheme.rst +++ b/manual/url_scheme.rst @@ -56,6 +56,20 @@ brackets at the end of the path to the book folder. You can copy a link to the current book displayed in calibre by right clicking the :guilabel:`Book details` panel and choosing :guilabel:`Copy link to book`. +If a search is active and the book is not matched by the search then the search is cleared. + +If a Virtual library is selected, calibre will use it when showing the book. If +the book isn't found in that virtual library then the virtual library is cleared. + +If you want to switch to a particular Virtual library when showing the book, use:: + + calibre://show-book/Library_Name/book_id?virtual_library=Library%20Name + or + calibre://show-book/Library_Name/book_id?encoded_virtual_library=hex_encoded_virtual_library_name + +replacing spaces in the Virtual library name by ``%20``. If the book isn't found in that +virtual library then the virtual library is ignored. + Open a specific book in the E-book viewer at a specific position ------------------------------------------------------------------- @@ -102,6 +116,39 @@ If you perform a search in calibre and want to generate a link for it you can do so by right clicking the search bar and choosing :guilabel:`Copy search as URL`. 
+Open a book details window on a book in some library +------------------------------------------------------ + +The URL syntax is:: + + calibre://book-details/Library_Name/book_id + +This opens a book details window on the specified book from the specified library without changing the +current library or the selected book. + + +Open the notes associated with an author/series/etc. +------------------------------------------------------ + +The URL syntax is:: + + calibre://book-details/Library_Name/Field_Name/id_Item_Id + +This opens a window showing the notes of the specified item. +The easiest way to create such URLs is to show the notes you want +in calibre and click the :guilabel:`Copy URL` button to copy the URL +to the clipboard and paste it wherever you need. + +Here ``Field_Name`` is the name of the columns such as ``authors`` or ``tags``. +For user created columns, replace the leading ``#`` in the field name with +an underscore, so ``#mytags`` becomes ``_mytags``. + +In addition to specifying items by id using ``Item_Id`` you can also specify +them by name using either ``val_Item_Name`` or ``hex_Hex_Encoded_Item_Name``. +For example:: + + calibre://book-details/Library_Name/authors/val_John%20Doe + .. _hex_encoding: diff --git a/manual/viewer.rst b/manual/viewer.rst index 192e590682..901117dd80 100644 --- a/manual/viewer.rst +++ b/manual/viewer.rst @@ -254,6 +254,145 @@ You can zoom in to show an image at full size in a separate window by either double clicking or long tapping on it. You can also right click on it and choose :guilabel:`View image`. +.. _viewer_shortcuts: + +Keyboard shortcuts +----------------------- + +The viewer has extensive keyboard shortcuts, like the rest of calibre. They can +be customised in the viewer :guilabel:`Preferences`. The default shortcuts are listed below: + + +.. 
list-table:: Keyboard shortcuts for the calibre viewer + :widths: 10 100 + :header-rows: 1 + + * - Keyboard shortcut + - Action + * - :kbd:`Home, Ctrl+ArrowUp, Ctrl+ArrowLeft` + - Scroll to the start of the current file in a multi file book + * - :kbd:`Ctrl+Home` + - Scroll to the beginning of the book + * - :kbd:`Ctrl+End` + - Scroll to the end of the book + * - :kbd:`End, Ctrl+ArrowDown, Ctrl+ArrowRight` + - Scroll to the end of the current file in a multi file book + * - :kbd:`ArrowUp` + - Scroll backwards, smoothly in flow mode and by screen fulls in paged mode + * - :kbd:`ArrowDown` + - Scroll forwards, smoothly in flow mode and by screen fulls in paged mode + * - :kbd:`ArrowLeft` + - Scroll leftwards by a little in flow mode and by a page in paged mode + * - :kbd:`ArrowRight` + - Scroll rightwards by a little in flow mode and by a page in paged mode + * - :kbd:`PageUp, Shift+Spacebar` + - Scroll backwards by screen-fulls + * - :kbd:`PageDown, Spacebar` + - Scroll forwards by screen-fulls + * - :kbd:`Ctrl+PageUp` + - Scroll to the previous section + * - :kbd:`Ctrl+PageDown` + - Scroll to the next section + * - :kbd:`Alt+ArrowLeft` + - Back + * - :kbd:`Alt+ArrowRight` + - Forward + * - :kbd:`Ctrl+T` + - Toggle Table of Contents + * - :kbd:`Ctrl+S` + - Read aloud + * - :kbd:`Alt+P` + - Change settings quickly by creating and switching to :guilabel:`profiles` + * - :kbd:`Alt+f` + - Follow links with the keyboard + * - :kbd:`Ctrl+C` + - Copy to clipboard + * - :kbd:`Alt+C` + - Copy current location to clipboard + * - :kbd:`Ctrl+Shift+C` + - Copy current location as calibre:// URL to clipboard + * - :kbd:`/, Ctrl+f, Cmd+f` + - Start search + * - :kbd:`F3, Enter` + - Find next + * - :kbd:`Shift+F3, Shift+Enter` + - Find previous + * - :kbd:`Ctrl+Plus, Meta+Plus` + - Increase font size + * - :kbd:`Ctrl+Minus, Meta+Minus` + - Decrease font size + * - :kbd:`Ctrl+0` + - Restore default font size + * - :kbd:`Ctrl+]` + - Increase number of pages per screen + * - 
:kbd:`Ctrl+[` + - Decrease number of pages per screen + * - :kbd:`Ctrl+Alt+C` + - Make number of pages per screen automatic + * - :kbd:`F11, Ctrl+Shift+F` + - Toggle full screen + * - :kbd:`Ctrl+M` + - Toggle between Paged mode and Flow mode for text layout + * - :kbd:`Ctrl+W` + - Toggle the scrollbar + * - :kbd:`Ctrl+X` + - Toggle the Reference mode + * - :kbd:`Ctrl+B` + - Show/hide bookmarks + * - :kbd:`Ctrl+Alt+B` + - New bookmark + * - :kbd:`Ctrl+N, Ctrl+E` + - Show the book metadata + * - :kbd:`Ctrl+Alt+F5, Ctrl+Alt+R` + - Reload book + * - :kbd:`Ctrl+Shift+ArrowRight` + - Alter the current selection forward by a word + * - :kbd:`Ctrl+Shift+ArrowLeft` + - Alter the current selection backwards by a word + * - :kbd:`Shift+ArrowRight` + - Alter the current selection forward by a character + * - :kbd:`Shift+ArrowLeft` + - Alter the current selection backwards by a character + * - :kbd:`Shift+ArrowDown` + - Alter the current selection forward by a line + * - :kbd:`Shift+Home` + - Extend the current selection to the start of the line + * - :kbd:`Shift+End` + - Extend the current selection to the end of the line + * - :kbd:`Ctrl+A` + - Select all + * - :kbd:`Shift+ArrowUp` + - Alter the current selection backwards by a line + * - :kbd:`Ctrl+Shift+ArrowDown` + - Alter the current selection forward by a paragraph + * - :kbd:`Ctrl+Shift+ArrowUp` + - Alter the current selection backwards by a paragraph + * - :kbd:`Esc, MenuKey` + - Show the E-book viewer controls + * - :kbd:`Ctrl+Comma, Ctrl+Esc, Meta+Esc, Meta+Comma` + - Show E-book viewer preferences + * - :kbd:`Ctrl+G, ;, :` + - Go to a specified book location or position + * - :kbd:`Ctrl+Spacebar` + - Toggle auto-scroll + * - :kbd:`Alt+ArrowUp` + - Auto scroll faster + * - :kbd:`Alt+ArrowDown` + - Auto scroll slower + * - :kbd:`Ctrl+I` + - Show/hide Inspector + * - :kbd:`Ctrl+L` + - Show/hide the word lookup panel + * - :kbd:`Ctrl+Q (Cmd+Q on macOS)` + - Quit + * - :kbd:`Ctrl+P` + - Print book to PDF + * - 
:kbd:`Ctrl+F11` + - Toggle the toolbar + * - :kbd:`Ctrl+H` + - Toggle the highlights panel + * - :kbd:`Ctrl+D` + - Edit this book Non re-flowable content -------------------------- @@ -287,6 +426,9 @@ the viewer will set the following classes on the ``body`` element: ``body.calibre-viewer-scrolling`` Set when in flow (non-paginated) mode +``body.calibre-footnote-container`` + Set when displaying a popup footnote + Finally, you can use the calibre color scheme colors via `CSS variables `_. The calibre viewer defines the following variables: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..5db79d222e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[tool.ruff] +line-length = 160 +target-version = 'py38' +builtins = ['_'] + +[tool.ruff.lint] +ignore = ['E741', 'E402', 'E722', 'E401'] +select = ['E', 'F'] + +[tool.ruff.lint.per-file-ignores] +"src/calibre/ebooks/unihandecode/unicodepoints.py" = ["E501"] +"src/qt/__init__.py" = ["E501"] + +[tool.black] +target-version = ['py38'] + +[tool.isort] +profile = "black" +combine_as_imports = true +multi_line_output = 5 +known_future_library = "__python__" +known_third_party = "qt" +known_standard_library = "aes,elementmaker,encodings" +known_first_party = "calibre_extensions" diff --git a/recipes/1843.recipe b/recipes/1843.recipe index 35380ad083..9543a3570b 100644 --- a/recipes/1843.recipe +++ b/recipes/1843.recipe @@ -2,14 +2,7 @@ # vim:fileencoding=utf-8 # License: GPLv3 Copyright: 2016, Kovid Goyal -from __future__ import absolute_import, division, print_function, unicode_literals -from calibre.web.feeds.recipes import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +from calibre.web.feeds.news import BasicNewsRecipe, classes class E1843(BasicNewsRecipe): @@ -20,34 +13,41 @@ class E1843(BasicNewsRecipe): no_stylesheets = True remove_javascript = True encoding = 
'utf-8' + # economist.com has started throttling after about 60% of the total has + # downloaded with connection reset by peer (104) errors. + delay = 1 + keep_only_tags = [ dict(id='content') ] remove_tags = [ - classes('advert ad ds-share-list article__wordmark related-articles newsletter-signup') + classes('advert ad ds-share-list article__wordmark related-articles newsletter-signup'), + dict(attrs={'data-test-id':'sharing-modal'}), ] def parse_index(self): soup = self.index_to_soup('https://economist.com/1843') ans = [] + main = soup.find(id='content') - for a in soup.findAll(**classes('headline-link')): + for h3 in main.find_all('h3'): + a = h3.find('a') url = a['href'] if url.startswith('/'): url = 'https://economist.com' + url title = self.tag_to_string(a) self.log(title, ' at ', url) desc = '' - d = a.parent.findNextSibling(itemprop='description') + d = a.parent.findNextSibling('p') if d is not None: desc = self.tag_to_string(d) ans.append({'title': title, 'url': url, 'description': desc}) return [('Articles', ans)] def postprocess_html(self, soup, *a): - main = soup.find(id='content') - header = soup.find(**classes('article__header')) - header.extract() - main.insert(0, header) + a = soup.find('a', string='More from 1843 magazine') + if a is not None: + more = a.parent.parent + more.extract() return soup diff --git a/recipes/aachener_nachrichten.recipe b/recipes/aachener_nachrichten.recipe deleted file mode 100644 index 7424426f70..0000000000 --- a/recipes/aachener_nachrichten.recipe +++ /dev/null @@ -1,113 +0,0 @@ -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class AdvancedUserRecipe(BasicNewsRecipe): - - title = u'Aachener Nachrichten' - __author__ = 'schuster' # AGE update 2012-11-28 - oldest_article = 1 - max_articles_per_feed = 100 - no_stylesheets = True - remove_javascript = True - remove_empty_feeds = True - language = 'de' - -# cover_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png' - masthead_url = 
'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png' - - keep_only_tags = [ - dict(name='article', attrs={'class': ['single']}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ["clearfix navi-wrapper"]}), - dict(name='div', attrs={'id': ["article_actions"]}), - dict(name='style', attrs={'type': ["text/css"]}), - dict(name='aside'), - dict(name='a', attrs={'class': ["btn btn-action"]}) - ] - - feeds = [ - (u'Lokales - Euregio', - u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'), - (u'Lokales - Aachen', - u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'), - (u'Lokales - Nordkreis', - u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'), - (u'Lokales - Düren', - u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'), - (u'Lokales - Eiffel', - u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'), - (u'Lokales - Eschweiler', - u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'), - (u'Lokales - Geilenkirchen', - u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'), - (u'Lokales - Heinsberg', - u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'), - (u'Lokales - Jülich', - u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'), - (u'Lokales - Stolberg', - u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'), - (u'News - Politik', - u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'), - (u'News - Aus aller Welt', - u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'), - (u'News - Wirtschaft', - u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'), - (u'News - Kultur', - u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'), - (u'News - Kino', u'http://www.aachener-nachrichten.de/cmlink/kino-rss-1.365019'), - (u'News - Digital', - u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'), - (u'News - Wissenschaft', - 
u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'), - (u'News - Hochschule', - u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'), - (u'News - Auto', u'http://www.aachener-nachrichten.de/cmlink/auto-rss-1.365023'), - (u'News - Kurioses', - u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'), - (u'News - Musik', - u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'), - (u'News - Tagesthema', - u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'), - (u'News - Newsticker', - u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'), - (u'Sport - Aktuell', - u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'), - (u'Sport - Fußball', - u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'), - (u'Sport - Bundesliga', - u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'), - (u'Sport - Alemannia Aachen', - u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'), - (u'Sport - Volleyball', - u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'), - (u'Sport - Chio', - u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'), - (u'Dossier - Kinderuni', - u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'), - (u'Dossier - Karlspreis', - u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'), - (u'Dossier - Ritterorden', - u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'), - (u'Dossier - ZAB-Aachen', - u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'), - (u'Dossier - Karneval', - u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'), - (u'Ratgeber - Geld', - u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'), - (u'Ratgeber - Recht', - u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'), - (u'Ratgeber - Gesundheit', - u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'), 
- (u'Ratgeber - Familie', - u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'), - (u'Ratgeber - Livestyle', - u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'), - (u'Ratgeber - Reisen', - u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'), - (u'Ratgeber - Bauen und Wohnen', - u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'), - (u'Ratgeber - Bildung und Beruf', - u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'), - ] diff --git a/recipes/acim_bilim_dergisi.recipe b/recipes/acim_bilim_dergisi.recipe deleted file mode 100644 index 1d9746b127..0000000000 --- a/recipes/acim_bilim_dergisi.recipe +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1334868409(BasicNewsRecipe): - title = u'AÇIK BİLİM DERGİSİ' - description = ' Aylık çevrimiçi bilim dergisi' - __author__ = u'thomass' - oldest_article = 30 - max_articles_per_feed = 300 - auto_cleanup = True - encoding = 'UTF-8' - publisher = 'açık bilim' - category = 'haber, bilim,TR,dergi' - language = 'tr' - publication_type = 'magazine ' - conversion_options = { - 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg' - masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg' - - feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')] diff --git a/recipes/afr.recipe b/recipes/afr.recipe new file mode 100644 index 0000000000..d196252dfe --- /dev/null +++ b/recipes/afr.recipe @@ -0,0 +1,89 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ptempfile import PersistentTemporaryFile + + +class afr(BasicNewsRecipe): + title = 'Australian Financial Review' + __author__ = 'unkn0wn' + description = ( + 'For more than 65 years The Australian Financial Review has been the 
authority on business,' + ' finance and investment news in Australia. It has a reputation for independent, award-winning ' + 'journalism and is essential reading for Australia\'s business and investor community.' + ) + masthead_url = 'https://www.nineforbrands.com.au/wp-content/uploads/2020/08/AFR-DHOSP-Logo-black-RGB.png' + encoding = 'utf-8' + language = 'en_AU' + + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 25 + no_stylesheets = True + remove_empty_feeds = True + remove_attributes = ['style', 'height', 'width'] + + keep_only_tags = [ + dict(name=['article', 'main'], attrs={'id':'content'}) + ] + + remove_tags = [ + dict(attrs={'data-testid': [ + 'ArticleTools', 'ArticleBreadcrumb-Links', 'ad-wrapper', 'ArticleFooter', 'ArticleTags', + 'beyondwords-player-wrapper' + ]}), + dict(name=['button', 'aside', 'svg']), + ] + + remove_tags_after= [ dict(name='aside', attrs={'id':'stickyContainer'})] + + extra_css = ''' + #img-cap {font-size:small; text-align:center;} + [data-testid="AuthorNames"], [data-testid="ArticleTimestamp"] {font-size:small;} + ''' + + ignore_duplicate_articles = {'title'} + resolve_internal_links = True + remove_empty_feeds = True + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/', 'podcast-' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'] + for 
fig in soup.findAll('figcaption'): + fig['id'] = 'img-cap' + return soup + + feeds = [] + + sections = [ + 'companies', 'market', 'politics', 'policy', 'world', 'wealth', 'street-talk', + 'chaticleer', 'rear-window', 'life-and-luxury', 'technology', 'property', + 'work-and-careers', + ] + + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:https%3A%2F%2Fwww.afr.com{}&hl=en-AU&gl=AU&ceid=AU:en' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + feeds.append(('Others', a.format(''))) diff --git a/recipes/aftenposten.recipe b/recipes/aftenposten.recipe deleted file mode 100644 index fea850fc00..0000000000 --- a/recipes/aftenposten.recipe +++ /dev/null @@ -1,20 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class Aftenposten(BasicNewsRecipe): - title = u'Aftenposten' - __author__ = 'davotibarna' - description = 'Norske nyheter' - language = 'no' - oldest_article = 5 - max_articles_per_feed = 100 - recipe_disabled = ('The recipe to download Aftenposten has been ' - 'temporarily disabled at the publisher\'s request, while ' - 'they finalize their digital strategy.') - no_stylesheets = True - encoding = 'ISO-8859-1' - - feeds = [(u'Aftenposten', u'http://www.aftenposten.no/eksport/rss-1_0/')] - - def print_version(self, url): - return url.replace('#xtor=RSS-3', '?service=print') diff --git a/recipes/agrogerila.recipe b/recipes/agrogerila.recipe deleted file mode 100644 index 70abbe0960..0000000000 --- a/recipes/agrogerila.recipe +++ /dev/null @@ -1,36 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -boljevac.blogspot.com -''' - -import re -from calibre.web.feeds.news import BasicNewsRecipe - - -class AgroGerila(BasicNewsRecipe): - title = 'Agro Gerila' - __author__ = 'Darko Miletic' - description = 'Politicki nekorektan blog.' 
- oldest_article = 45 - max_articles_per_feed = 100 - language = 'sr' - encoding = 'utf-8' - no_stylesheets = True - use_embedded_content = True - publication_type = 'blog' - extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' # noqa - - conversion_options = { - 'comment': description, 'tags': 'film, blog, srbija', 'publisher': 'Dry-Na-Nord', 'language': language - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - feeds = [(u'Posts', u'http://boljevac.blogspot.com/feeds/posts/default')] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) diff --git a/recipes/air_force_times.recipe b/recipes/air_force_times.recipe deleted file mode 100644 index 0d94fb1d72..0000000000 --- a/recipes/air_force_times.recipe +++ /dev/null @@ -1,42 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AirForceTimes(BasicNewsRecipe): - title = 'Air Force Times' - __author__ = 'jde' - __date__ = '16 May 2012' - __version__ = '1.0' - description = 'News of the U.S. Air Force' - language = 'en' - publisher = 'AirForceTimes.com' - category = 'news, U.S. Air Force' - tags = 'news, U.S. 
Air Force' - cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg' - masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg' - oldest_article = 7 # days - max_articles_per_feed = 25 - publication_type = 'newspaper' - no_stylesheets = True - use_embedded_content = False - encoding = None - recursions = 0 - needs_subscription = False - remove_javascript = True - remove_empty_feeds = True - auto_cleanup = True - - feeds = [ - ('Home','http://feeds.feedburner.com/rss/category/air-home?format=xml'), - ('Health Benefits','http://feeds.feedburner.com/rss/category/air-healthbenefits?format=xml'), - ('Retirement Benefits','http://feeds.feedburner.com/rss/category/air-retirementbenefits?format=xml'), - ('Veterans Benefits','http://feeds.feedburner.com/rss/category/air-VeteransBenefits?format=xml'), - ('Education Benefits','http://feeds.feedburner.com/rss/category/air-educationbenefits?format=xml'), - ('Adventure','http://feeds.feedburner.com/rss/category/air-adventure?format=xml'), - ('Entertainment','http://feeds.feedburner.com/rss/category/air-Entertainment?format=xml'), - ('Careers','http://feeds.feedburner.com/rss/category/air-careers?format=xml'), - ('Technology','http://feeds.feedburner.com/rss/category/air-technology?format=xml'), - ('Opinion','http://feeds.feedburner.com/rss/category/air-opinion?format=xml'), - ('Pay','http://feeds.feedburner.com/rss/category/air-pay?format=xml'), - ('Guard','http://feeds.feedburner.com/rss/category/air-guard?format=xml'), - ('Your Air Force','http://feeds.feedburner.com/rss/category/air-yourairforce?format=xml'), - ] diff --git a/recipes/al_masry_alyoum_arabic.recipe b/recipes/al_masry_alyoum_arabic.recipe index c3696a27e3..b834f1ff62 100644 --- a/recipes/al_masry_alyoum_arabic.recipe +++ b/recipes/al_masry_alyoum_arabic.recipe @@ -8,10 +8,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe class AlMasryAlyoum(BasicNewsRecipe): - title = u'Al-Masry Alyoum (المصري اليوم)' + 
title = u'المصري اليوم (Al-Masry Alyoum)' __author__ = 'Hassan Williamson' description = 'The Arabic version of the Al-Masry Alyoum (Egypt Independent) newspaper.' - language = 'ar' + language = 'ar_eg' encoding = 'utf8' cover_url = 'http://www.almasryalyoum.com/content/images/header_logo.png' oldest_article = 7 diff --git a/recipes/ambito.recipe b/recipes/ambito.recipe index b3eae78a93..7a5a177cc6 100644 --- a/recipes/ambito.recipe +++ b/recipes/ambito.recipe @@ -8,7 +8,7 @@ __copyright__ = '2008-2021, Darko Miletic ' ambito.com ''' -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes class Ambito(BasicNewsRecipe): @@ -17,12 +17,11 @@ class Ambito(BasicNewsRecipe): description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires' publisher = 'Editorial Nefir S.A.' category = 'news, politics, economy, finances, Argentina' - oldest_article = 2 + oldest_article = 1.2 no_stylesheets = True - encoding = 'utf8' + encoding = 'utf-8' use_embedded_content = False remove_empty_feeds = True - handle_gzip = True compress_news_images = True scale_news_images_to_device = True ignore_duplicate_articles = {'url'} @@ -41,11 +40,14 @@ class Ambito(BasicNewsRecipe): } keep_only_tags = [ - dict(name='div', attrs={'class': 'detail-header-wrapper'}), - dict(attrs={'class': lambda x: x and 'detail-body' in x.split()}), + classes( + 'detail-highlighted-multimedia news-headline__publication-date news-headline__title' + ' news-headline__author-wrapper news-headline__article-summary' + ), + dict(name='article', attrs={'class': lambda x: x and 'article-body' in x.split()}), ] remove_tags = [ - dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link', 'img']) + dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link']) ] feeds = [ @@ -60,3 +62,8 @@ class Ambito(BasicNewsRecipe): (u'Espectaculos', u'https://www.ambito.com/rss/espectaculos.xml'), (u'Nacional', u'https://www.ambito.com/rss/nacional.xml') ] 
+ + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-td-src-property':True}): + img['src'] = img['data-td-src-property'] + return soup diff --git a/recipes/andhrajyothy_ap.recipe b/recipes/andhrajyothy_ap.recipe new file mode 100644 index 0000000000..3e7834098c --- /dev/null +++ b/recipes/andhrajyothy_ap.recipe @@ -0,0 +1,122 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import json +from datetime import date +from collections import defaultdict + +# figure out your local edition id from the log of this recipe +edi_id = 182 # NTR VIJAYAWADA - 182 + +today = date.today().strftime('%d/%m/%Y') + +# for older edition +# today = '15/01/2024' + +day, month, year = (int(x) for x in today.split('/')) +dt = date(year, month, day) +today = today.replace('/', '%2F') + +index = 'https://epaper.andhrajyothy.com' + +class andhra(BasicNewsRecipe): + title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్' + language = 'te' + __author__ = 'unkn0wn' + masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/0/01/Andhra_Jyothi_newspaper_logo.png' + timefmt = ' [' + dt.strftime('%b %d, %Y') + ']' + description = 'Articles from the ABN Andhra Jyothy epaper, digital edition' + encoding = 'utf-8' + remove_empty_feeds = True + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + self.title = 'ఆంధ్రజ్యోతి ' + dt.strftime('%b %d, %Y') + + extra_css = ''' + .cap { text-align:center; font-size:small; } + img { display:block; margin:0 auto; } + ''' + + def parse_index(self): + + self.log( + '\n***\nif this recipe fails, report it on: ' + 'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n' + ) + + get_edition = index + '/Home/GetEditionsHierarchy' + edi_data = json.loads(self.index_to_soup(get_edition, raw=True)) + self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n') + for edi in edi_data: + if 
edi['org_location'] in {'Magazines', 'Navya Daily'}: + continue + self.log(edi['org_location']) + cities = [] + for edi_loc in edi['editionlocation']: + cities.append(edi_loc['Editionlocation'] + ' - ' + edi_loc['EditionId']) + self.log('\t', ',\n\t'.join(cities)) + + self.log('\nDownloading: Edition ID - ', edi_id) + url = index + '/Home/GetAllpages?editionid=' + str(edi_id) + '&editiondate=' + today + main_data = json.loads(self.index_to_soup(url, raw=True)) + + feeds_dict = defaultdict(list) + + for page in main_data: + sec_name = page['PageNo'] + 'వ పేజీ' + if page['PageNumber'] == 'Page 1': + self.cover_url = page['HighResolution'] + art = index + '/Home/getingRectangleObject?pageid=' + str(page['PageId']) + raw2 = self.index_to_soup(art, raw=True) + art_data = json.loads(raw2) + for snaps in art_data: + section = sec_name + url = str(snaps['OrgId']) + if snaps['ObjectType'] == 4: + continue + feeds_dict[section].append({"title": '', "url": url}) + return [(section, articles) for section, articles in feeds_dict.items()] + + def preprocess_raw_html(self, raw, *a): + data = json.loads(raw) + body = '' + for x in data['StoryContent']: + if x['Headlines']: + if len(x['Headlines']) > 0: + body += '

    ' + x['Headlines'][0].replace('\n', ' ') + '

    ' + for y in x['Headlines'][1:]: + body += '

    ' + y.replace('\n', ' ') + '

    ' + if data['LinkPicture']: + for pics in data['LinkPicture']: + if pics['fullpathlinkpic']: + body += '
    '.format(pics['fullpathlinkpic']) + if pics['caption']: + body += '
    ' + pics['caption'] + '

    ' + for x in data['StoryContent']: + if x['Body'] and x['Body'] != '': + body += '' + x['Body'] + '' + # if data['filepathstorypic']: # this gives you a snap image of the article from page + # body += '

    '.format(data['filepathstorypic'].replace('\\', '/')) + if body.strip() == '': + self.abort_article('no article') + return '
    ' + body + '
    ' + + def populate_article_metadata(self, article, soup, first): + article.url = '***' + h1 = soup.find('h1') + h4 = soup.find('h4') + body = soup.find(attrs={'class':'body'}) + if h4: + article.summary = self.tag_to_string(h4) + article.text_summary = article.summary + elif body: + article.summary = ' '.join(self.tag_to_string(body).split()[:15]) + '...' + article.text_summary = article.summary + article.title = 'ఆంధ్రజ్యోతి' + if h1: + article.title = self.tag_to_string(h1) + elif body: + article.title = ' '.join(self.tag_to_string(body).split()[:7]) + '...' + + def print_version(self, url): + return index + '/User/ShowArticleView?OrgId=' + url diff --git a/recipes/andhrajyothy_tel.recipe b/recipes/andhrajyothy_tel.recipe new file mode 100644 index 0000000000..7dc6449854 --- /dev/null +++ b/recipes/andhrajyothy_tel.recipe @@ -0,0 +1,122 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import json +from datetime import date +from collections import defaultdict + +# figure out your local edition id from the log of this recipe +edi_id = 225 # TELANGANA MAIN II - 225 + +today = date.today().strftime('%d/%m/%Y') + +# for older edition +# today = '15/01/2024' + +day, month, year = (int(x) for x in today.split('/')) +dt = date(year, month, day) +today = today.replace('/', '%2F') + +index = 'https://epaper.andhrajyothy.com' + +class andhra(BasicNewsRecipe): + title = 'ఆంధ్రజ్యోతి - తెలంగాణ' + language = 'te' + __author__ = 'unkn0wn' + masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/0/01/Andhra_Jyothi_newspaper_logo.png' + timefmt = ' [' + dt.strftime('%b %d, %Y') + ']' + description = 'Articles from the ABN Andhra Jyothy epaper, digital edition' + encoding = 'utf-8' + remove_empty_feeds = True + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + self.title = 'ఆంధ్రజ్యోతి ' + dt.strftime('%b %d, %Y') + + extra_css = ''' + .cap { text-align:center; 
font-size:small; } + img { display:block; margin:0 auto; } + ''' + + def parse_index(self): + + self.log( + '\n***\nif this recipe fails, report it on: ' + 'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n' + ) + + get_edition = index + '/Home/GetEditionsHierarchy' + edi_data = json.loads(self.index_to_soup(get_edition, raw=True)) + self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n') + for edi in edi_data: + if edi['org_location'] in {'Magazines', 'Navya Daily'}: + continue + self.log(edi['org_location']) + cities = [] + for edi_loc in edi['editionlocation']: + cities.append(edi_loc['Editionlocation'] + ' - ' + edi_loc['EditionId']) + self.log('\t', ',\n\t'.join(cities)) + + self.log('\nDownloading: Edition ID - ', edi_id) + url = index + '/Home/GetAllpages?editionid=' + str(edi_id) + '&editiondate=' + today + main_data = json.loads(self.index_to_soup(url, raw=True)) + + feeds_dict = defaultdict(list) + + for page in main_data: + sec_name = page['PageNo'] + 'వ పేజీ' + if page['PageNumber'] == 'Page 1': + self.cover_url = page['HighResolution'] + art = index + '/Home/getingRectangleObject?pageid=' + str(page['PageId']) + raw2 = self.index_to_soup(art, raw=True) + art_data = json.loads(raw2) + for snaps in art_data: + section = sec_name + url = str(snaps['OrgId']) + if snaps['ObjectType'] == 4: + continue + feeds_dict[section].append({"title": '', "url": url}) + return [(section, articles) for section, articles in feeds_dict.items()] + + def preprocess_raw_html(self, raw, *a): + data = json.loads(raw) + body = '' + for x in data['StoryContent']: + if x['Headlines']: + if len(x['Headlines']) > 0: + body += '

    ' + x['Headlines'][0].replace('\n', ' ') + '

    ' + for y in x['Headlines'][1:]: + body += '

    ' + y.replace('\n', ' ') + '

    ' + if data['LinkPicture']: + for pics in data['LinkPicture']: + if pics['fullpathlinkpic']: + body += '
    '.format(pics['fullpathlinkpic']) + if pics['caption']: + body += '
    ' + pics['caption'] + '

    ' + for x in data['StoryContent']: + if x['Body'] and x['Body'] != '': + body += '' + x['Body'] + '' + # if data['filepathstorypic']: # this gives you a snap image of the article from page + # body += '

    '.format(data['filepathstorypic'].replace('\\', '/')) + if body.strip() == '': + self.abort_article('no article') + return '
    ' + body + '
    ' + + def populate_article_metadata(self, article, soup, first): + article.url = '***' + h1 = soup.find('h1') + h4 = soup.find('h4') + body = soup.find(attrs={'class':'body'}) + if h4: + article.summary = self.tag_to_string(h4) + article.text_summary = article.summary + elif body: + article.summary = ' '.join(self.tag_to_string(body).split()[:15]) + '...' + article.text_summary = article.summary + article.title = 'ఆంధ్రజ్యోతి' + if h1: + article.title = self.tag_to_string(h1) + elif body: + article.title = ' '.join(self.tag_to_string(body).split()[:7]) + '...' + + def print_version(self, url): + return index + '/User/ShowArticleView?OrgId=' + url diff --git a/recipes/ap.recipe b/recipes/ap.recipe index 0eb30dc832..2bfa2d0015 100644 --- a/recipes/ap.recipe +++ b/recipes/ap.recipe @@ -2,35 +2,12 @@ # vim:fileencoding=utf-8 # License: GPLv3 Copyright: 2017, Kovid Goyal -from __future__ import absolute_import, division, print_function, unicode_literals import json -import re -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes from calibre.utils.date import utcnow, parse_date -def extract_article(raw): - ms = re.search(r"window\['titanium-state'\]", raw) - me = re.search(r"window\['titanium-cacheConfig'\]", raw) - raw = raw[ms.start():me.start()] - raw = raw[raw.find('{'):] - data = json.loads(raw) - data = tuple(data['content']['data'].values())[0] - story_html = '

    ' + data['headline'] + '

    \n' - story_html += '

    ' + data['bylines'] + '

    \n' - story_html += '

    ' + data['published'] + '

    \n' - for m in data.get('media', ()): - sizes = m['imageRenderedSizes'] - if sizes: - sz = 800 if 800 in sizes else sizes[0] - url = m['gcsBaseUrl'] + '{}{}'.format(sz, m['imageFileExtension']) - story_html += '\n
    \n' - story_html += '
    ' + m['caption'] + '
    \n' - story_html += '\n
    ' + data['storyHTML'] + '
    ' - return '' + story_html + '' - - class AssociatedPress(BasicNewsRecipe): title = u'Associated Press' @@ -44,6 +21,22 @@ class AssociatedPress(BasicNewsRecipe): remove_empty_feeds = False oldest_article = 1.5 + keep_only_tags = [ + classes('Page-headline Page-lead Page-storyBody Page-authorinfo'), + ] + remove_tags = [ + classes('Page-actions Enhancement'), + dict(name='source'), + ] + remove_attributes = ['srcset'] + extra_css = ''' + .Figure-caption { + font-style: italic; + font-size: smaller; + margin-left: 1rem; margin-right: 1rem; + } + ''' + def parse_index(self): feeds = [] limit = self.test[0] if self.test else 100 @@ -87,6 +80,3 @@ class AssociatedPress(BasicNewsRecipe): articles.append({'title': title, 'url': url}) self.log('') return articles - - def preprocess_raw_html(self, raw_html, url): - return extract_article(raw_html) diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe deleted file mode 100644 index 4715589a83..0000000000 --- a/recipes/apple_daily.recipe +++ /dev/null @@ -1,305 +0,0 @@ -# vim:fileencoding=UTF-8 -from __future__ import unicode_literals -__license__ = 'GPL v3' -__copyright__ = '2013-2015, Eddie Lau' -__Date__ = '' - -from calibre import (__appname__, force_unicode, strftime) -from calibre.utils.date import now as nowf -import os -import datetime -import re -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup -from calibre.ebooks.metadata.opf2 import OPFCreator -from calibre.ebooks.metadata.toc import TOC -from calibre.ebooks.metadata import MetaInformation -from calibre.utils.localization import canonicalize_lang - - -class AppleDaily(BasicNewsRecipe): - title = u'蘋果日報 (香港)' - __author__ = 'Eddie Lau' - publisher = '蘋果日報' - publication_type= 'newspaper' - oldest_article = 1 - max_articles_per_feed = 100 - auto_cleanup = False - language = 'zh' - encoding = 'utf-8' - auto_cleanup = False - remove_javascript = True - use_embedded_content = False - 
no_stylesheets = True - description = 'http://hkm.appledaily.com/' - category = 'Chinese, News, Hong Kong' - masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/8/86/Apple_Daily_Title.svg' - - extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:125%; text-align:left; font-weight:bold;} p{font-size:90%;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa - keep_only_tags = [dict(name='div', attrs={'id': 'content-article'})] - remove_tags = [dict(name='div', attrs={'class': 'prev-next-btn'}), - dict(name='p', attrs={'class': 'next'}), - dict(name='meta'), - dict(name='link')] - - def get_dtlocal(self): - dt_utc = datetime.datetime.utcnow() - # convert UTC to local hk time - at HKT 6am, all news are available - return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24) - - def get_fetchdate(self): - if __Date__ != '': - return __Date__ - else: - return self.get_dtlocal().strftime("%Y%m%d") - - def get_fetchformatteddate(self): - if __Date__ != '': - return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8] - else: - return self.get_dtlocal().strftime("%Y-%m-%d") - - def get_fetchyear(self): - if __Date__ != '': - return __Date__[0:4] - else: - return self.get_dtlocal().strftime("%Y") - - def get_fetchmonth(self): - if __Date__ != '': - return __Date__[4:6] - else: - return self.get_dtlocal().strftime("%m") - - def get_fetchday(self): - if __Date__ != '': - return __Date__[6:8] - else: - return self.get_dtlocal().strftime("%d") - - # Note: does not work with custom date given by __Date__ - def get_weekday(self): - return self.get_dtlocal().weekday() - - def get_cover_url(self): - soup = self.index_to_soup('http://hkm.appledaily.com/') - cover = soup.find(attrs={'class': 'top-news'}).get('src', False) - br = BasicNewsRecipe.get_browser(self) - try: - br.open(cover) - except: - cover = None - return cover - - def 
populate_article_metadata(self, article, soup, first): - if first and hasattr(self, 'add_toc_thumbnail'): - picdiv = soup.find('img') - if picdiv is not None: - self.add_toc_thumbnail(article, picdiv['src']) - - def parse_index(self): - feeds = [] - soup = self.index_to_soup('http://hkm.appledaily.com/') - ul = soup.find(attrs={'class': 'menu'}) - sectionList = [] - for li in ul.findAll('li'): - relativea = li.find('a', href=True).get('href', False) - a = 'http://hkm.appledaily.com/' + relativea - title = li.find('a', text=True).strip() - # if (time.tzname != 'HKT'): - # if (title == u'三藩市'): - # continue - # if (title == u'洛杉磯'): - # continue - # if (title == u'紐  約'): - # continue - # if (title == u'美  國'): - # continue - # if (not title == u'動新聞') and (relativea.startswith('list.php')): - if (relativea.find('category=daily')!= -1)and (relativea.startswith('list.php')): - sectionList.append((title, a)) - for title, url in sectionList: - title = title.replace(" ", "") - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) - return feeds - - def parse_section(self, url): - soup = self.index_to_soup(url) - ul = soup.find(attrs={'class': 'list'}) - current_articles = [] - if ul is None : - return current_articles - for li in ul.findAll('li'): - a = li.find('a', href=True) - title = li.find('p', text=True).strip() - if a is not None: - current_articles.append( - {'title': title, 'url': 'http://hkm.appledaily.com/' + a.get('href', False)}) - pass - return current_articles - - def create_opf(self, feeds, dir=None): - if dir is None: - dir = self.output_dir - title = self.short_title() - if self.output_profile.periodical_date_in_title: - title += strftime(self.timefmt) - mi = MetaInformation(title, [__appname__]) - mi.publisher = __appname__ - mi.author_sort = __appname__ - if self.publication_type: - mi.publication_type = 'periodical:' + \ - self.publication_type + ':' + self.short_title() - mi.timestamp = nowf() - article_titles, aseen = 
[], set() - for f in feeds: - for a in f: - if a.title and a.title not in aseen: - aseen.add(a.title) - article_titles.append(force_unicode(a.title, 'utf-8')) - - mi.comments = self.description - if not isinstance(mi.comments, type(u'')): - mi.comments = mi.comments.decode('utf-8', 'replace') - mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' + - '\n\n'.join(article_titles)) - - language = canonicalize_lang(self.language) - if language is not None: - mi.language = language - # This one affects the pub date shown in kindle title - # mi.pubdate = nowf() - # now appears to need the time field to be > 12.00noon as well - mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int( - self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0) - opf_path = os.path.join(dir, 'index.opf') - ncx_path = os.path.join(dir, 'index.ncx') - - opf = OPFCreator(dir, mi) - # Add mastheadImage entry to section - mp = getattr(self, 'masthead_path', None) - if mp is not None and os.access(mp, os.R_OK): - from calibre.ebooks.metadata.opf2 import Guide - ref = Guide.Reference(os.path.basename( - self.masthead_path), os.getcwd()) - ref.type = 'masthead' - ref.title = 'Masthead Image' - opf.guide.append(ref) - - manifest = [os.path.join(dir, 'feed_%d' % i) - for i in range(len(feeds))] - manifest.append(os.path.join(dir, 'index.html')) - manifest.append(os.path.join(dir, 'index.ncx')) - - # Get cover - cpath = getattr(self, 'cover_path', None) - if cpath is None: - pf = open(os.path.join(dir, 'cover.jpg'), 'wb') - if self.default_cover(pf): - cpath = pf.name - if cpath is not None and os.access(cpath, os.R_OK): - opf.cover = cpath - manifest.append(cpath) - - # Get masthead - mpath = getattr(self, 'masthead_path', None) - if mpath is not None and os.access(mpath, os.R_OK): - manifest.append(mpath) - - opf.create_manifest_from_files_in(manifest) - for mani in opf.manifest: - if mani.path.endswith('.ncx'): - mani.id = 'ncx' - if mani.path.endswith('mastheadImage.jpg'): - 
mani.id = 'masthead-image' - - entries = ['index.html'] - toc = TOC(base_path=dir) - self.play_order_counter = 0 - self.play_order_map = {} - - def feed_index(num, parent): - f = feeds[num] - for j, a in enumerate(f): - if getattr(a, 'downloaded', False): - adir = 'feed_%d/article_%d/' % (num, j) - auth = a.author - if not auth: - auth = None - desc = a.text_summary - if not desc: - desc = None - else: - desc = self.description_limiter(desc) - tt = a.toc_thumbnail if a.toc_thumbnail else None - entries.append('%sindex.html' % adir) - po = self.play_order_map.get(entries[-1], None) - if po is None: - self.play_order_counter += 1 - po = self.play_order_counter - parent.add_item('%sindex.html' % adir, None, - a.title if a.title else _( - 'Untitled Article'), - play_order=po, author=auth, - description=desc, toc_thumbnail=tt) - last = os.path.join( - self.output_dir, ('%sindex.html' % adir).replace('/', os.sep)) - for sp in a.sub_pages: - prefix = os.path.commonprefix([opf_path, sp]) - relp = sp[len(prefix):] - entries.append(relp.replace(os.sep, '/')) - last = sp - - if os.path.exists(last): - with open(last, 'rb') as fi: - src = fi.read().decode('utf-8') - src = src.replace('height:260px !important;','') # fix flow-player div tag parent - soup = BeautifulSoup(src) - body = soup.find('body') - if body is not None: - prefix = '/'.join('..'for i in range(2 * - len(re.findall(r'link\d+', last)))) - templ = self.navbar.generate(True, num, j, len(f), - not self.has_single_feed, - a.orig_url, __appname__, prefix=prefix, - center=self.center_navbar) - translatedTempl =re.sub( - '本篇由 '+__appname__+ - ' 快取自 蘋果日報 ; 本篇來源位置。'+ - ' 1: - for i, f in enumerate(feeds): - entries.append('feed_%d/index.html' % i) - po = self.play_order_map.get(entries[-1], None) - if po is None: - self.play_order_counter += 1 - po = self.play_order_counter - auth = getattr(f, 'author', None) - if not auth: - auth = None - desc = getattr(f, 'description', None) - if not desc: - desc = None - 
feed_index(i, toc.add_item('feed_%d/index.html' % i, None, - f.title, play_order=po, description=desc, author=auth)) - - else: - entries.append('feed_%d/index.html' % 0) - feed_index(0, toc) - - for i, p in enumerate(entries): - entries[i] = os.path.join(dir, p.replace('/', os.sep)) - opf.create_spine(entries) - opf.set_toc(toc) - - with open(opf_path, 'wb') as opf_file, open(ncx_path, 'wb') as ncx_file: - opf.render(opf_file, ncx_file) diff --git a/recipes/bangkokpost.recipe b/recipes/bangkokpost.recipe index 38bef6e485..464ad9800c 100644 --- a/recipes/bangkokpost.recipe +++ b/recipes/bangkokpost.recipe @@ -47,7 +47,7 @@ class BangkokPostRecipe(BasicNewsRecipe): feeds.append((u'Tech', u'http://www.bangkokpost.com/rss/data/tect.xml')) keep_only_tags = [ - classes('article-headline articl-content'), + classes('article-headline article-content'), ] def print_version(self, url): diff --git a/recipes/bar_and_bench.recipe b/recipes/bar_and_bench.recipe new file mode 100644 index 0000000000..9d9a11ab84 --- /dev/null +++ b/recipes/bar_and_bench.recipe @@ -0,0 +1,73 @@ +from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes +from calibre.ptempfile import PersistentTemporaryFile + +class bar(BasicNewsRecipe): + title = 'Bar and Bench' + __author__ = 'unkn0wn' + description = ( + 'Bar & Bench is the premier online portal for Indian legal news. News, interviews,' + ' and columns related to the Supreme Court of India and the High Courts are published.' 
+ ) + language = 'en_IN' + masthead_url = 'https://gumlet.assettype.com/barandbench/2019-12/7a743b15-5d5d-44d7-96c2-13616780ed95/brand_2x.png' + + no_stylesheets = True + remove_javascript = True + remove_attributes = ['height', 'width', 'style'] + + keep_only_tags = [ + prefixed_classes( + 'text-story-m_header-details__ text-story-m_hero-image__ text-story-m_story-content-inner-wrapper__' + ) + ] + + remove_tags = [ + prefixed_classes( + 'text-story-m_story-tags__ story-footer-module__metype__' + ), + dict(name = 'svg') + ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'] + return soup + + ignore_duplicate_articles = {'title'} + resolve_internal_links = True + remove_empty_feeds = True + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/', 'podcast-' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + feeds = [] + + sections = [ + 'news', 'columns', 'interviews', 'law-firms', 'apprentice-lawyer', 'legal-jobs' + ] + + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:barandbench.com{}&hl=en-IN&gl=IN&ceid=IN:en' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + feeds.append(('Others', a.format(''))) diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe index ba33d8614c..923e66d6e5 100644 --- a/recipes/barrons.recipe +++ b/recipes/barrons.recipe @@ -1,122 +1,111 @@ -#!/usr/bin/env python -# 
vim:fileencoding=utf-8 -# License: GPLv3 Copyright: 2019, Kovid Goyal -from __future__ import absolute_import, division, print_function, unicode_literals -import json +from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes +from collections import defaultdict +from datetime import date +import re -from mechanize import Request -from calibre import random_user_agent -from calibre.web.feeds.news import BasicNewsRecipe -from base64 import standard_b64encode - -try: - import urllib.parse as urlparse -except ImportError: - import urlparse -try: - from urllib.parse import quote -except ImportError: - from urllib import quote - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -MAGAZINE_INDEX = 'https://www.barrons.com/magazine' - - -class BarronsMagazine(BasicNewsRecipe): +class barrons(BasicNewsRecipe): title = 'Barron\'s Magazine' - __author__ = 'Kovid Goyal' - description = 'Financial news from the publisher of the WSJ' - language = 'en' - needs_subscription = True + __author__ = 'unkn0wn' + description = ( + 'Barron\'s is an American weekly magazine/newspaper published by Dow Jones & Company. Founded in 1921 as a sister ' + 'publication to The Wall Street Journal, Barron\'s covers U.S. financial information, market developments, and ' + 'relevant statistics.' 
+ ) + language = 'en_US' + use_embedded_content = False no_stylesheets = True + remove_javascript = True + remove_attributes = ['height', 'width', 'style'] + encoding = 'utf-8' + ignore_duplicate_articles = {'url'} + masthead_url = 'https://www.barrons.com/asset/barrons/images/barrons-logo.png' + delay = 1 + + extra_css = ''' + img {display:block; margin:0 auto;} + .figc { font-size:small; text-align:center; } + .imageCredit { color:#404040; font-size:x-small; } + .headline__category, .article-prebody { font-size:small; color:#404040; } + .sub-head { color:#202020; } + ''' keep_only_tags = [ - classes('article__headline article__body'), + classes('headline articleLead article-prebody'), + dict(name='section', attrs={'subscriptions-section':'content'}) + ] + remove_tags = [ + dict(name=['meta', 'link', 'svg', 'button', 'i-amphtml-sizer']), + classes('wsj-ad dynamic-inset-overflow newsletter-inset') ] - def get_browser(self, *a, **kw): - # To understand the login logic read app-min.js from - # https://sso.accounts.dowjones.com/login - kw['user_agent'] = random_user_agent(allow_ie=False) - br = super().get_browser(*a, **kw) - if not self.username or not self.password: - self.barrons_itp_page = br.open(MAGAZINE_INDEX).read() - return br - itp = quote(MAGAZINE_INDEX, safe='') - start_url = 'https://accounts.barrons.com/login?target=' + itp - self.log('Starting login process...') - res = br.open(start_url) - sso_url = res.geturl() - query = urlparse.parse_qs(urlparse.urlparse(sso_url).query) - query = {k:v[0] for k, v in query.items()} - request_query = { - 'username': self.username, - 'password': self.password, - 'client_id': query['client'], - 'sso': 'true', - 'tenant': 'sso', - '_intstate': 'deprecated', - 'connection': 'DJldap', - } - for cookie in br.cookiejar: - if cookie.name in ('_csrf', 'csrf'): - request_query['_csrf'] = cookie.value - for k in 'scope connection nonce state ui_locales ns protocol redirect_uri'.split(): - if k in query: - request_query[k] = 
query[k] - login_url = 'https://sso.accounts.dowjones.com/usernamepassword/login' - # you can get the version below from lib-min.js - # search for: str: "x.x.x" - # This might need to be updated in the future - auth0_client = json.dumps({"name": "auth0.js", "version": "7.0.3"}) - if not isinstance(auth0_client, bytes): - auth0_client = auth0_client.encode('utf-8') - auth0_client = standard_b64encode(auth0_client) - if isinstance(auth0_client, bytes): - auth0_client = auth0_client.decode('ascii') - rq = Request(login_url, headers={ - 'Accept': 'text/html', - 'Accept-Language': 'en-US,en;q=0.8', - 'Auth0-Client': auth0_client.rstrip('='), - 'X-HTTP-Method-Override': 'POST', - 'X-Requested-With': 'XMLHttpRequest', - 'X-Remote-User': self.username - }, data=request_query) - self.log('Sending login request...') - try: - res = br.open(rq) - except Exception as err: - if hasattr(err, 'read'): - raise Exception('Login request failed with error: {} and body: {}'.format(err, err.read().decode('utf-8', 'replace'))) - raise - if res.code != 200: - raise ValueError('Failed to login, check your username and password') - br.select_form(nr=0) - self.log('Performing login callback...') - res = br.submit() - self.barrons_itp_page = raw = res.read() - if b'/logout' not in raw: - raise ValueError( - 'Failed to login (callback URL failed), check username and password') + def preprocess_html(self, soup): + for figc in soup.findAll('figcaption'): + figc['class'] = 'figc' + for p in figc.findAll('p'): + p.name = 'div' + for by in soup.findAll(**classes('byline')): + for p in by.findAll('p'): + p.name = 'span' + for h2 in soup.findAll('h2'): + h2.name = 'h4' + for iframe in soup.findAll('amp-iframe'): + wsj = iframe.find('amp-img') + if wsj: + wsj.decompose() + data = re.search(r'datawrapper-chart-(.{5})', iframe['src']) + if data: + iframe.name = 'img' + iframe['src'] = 'https://datawrapper.dwcdn.net/' + data.group(1) + '/full.png' + for amp in soup.findAll('amp-img'): + if not 
amp.find('img', attrs={'src':True}): + if amp.has_attr('src'): + amp['src'] = amp['src'] + '&pixel_ratio=1.5' + amp.name = 'img' + else: + amp.img['src'] = amp.img['src'] + '&pixel_ratio=1.5' + return soup + + def get_browser(self, *args, **kwargs): + kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' + br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [ + ('Referer', 'https://www.google.com/'), + ('X-Forwarded-For', '66.249.66.1') + ] return br def parse_index(self): - soup = self.index_to_soup(self.barrons_itp_page) - articles = [] - for art in soup.findAll('article'): - h = art.find(['h2', 'h3']) - a = h.find('a') - title = self.tag_to_string(a) - url = a['href'] + archive = self.index_to_soup('https://www.barrons.com/magazine?archives=' + date.today().strftime('%Y')) + issue = archive.find(**prefixed_classes('BarronsTheme--archive-box--')) + self.timefmt = ' [' + self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--date--'))) + ']' + self.log(self.timefmt) + self.cover_url = issue.img['src'].split('?')[0] + + ans = defaultdict(list) + + for articles in archive.findAll(**prefixed_classes('BarronsTheme--story--')): + section = 'Magazine' + strap = articles.find_previous_sibling(**prefixed_classes('BarronsTheme--strap--')) + if strap: + label = strap.find(**prefixed_classes('BarronsTheme--label--')) + if label: + section = self.tag_to_string(label).strip() + a = articles.find(**prefixed_classes('BarronsTheme--heading')) + title = self.tag_to_string(a).strip() + url = a.a['href'] desc = '' - p = art.find('p', attrs={'class': lambda x: x and ('_summary_' in x or '_byline_' in x)}) - if p: - desc += self.tag_to_string(p) - articles.append({'title': title, 'url': url, 'description': desc}) - return [('Articles', articles)] + byl = articles.find(**prefixed_classes('BarronsTheme--byline--')) + if byl: + desc += self.tag_to_string(byl) + ttr = 
articles.find(**prefixed_classes('BarronsTheme--time-to-read--')) + if ttr: + desc += self.tag_to_string(ttr) + summ = articles.find(**prefixed_classes('BarronsTheme--summary--')) + if summ: + desc += ' | ' + self.tag_to_string(summ) + self.log('\t', title, ' ', url, '\n\t', desc) + ans[section].append({'title': title, 'url': url, 'description': desc}) + return [(section, articles) for section, articles in ans.items()] + + def print_version(self, url): + return url.split('?')[0].replace('/articles/', '/amp/articles/') diff --git a/recipes/bloomberg-business-week.recipe b/recipes/bloomberg-business-week.recipe index 4374c75969..297a107792 100644 --- a/recipes/bloomberg-business-week.recipe +++ b/recipes/bloomberg-business-week.recipe @@ -1,8 +1,62 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes -from calibre import browser +from html5_parser import parse import json -import re +import random +import time +def get_contents(x): + if x == '': + return '' + otype = x.get('type', '') + if otype == 'text': + if 'attributes' in x: + if 'strong' in x['attributes']: + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + if 'emphasis' in x['attributes']: + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + elif otype == 'br': + return '
    ' + elif otype == 'paragraph': + return '

    ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '

    ' + elif otype == 'heading': + return '

    ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '

    ' + elif otype == 'list': + return '
      ' + ''.join(map(get_contents, x.get('content', ''))) + '
    ' + elif otype == 'listItem': + return '
  • ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '
  • ' + elif otype == 'quote': + return '
    ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '
    ' + elif otype == 'media': + if x['subType'] == 'photo': + return '
    {} {}
    '.format( + x['data']['photo']['src'], x['data']['photo']['caption'], x['data']['photo']['credit']) + elif x['subType'] == 'chart': + if x['data'] and x['data']['chart']: + return '
    '.format(x['data']['chart']['fallback']) + elif otype == 'link': + if 'data' in x: + if 'href' in x['data']: + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + elif otype == 'entity': + if x['subType'] == 'story': + if x['data'] and x['data']['link'] and x['data']['link']['destination']: + if 'web' in x['data']['link']['destination']: + return '' + x.get('value', '') + ''.join( + map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + elif otype in {'div', 'callout'}: + return '
    ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '
    ' + elif not any(x == otype for x in ['', 'ad', 'inline-newsletter', 'tabularData']): + if any(b in x for b in ['value', 'content']): + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + + return '' class Bloomberg(BasicNewsRecipe): title = u'Bloomberg Businessweek' @@ -14,22 +68,32 @@ class Bloomberg(BasicNewsRecipe): ignore_duplicate_articles = {'url'} resolve_internal_links = True masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg' - delay = 2 + description = ( + 'Bloomberg Businessweek helps global leaders stay ahead with insights and in-depth analysis on the people,' + ' companies, events, and trends shaping today\'s complex, global economy.' + ) + + simultaneous_downloads = 1 + extra_css = ''' - #auth {font-size:small; font-weight:bold;} - #time {font-size:small;} - #subhead {font-style:italic; color:#404040;} - .news-figure-caption-text, #cap {font-size:small; text-align:center;} + .auth {font-size:small; font-weight:bold;} + .time, .chart, .css--lede-byline, .css--lede-timestamp {font-size:small;} + .subhead, .cap span {font-style:italic; color:#404040;} + em, .col {color:#202020;} + .cat {font-size:small; color:gray;} + .news-figure-caption-text, .cap, .img, .css--caption-outer-wrapper {font-size:small; text-align:center;} .news-figure-credit {font-size:small; text-align:center; color:#202020;} ''' remove_tags = [ + dict(name=['button', 'svg']), dict(name='div', attrs={'id':['bb-that', 'bb-nav']}), - classes('twitter-logo bb-global-footer') + classes('twitter-logo bb-global-footer __sticky__audio__bar__portal__ css--social-wrapper-outer') ] - def get_browser(self): - br = browser() + def get_browser(self, *a, **kw): + kw['user_agent'] = 'Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/119.0' + br = BasicNewsRecipe.get_browser(self, *a, **kw) br.set_handle_redirect(False) return br @@ -40,77 +104,87 @@ class Bloomberg(BasicNewsRecipe): 
self.log('Downloading ', edition) self.cover_url = bw.find('img')['src'].replace('25x19', '600x800') soup = self.index_to_soup(edition) - timefmt = soup.find(**classes('section-front-header-module__title')) - if timefmt: - self.timefmt = ' [' + (self.tag_to_string(timefmt).replace('Issue', '')).strip() + ']' + if timefmt := soup.find(attrs={'class':lambda x: x and x.startswith('styles_MagazineTitle__')}): + self.timefmt = ' [' + (self.tag_to_string(timefmt).replace(' Issue', '')).strip() + ']' feeds = [] - for div in soup.findAll('div', attrs={'class':'story-list-module__info'}): - h3 = div.find('h3', attrs={'class':'story-list-module__title'}) + for div in soup.findAll(attrs={'class':lambda x: x and x.startswith( + ('styles_MagazineFeatures__', 'styles_MagazineStoryList__') + )}): + h3 = div.find(attrs={'class':lambda x: x and x.startswith( + ('styles_featuresTitle__', 'styles_magazineSectionTitle__') + )}) sec = self.tag_to_string(h3) self.log(sec) articles = [] - for art in div.findAll('article'): - a = art.find('a', **classes('story-list-story__info__headline-link')) + for art in div.findAll(attrs={'data-component':'headline'}): + a = art.find('a', href=True) url = a['href'] if url.startswith('http') is False: url = 'https://www.bloomberg.com' + a['href'] title = self.tag_to_string(a) - desc = '' - sum = art.find(**classes('story-list-story__info__summary')) - if sum: - desc = self.tag_to_string(sum).strip() - by = art.find(**classes('story-list-story__info__byline')) - if by: - desc = self.tag_to_string(by).strip() + ' | ' + desc - articles.append({'title': title, 'url': url, 'description': desc}) - self.log('\t', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'url': url}) + self.log('\t', title, '\n\t\t', url) if articles: feeds.append((sec, articles)) return feeds def preprocess_raw_html(self, raw, *a): - m = re.search('data-component-props="ArticleBody">', raw) + root = parse(raw) + m = 
root.xpath('//script[@data-component-props="ArticleBody"]') if not m: - m = re.search('data-component-props="FeatureBody">', raw) + m = root.xpath('//script[@data-component-props="FeatureBody"]') if not m: - return raw + m2 = root.xpath('//script[@id="__NEXT_DATA__"]') + if not m2: + return raw + if m: + data = json.loads(m[0].text) + data = data['story'] - raw = raw[m.start():] - raw = raw.split('>', 1)[1] - data = json.JSONDecoder().raw_decode(raw)[0] - data = data['story'] + else: + data = json.loads(m2[0].text) + data = data['props']['pageProps']['story'] title = '

    ' + data['headline'] + '

    ' cat = subhead = lede = auth = caption = '' - if 'primaryCategory' in data: - if data['primaryCategory'] is not None: - cat = '

    ' + data['primaryCategory'] + '

    ' + if 'primaryCategory' in data and data['primaryCategory'] is not None: + cat = '

    ' + data['primaryCategory'] + '

    ' - if len(data['abstract']) != 0: - if len(data['abstract']) == 2: - subhead = '

    ' + data['abstract'][0] + '

    ' + data['abstract'][1] + '

    ' + if len(data['abstract']) != 0 and len(data['abstract']) == 2: + subhead = '

    ' + data['abstract'][0] + '

    ' + data['abstract'][1] + '

    ' else: if 'summary' in data: - subhead = '
    ' + data['summary'] + '
    ' + subhead = '

    ' + data['summary'] + '

    ' - if 'byline' in data: - if data['byline'] is not None: - auth = '
    ' + data['byline']\ - + ' | ' + data['publishedAt'][:-14] + '
    ' + if 'byline' in data and data['byline'] is not None: + auth = '
    ' + data['byline']\ + + ' | ' + data['publishedAt'][:-14] + '
    ' - if 'ledeImageUrl' in data: - if data['ledeImageUrl'] is not None: - lede = '

    '.format(data['ledeImageUrl'].replace('\\', '')) + if 'ledeImageUrl' in data and data['ledeImageUrl'] is not None: + lede = '

    '.format(data['ledeImageUrl']) - if data['ledeDescription'] is not None: - caption = '' + data['ledeDescription'] + '' + if 'ledeDescription' in data and data['ledeDescription'] is not None: + caption = '' + data['ledeDescription'] + '' + else: + if 'lede' in data and data['lede'] is not None: + if 'alt' in data['lede'] and data['lede']['alt'] is not None: + caption = '' + data['lede']['alt'] + '' - body = data['body'].replace('\\', '') - html = '' + cat + title + subhead + auth + lede + caption + '

    ' + body - return html + if m: + time.sleep(3) + body = data['body'] + else: + body = '' + body_data = data['body']['content'] + for x in body_data: + body += get_contents(x) + pause = random.choice((5, 6, 7, 8, 9)) + self.log('Delay: ', pause, ' seconds') + time.sleep(pause) + return '' + cat + title + subhead + auth + lede + caption + '
    ' + body + '
    ' def preprocess_html(self, soup): for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}): @@ -124,4 +198,6 @@ class Bloomberg(BasicNewsRecipe): img['src'] = img['data-native-src'] else: img['src'] = '' + for img in soup.findAll('img', attrs={'src':lambda x: x and x.endswith(('-1x-1.jpg', '-1x-1.png'))}): + img['src'] = img['src'].replace('-1x-1', '750x-1') return soup diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe index 28210d9baf..e525d3cbb4 100644 --- a/recipes/bloomberg.recipe +++ b/recipes/bloomberg.recipe @@ -1,8 +1,63 @@ from calibre.web.feeds.news import BasicNewsRecipe -from calibre import browser +from html5_parser import parse from calibre.ptempfile import PersistentTemporaryFile import json -import re +import random +import time + +def get_contents(x): + if x == '': + return '' + otype = x.get('type', '') + if otype == 'text': + if 'attributes' in x: + if 'strong' in x['attributes']: + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + if 'emphasis' in x['attributes']: + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + elif otype == 'br': + return '
    ' + elif otype == 'paragraph': + return '

    ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '

    ' + elif otype == 'heading': + return '

    ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '

    ' + elif otype == 'list': + return '
      ' + ''.join(map(get_contents, x.get('content', ''))) + '
    ' + elif otype == 'listItem': + return '
  • ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '
  • ' + elif otype == 'quote': + return '
    ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '
    ' + elif otype == 'media': + if x['subType'] == 'photo': + return '
    {} {}
    '.format( + x['data']['photo']['src'], x['data']['photo']['caption'], x['data']['photo']['credit']) + elif x['subType'] == 'chart': + if x['data'] and x['data']['chart']: + return '
    '.format(x['data']['chart']['fallback']) + elif otype == 'link': + if 'data' in x: + if 'href' in x['data']: + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + elif otype == 'entity': + if x['subType'] == 'story': + if x['data'] and x['data']['link'] and x['data']['link']['destination']: + if 'web' in x['data']['link']['destination']: + return '' + x.get('value', '') + ''.join( + map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + elif otype in {'div', 'callout'}: + return '
    ' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '
    ' + elif not any(x == otype for x in ['', 'ad', 'inline-newsletter', 'tabularData']): + if any(b in x for b in ['value', 'content']): + return '' + x.get('value', '') + ''.join(map(get_contents, x.get('content', ''))) + '' + + return '' class Bloomberg(BasicNewsRecipe): @@ -12,21 +67,25 @@ class Bloomberg(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False remove_attributes = ['style', 'height', 'width'] - ignore_duplicate_articles = {'url'} - resolve_internal_links = True - oldest_article = 2 # days - delay = 1.5 + ignore_duplicate_articles = {'url', 'title'} + masthead_url = 'https://assets.bbhub.io/company/sites/70/2022/09/logoBBGblck.svg' + description = ('Bloomberg delivers business and markets news, data, analysis, and video' + ' to the world, featuring stories from Businessweek and Bloomberg News.') + + simultaneous_downloads = 1 extra_css = ''' - #auth {font-size:small; font-weight:bold;} - #time {font-size:small;} - #subhead {font-style:italic; color:#404040;} - #cat {font-size:small; color:gray;} - .news-figure-caption-text, #cap {font-size:small; text-align:center;} + .auth {font-size:small; font-weight:bold;} + .time, .chart {font-size:small;} + .subhead, .cap span {font-style:italic; color:#404040;} + em, .col {color:#202020;} + .cat {font-size:small; color:gray;} + .news-figure-caption-text, .cap, .img {font-size:small; text-align:center;} .news-figure-credit {font-size:small; text-align:center; color:#202020;} ''' articles_are_obfuscated = True + resolve_internal_links = True def get_obfuscated_article(self, url): br = self.get_browser() @@ -34,61 +93,96 @@ class Bloomberg(BasicNewsRecipe): br.open(url) except Exception as e: url = e.hdrs.get('location') - html = br.open(url).read() + soup = self.index_to_soup(url) + link = soup.find('a', attrs={'href':lambda x: x and x.startswith('https://www.bloomberg.com')}) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/', 'podcast' + ] + if any(x in 
link['href'] for x in skip_sections): + self.abort_article('Aborting Video article') + self.log('Found link: ', link['href']) + html = br.open(link['href']).read() pt = PersistentTemporaryFile('.html') pt.write(html) pt.close() return pt.name - def get_browser(self): - br = browser() + def get_browser(self, *a, **kw): + kw['user_agent'] = 'Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/119.0' + br = BasicNewsRecipe.get_browser(self, *a, **kw) br.set_handle_redirect(False) return br feeds = [ - ('Articles', 'https://news.google.com/rss/search?q=when:24h+allinurl:bloomberg.com&hl=en-US&gl=US&ceid=US:en'), + ('Features', + 'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fnews%2Ffeatures%2F&hl=en-US&gl=US&ceid=US:en'), + ('Opinion', 'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fopinion%2F&hl=en-US&gl=US&ceid=US:en'), + ('Newsletters', + 'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fnews%2Fnewsletters%2F&hl=en-US&gl=US&ceid=US:en'), + ('News', + 'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fnews%2Farticles%2F&hl=en-US&gl=US&ceid=US:en'), + ('Others', 'https://news.google.com/rss/search?q=when:27h+allinurl:https%3A%2F%2Fwww.bloomberg.com&hl=en-US&gl=US&ceid=US:en') ] def preprocess_raw_html(self, raw, *a): - m = re.search('data-component-props="ArticleBody">', raw) + root = parse(raw) + m = root.xpath('//script[@data-component-props="ArticleBody"]') if not m: - m = re.search('data-component-props="FeatureBody">', raw) + m = root.xpath('//script[@data-component-props="FeatureBody"]') + if not m: + m2 = root.xpath('//script[@id="__NEXT_DATA__"]') - raw = raw[m.start():] - raw = raw.split('>', 1)[1] - data = json.JSONDecoder().raw_decode(raw)[0] - data = data['story'] + if m: + data = json.loads(m[0].text) + data = data['story'] - title = '

    ' + data['headline'] + '

    ' + elif m2: + data = json.loads(m2[0].text) + data = data['props']['pageProps']['story'] + + art_url = data['url'] + if not art_url.startswith('http'): + art_url = 'https://www.bloomberg.com' + art_url + + title = '

    '.format(art_url) + data['headline'] + '

    ' cat = subhead = lede = auth = caption = '' - if 'primaryCategory' in data: - if data['primaryCategory'] is not None: - cat = '

    ' + data['primaryCategory'] + '

    ' + if 'primaryCategory' in data and data['primaryCategory'] is not None: + cat = '

    ' + data['primaryCategory'] + '

    ' - if len(data['abstract']) != 0: - if len(data['abstract']) == 2: - subhead = '

    ' + data['abstract'][0] + '

    ' + data['abstract'][1] + '

    ' + if len(data['abstract']) != 0 and len(data['abstract']) == 2: + subhead = '

    ' + data['abstract'][0] + '

    ' + data['abstract'][1] + '

    ' else: if 'summary' in data: - subhead = '
    ' + data['summary'] + '
    ' + subhead = '

    ' + data['summary'] + '

    ' - if 'byline' in data: - if data['byline'] is not None: - auth = '
    ' + data['byline']\ - + ' | ' + data['publishedAt'][:-14] + '
    ' + if 'byline' in data and data['byline'] is not None: + auth = '
    ' + data['byline']\ + + ' | ' + data['publishedAt'][:-14] + '
    ' - if 'ledeImageUrl' in data: - if data['ledeImageUrl'] is not None: - lede = '

    '.format(data['ledeImageUrl'].replace('\\', '')) + if 'ledeImageUrl' in data and data['ledeImageUrl'] is not None: + lede = '

    '.format(data['ledeImageUrl']) - if data['ledeDescription'] is not None: - caption = '' + data['ledeDescription'] + '' + if 'ledeDescription' in data and data['ledeDescription'] is not None: + caption = '' + data['ledeDescription'] + '' + else: + if 'lede' in data and data['lede'] is not None: + if 'alt' in data['lede'] and data['lede']['alt'] is not None: + caption = '' + data['lede']['alt'] + '' - body = data['body'].replace('\\', '') - html = '' + cat + title + subhead + auth + lede + caption + '

    ' + body - return html + if m: + time.sleep(3) + body = data['body'] + elif m2: + body = '' + body_data = data['body']['content'] + for x in body_data: + body += get_contents(x) + pause = random.choice((5, 6, 7, 8, 9)) + self.log('Delay: ', pause, ' seconds') + time.sleep(pause) + return '' + cat + title + subhead + auth + lede + caption + '
    ' + body + '
    ' def preprocess_html(self, soup): for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}): @@ -102,4 +196,12 @@ class Bloomberg(BasicNewsRecipe): img['src'] = img['data-native-src'] else: img['src'] = '' + for img in soup.findAll('img', attrs={'src':lambda x: x and x.endswith(('-1x-1.jpg', '-1x-1.png'))}): + img['src'] = img['src'].replace('-1x-1', '750x-1') return soup + + def populate_article_metadata(self, article, soup, first): + article.url = soup.find('h1')['title'] + article.summary = self.tag_to_string(soup.find('div', attrs={'class':'subhead'})) + article.text_summary = self.tag_to_string(soup.find('div', attrs={'class':'subhead'})) + article.title = article.title.replace(' - Bloomberg', '') diff --git a/recipes/bookforummagazine.recipe b/recipes/bookforummagazine.recipe new file mode 100644 index 0000000000..cab082a8e3 --- /dev/null +++ b/recipes/bookforummagazine.recipe @@ -0,0 +1,78 @@ +from urllib.parse import urljoin + +from calibre.web.feeds.news import BasicNewsRecipe + +_issue_url = "" + + +class BookforumMagazine(BasicNewsRecipe): + title = "Bookforum" + description = ( + "Bookforum is an American book review magazine devoted to books and " + "the discussion of literature. 
https://www.bookforum.com/print" + ) + language = "en" + __author__ = "ping" + publication_type = "magazine" + encoding = "utf-8" + remove_javascript = True + no_stylesheets = True + auto_cleanup = False + compress_news_images = True + compress_news_images_auto_size = 8 + + keep_only_tags = [dict(class_="blog-article")] + remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])] + + extra_css = """ + .blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; } + .blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; } + .blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; } + .blog-article__book-info { margin: 1rem 0; } + .article-image-container img, .blog-article__publication-media img { + display: block; max-width: 100%; height: auto; + } + .blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; } + """ + + def preprocess_html(self, soup): + # strip away links that's not needed + for ele in soup.select(".blog-article__header a"): + ele.unwrap() + return soup + + def parse_index(self): + soup = self.index_to_soup( + _issue_url if _issue_url else "https://www.bookforum.com/print" + ) + meta_ele = soup.find("meta", property="og:title") + if meta_ele: + self.timefmt = f' [{meta_ele["content"]}]' + + cover_ele = soup.find("img", class_="toc-issue__cover") + if cover_ele: + self.cover_url = urljoin( + "https://www.bookforum.com", + soup.find("img", class_="toc-issue__cover")["src"], + ) + + articles = {} + for sect_ele in soup.find_all("div", class_="toc-articles__section"): + section_name = self.tag_to_string( + sect_ele.find("a", class_="toc__anchor-links__link") + ) + for article_ele in sect_ele.find_all("article"): + title_ele = article_ele.find("h1") + sub_title_ele = article_ele.find(class_="toc-article__subtitle") + articles.setdefault(section_name, []).append( + { + "title": self.tag_to_string(title_ele), + "url": article_ele.find("a", class_="toc-article__link")[ + 
"href" + ], + "description": self.tag_to_string(sub_title_ele) + if sub_title_ele + else "", + } + ) + return articles.items() diff --git a/recipes/boston.com.recipe b/recipes/boston.com.recipe index 47f730a967..5291a54d3e 100644 --- a/recipes/boston.com.recipe +++ b/recipes/boston.com.recipe @@ -101,7 +101,10 @@ def parse_section(raw_html): continue title = text(elem['headlines']) description = text(elem.get('description')) - url = absolutize_url(elem['canonical_url']) + try: + url = absolutize_url(elem['canonical_url']) + except KeyError: + continue yield {'title': title, 'url': url, 'description': description, 'date': ' ' + str(date.date())} diff --git a/recipes/boston_globe_print_edition.recipe b/recipes/boston_globe_print_edition.recipe new file mode 100644 index 0000000000..a311f2b781 --- /dev/null +++ b/recipes/boston_globe_print_edition.recipe @@ -0,0 +1,113 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2016, Kovid Goyal +from calibre.web.feeds.news import BasicNewsRecipe, classes +from collections import defaultdict +from datetime import date + +def class_as_string(x): + if isinstance(x, (list, tuple)): + x = ' '.join(x) + return x + +def class_startswith(*prefixes): + + def q(x): + if x: + x = class_as_string(x) + for prefix in prefixes: + if x.startswith(prefix): + return True + return False + + return dict(attrs={'class': q}) + +def absolutize_url(url): + if url.startswith("//"): + return "https:" + url + if url.startswith('/'): + url = "https://www.bostonglobe.com" + url + return url + + +class BostonGlobePrint(BasicNewsRecipe): + title = "Boston Globe | Print Edition" + __author__ = 'Kovid Goyal, unkn0wn' + description = 'The Boston Globe - Today\'s Paper' + language = 'en' + + keep_only_tags = [ + class_startswith('headline |', 'subheader |', 'byline |', 'image |', 'lead |', 'body |', 'comic-debug'), + ] + remove_tags = [ + classes('inline-newsletter ad skip-nav article-footer sharebar arc_ad'), + 
dict(id='continue_button'), + dict(name=['meta', 'link']) + ] + remove_tags_after = dict(attrs={'class': lambda x:x and x.startswith('body |')}) + remove_attributes = ['style', 'height', 'width'] + no_stylesheets = True + scale_news_images = 1600, 1200 + ignore_duplicate_articles = {'url'} + # simultaneous_downloads = 1 + + def image_url_processor(self, baseurl, url): + return absolutize_url(url) + + def get_cover_url(self): + cover = 'https://img.kiosko.net/' + str( + date.today().year + ) + '/' + date.today().strftime('%m') + '/' + date.today( + ).strftime('%d') + '/us/boston_globe.750.jpg' + br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False) + try: + br.open(cover) + except: + index = 'https://en.kiosko.net/us/np/boston_globe.html' + soup = self.index_to_soup(index) + for image in soup.findAll('img', src=True): + if image['src'].endswith('750.jpg'): + return 'https:' + image['src'] + self.log("\nCover unavailable") + cover = None + return cover + + def parse_index(self): + + soup = self.index_to_soup('https://www.bostonglobe.com/todays-paper/') + if timefmt := soup.find(**classes('todays-date')): + self.timefmt = ' [' + self.tag_to_string(timefmt) + ']' + + feeds_dict = defaultdict(list) + + for div in soup.findAll('section', attrs={'id':['sp-top-main', 'sp-middle-main']}): + for a in div.findAll('a', href=lambda x: x and x.startswith('/' + str(date.today().year) + '/')): + section = 'Front Page' + if bar := a.findParent(**classes('container')).find_previous_sibling(**classes('title_bar')): + section = self.tag_to_string(bar) + url = absolutize_url(a['href']) + title = self.tag_to_string(a.find('h2')) + desc = '' + if d := a.find(**classes('deck')): + desc = self.tag_to_string(d) + + self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url) + feeds_dict[section].append({"title": title, "url": url, "description": desc}) + return [(section, articles) for section, articles in feeds_dict.items()] + + def preprocess_raw_html(self, raw_html, 
url): + soup = self.index_to_soup(raw_html) + meta = soup.find(attrs={'name': 'description'}, content=True) + if meta is not None and meta['content'].startswith('Comics: '): + meta = soup.find(property='og:image', content=True) + img_url = 'https://cloudfront-us-east-1.images.arcpublishing.com/bostonglobe/' + meta['content'].split('/')[-1] + title = self.tag_to_string(soup.find('title')) + raw_html = '

    {}

    '.format(title, img_url) + return raw_html + + def preprocess_html(self, soup): + for img in soup.findAll('img'): + fs = img.get('data-src') + if fs: + img['src'] = fs + return soup diff --git a/recipes/bsi_news.recipe b/recipes/bsi_news.recipe index cacd527bcc..49ff39d7ab 100644 --- a/recipes/bsi_news.recipe +++ b/recipes/bsi_news.recipe @@ -1,8 +1,6 @@ #!/usr/bin/env python from __future__ import absolute_import, division, print_function, unicode_literals -from datetime import datetime - from calibre.web.feeds.news import BasicNewsRecipe @@ -22,10 +20,6 @@ class germanyBSI(BasicNewsRecipe): simultaneous_downloads = 10 # description, some Reader show this in titlepage description = u'News from BSI' - # add date to description so for dayly downloads you can find them easier - # ---- can be edit by user - description = description + ' fetched: ' + \ - datetime.now().strftime("%Y-%m-%d") # %H:%M:%S") # Who published the content? publisher = u'Newsfeeds des BSI' # What is the content of? 
diff --git a/recipes/business_standard.recipe b/recipes/business_standard.recipe index 2f3522a970..25af5a8b32 100644 --- a/recipes/business_standard.recipe +++ b/recipes/business_standard.recipe @@ -1,58 +1,118 @@ -__license__ = 'GPL v3' -__copyright__ = '2009-2012, Darko Miletic ' -''' -www.business-standard.com -''' - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) - +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ptempfile import PersistentTemporaryFile +from html5_parser import parse +from datetime import datetime +import json class BusinessStandard(BasicNewsRecipe): title = 'Business Standard' - __author__ = 'Darko Miletic' + __author__ = 'unkn0wn' description = "India's most respected business daily" - oldest_article = 1 - max_articles_per_feed = 20 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - publisher = 'Business Standard Limited' - category = 'news, business, money, india, world' language = 'en_IN' - masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png' + no_stylesheets = True + remove_javascript = True + remove_attributes = ['width', 'height', 'float', 'style'] + def get_cover_url(self): soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/') for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): return citem['content'] - remove_attributes = ['width', 'height', 'style'] + def get_browser(self): + return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') - keep_only_tags = [ - classes('headline alternativeHeadline full-img article-content__img pubDate'), - dict(name='span', attrs={'class':'p-content'}), - ] - remove_tags = [ - classes('also-read-panel') + ignore_duplicate_articles = 
{'title', 'url'} + remove_empty_feeds = True + resolve_internal_links = True + max_articles_per_feed = 20 + + extra_css = ''' + img {display:block; margin:0 auto;} + .auth, .cat { font-size:small; color:#202020; } + .cap { font-size:small; text-align:center; } + ''' + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + soup = self.index_to_soup(url) + link = soup.find('a', attrs={'href':lambda x: x and x.startswith('https://www.business-standard.com')}) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/multimedia/', + ] + if any(x in link['href'] for x in skip_sections): + self.abort_article('skipping video links ', link['href']) + self.log('Found ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + feeds = [] + + sections = [ + 'india-news', 'economy', 'opinion', 'markets', 'companies', 'industry', 'finance', 'world-news', + # 'politics', 'cricket', 'sports', 'technology', 'book', 'education', 'specials' ] - feeds = [ - (u'Companies', u'https://www.business-standard.com/rss/companies-101.rss'), - (u'Economy and Policy', u'https://www.business-standard.com/rss/economy-policy-102.rss'), - (u'Finance', u'https://www.business-standard.com/rss/finance-103.rss'), - (u'Beyond Business', u'https://www.business-standard.com/rss/beyond-business-104.rss'), - (u'Opinion', 'https://www.business-standard.com/rss/opinion-105.rss'), - (u'Markets', u'https://www.business-standard.com/rss/markets-106.rss'), - (u'Technology', u'https://www.business-standard.com/rss/technology-108.rss'), - (u'Personal Finance', u'https://www.business-standard.com/rss/pf-114.rss'), - (u'International', u'https://www.business-standard.com/rss/international-116.rss'), - # (u'Today\'s Paper', u'https://www.business-standard.com/rss/todays-paper.rss'), - # for todays paper - subscrition required - ] + for sec in sections: + a = 
'https://news.google.com/rss/search?q=when:27h+allinurl:business-standard.com{}&hl=en-IN&gl=IN&ceid=IN:en' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + # feeds.append(('Others', a.format(''))) + + def preprocess_raw_html(self, raw, *a): + root = parse(raw) + m = root.xpath('//script[@id="__NEXT_DATA__"]') + + data = json.loads(m[0].text) + + img_url = None + if 'articleImageUrl' in data['props']['pageProps']['articleSchema']: + img_url = data['props']['pageProps']['articleSchema']['articleImageUrl'] + + art_url = 'https://www.business-standard.com' + data['props']['pageProps']['url'] + + data = data['props']['pageProps']['data'] + + title = '

    '.format(art_url) + data['pageTitle'] + '

    ' + + cat = subhead = lede = auth = caption = '' + + if 'defaultArticleCat' in data and data['defaultArticleCat'] is not None: + if 'h1_tag' in data['defaultArticleCat'] and data['defaultArticleCat']['h1_tag'] is not None: + cat = '

    ' + data['defaultArticleCat']['h1_tag'] + '

    ' + + if 'metaDescription' in data and data['metaDescription'] is not None: + subhead = '

    ' + data['metaDescription'] + '

    ' + self.art_desc = data['metaDescription'] + + date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p') + + authors = [] + if 'articleMappedMultipleAuthors' in data: + for aut in data['articleMappedMultipleAuthors']: + authors.append(data['articleMappedMultipleAuthors'][str(aut)]) + auth = '

    ' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '

    ' + + if 'featuredImageObj' in data: + if 'url' in data['featuredImageObj']: + if img_url is not None: + lede = '

    '.format(img_url) + else: + lede = '

    '.format(data['featuredImageObj']['url']) + if 'alt_text' in data['featuredImageObj']: + caption = '' + data['featuredImageObj']['alt_text'] + '

    ' + + body = data['htmlContent'] + + return '' + cat + title + subhead + auth + lede + caption + '

    ' + body + '
    ' + + def populate_article_metadata(self, article, soup, first): + article.url = soup.find('h1')['title'] + article.summary = self.tag_to_string(soup.find('h3')) + article.text_summary = self.tag_to_string(soup.find('h3')) + article.title = article.title.replace(' - Business Standard', '') diff --git a/recipes/business_standard_print.recipe b/recipes/business_standard_print.recipe new file mode 100644 index 0000000000..d648ce042f --- /dev/null +++ b/recipes/business_standard_print.recipe @@ -0,0 +1,124 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from html5_parser import parse +from datetime import datetime +import json + +today = datetime.today().strftime('%d-%m-%Y') + +# today = '20-09-2023' + +day, month, year = (int(x) for x in today.split('-')) +dt = datetime(year, month, day) + +class BusinessStandardPrint(BasicNewsRecipe): + title = 'Business Standard Print Edition' + __author__ = 'unkn0wn' + description = "India's most respected business daily, Articles from Today's Paper" + language = 'en_IN' + masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png' + encoding = 'utf-8' + timefmt = ' [' + dt.strftime('%b %d, %Y') + ']' + resolve_internal_links = True + remove_empty_feeds = True + + no_stylesheets = True + remove_javascript = True + remove_attributes = ['width', 'height', 'float', 'style'] + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + self.title = 'Business Standard ' + dt.strftime('%b %d, %Y') + + def get_browser(self): + return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') + + ignore_duplicate_articles = {'title', 'url'} + remove_empty_feeds = True + resolve_internal_links = True + + extra_css = ''' + img {display:block; margin:0 auto;} + .auth, .cat { font-size:small; color:#202020; } + .cap { font-size:small; text-align:center; } + ''' + + def 
get_cover_url(self): + soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/') + for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): + return citem['content'] + + def parse_index(self): + if dt.weekday() == 6: + self.log.warn( + 'Business Standard Does Not Have A Print Publication On Sunday. The Reports' + ' And Columns On This Page Today Appeared In The Newspaper\'s Saturday Edition.' + ) + url = 'https://apibs.business-standard.com/category/today-paper?sortBy=' + today + raw = self.index_to_soup(url, raw=True) + data = json.loads(raw) + data = data['data'] + + feeds = [] + + for section in data: + if section == 'EpaperImage': + continue + self.log(section) + articles = [] + for article in data[section]: + title = article['heading1'] + desc = article['sub_heading'] + url = 'https://www.business-standard.com' + article['article_url'] + self.log('\t', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'description':desc, 'url': url}) + if articles: + feeds.append((section, articles)) + return feeds + + def preprocess_raw_html(self, raw, *a): + root = parse(raw) + m = root.xpath('//script[@id="__NEXT_DATA__"]') + + data = json.loads(m[0].text) + + img_url = None + if 'articleImageUrl' in data['props']['pageProps']['articleSchema']: + img_url = data['props']['pageProps']['articleSchema']['articleImageUrl'] + + art_url = 'https://www.business-standard.com' + data['props']['pageProps']['url'] + + data = data['props']['pageProps']['data'] + + title = '

    '.format(art_url) + data['pageTitle'] + '

    ' + + cat = subhead = lede = auth = caption = '' + + if 'defaultArticleCat' in data and data['defaultArticleCat'] is not None: + if 'h1_tag' in data['defaultArticleCat'] and data['defaultArticleCat']['h1_tag'] is not None: + cat = '

    ' + data['defaultArticleCat']['h1_tag'] + '

    ' + + if 'metaDescription' in data and data['metaDescription'] is not None: + subhead = '

    ' + data['metaDescription'] + '

    ' + self.art_desc = data['metaDescription'] + + date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p') + + authors = [] + if 'articleMappedMultipleAuthors' in data: + for aut in data['articleMappedMultipleAuthors']: + authors.append(data['articleMappedMultipleAuthors'][str(aut)]) + auth = '

    ' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '

    ' + + if 'featuredImageObj' in data: + if 'url' in data['featuredImageObj']: + if img_url is not None: + lede = '

    '.format(img_url) + else: + lede = '

    '.format(data['featuredImageObj']['url']) + if 'alt_text' in data['featuredImageObj']: + caption = '' + data['featuredImageObj']['alt_text'] + '

    ' + + body = data['htmlContent'] + + return '' + cat + title + subhead + auth + lede + caption + '

    ' + body + '
    ' diff --git a/recipes/business_standard_print_edition.recipe b/recipes/business_standard_print_edition.recipe deleted file mode 100644 index 9158a49152..0000000000 --- a/recipes/business_standard_print_edition.recipe +++ /dev/null @@ -1,92 +0,0 @@ -''' -www.business-standard.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe, classes - - -class BusinessStandard(BasicNewsRecipe): - title = 'Business Standard | Print Edition' - __author__ = 'unkn0wn' - description = "India's most respected business daily" - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - publisher = 'Business Standard Limited' - category = 'news, business, money, india, world' - language = 'en_IN' - extra_css = ''' - .article__desc{font-size:small;} - .article_image{font-size:small; font-style:italic;} - .article__dateline{font-size:small;} - .full-img{font-size:small; font-style:italic; text-align:center;} - .pubDate{font-size:small; text-align:center;} - ''' - - masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png' - - def get_cover_url(self): - soup = self.index_to_soup( - 'https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/' - ) - for citem in soup.findAll( - 'meta', content=lambda s: s and s.endswith('view/3.jpg') - ): - return citem['content'] - - remove_attributes = ['width', 'height', 'style'] - - keep_only_tags = [ - classes( - 'article__title article__content article_content article_image article__dateline headline' - ' alternativeHeadline full-img article-content__img pubDate' - ), - dict(name='section', attrs={'subscriptions-section': 'content'}), - dict(name='span', attrs={'class': 'p-content'}) - ] - remove_tags = [ - classes('also-read-panel related-keyword more-stories-pagination'), - dict(name='br') - ] - - def parse_index(self): - soup = self.index_to_soup('https://www.business-standard.com/todays-paper') - ans = self.bs_parse_index(soup) - return 
ans - - def bs_parse_index(self, soup): - feeds = [] - div = soup.find('div', attrs={'class': 'main-cont-left'}) - for section in div.findAll('div', attrs={'class': 'row-inner'}): - h2 = section.find('h2') - secname = self.tag_to_string(h2) - self.log(secname) - articles = [] - for a in section.findAll( - 'a', href=lambda x: x and x.startswith('/article/') - ): - url = a['href'].replace('article', 'article-amp') - url = 'https://wap.business-standard.com' + url - title = self.tag_to_string(a).strip().replace('Premium Content', '') - articles.append({'title': title, 'url': url}) - self.log('\t', title, '\n\t\t', url) - if articles: - feeds.append((secname, articles)) - return feeds - - def preprocess_html(self, soup): - subs = soup.find('section', attrs={'subscriptions-section': 'content'}) - if subs: - art = soup.find(**classes('article_image')) - if art: - art.extract() - div = soup.find(**classes('article_content')) - if div: - div.extract() - h2 = soup.find('h2') - if h2: - h2.name = 'h4' - for img in soup.findAll('amp-img', src=True): - img.name = 'img' - img['src'] = img['src'].replace('\\', '').split('?')[0] - return soup diff --git a/recipes/business_today.recipe b/recipes/business_today.recipe index 88394d5219..44c5705ba3 100644 --- a/recipes/business_today.recipe +++ b/recipes/business_today.recipe @@ -7,6 +7,8 @@ class BT(BasicNewsRecipe): __author__ = 'unkn0wn' no_stylesheets = True use_embedded_content = False + remove_javascript = True + encoding = 'utf-8' remove_attributes = ['style', 'height', 'width'] ignore_duplicate_articles = {'url'} description = ( @@ -16,26 +18,36 @@ class BT(BasicNewsRecipe): masthead_url = 'https://akm-img-a-in.tosshub.com/businesstoday/resource/img/logo.png' keep_only_tags = [ - dict(name='h1'), - dict(name='h2'), - classes('brand-detial-main main-img story-with-main-sec'), + classes('story-heading sab-head-tranlate-sec brand-detial-main main-img field--name-body'), ] + remove_tags = [ + dict(name=['link', 'meta', 'svg', 
'button', 'script']), dict(name='a', attrs={'title': 'videos'}), - classes('tranding-topics-main newsltter-iframe hedlineteg') + classes( + 'tranding-topics-main newsltter-iframe hedlineteg stoybday-ad story-recommended-chunk ' + 'banner_content' + ) ] - extra_css = 'a[href^="https://www.businesstoday.in/videos"]{display: none;}' + extra_css = ''' + img {display:block; margin:0 auto;} + em { color:#202020; } + .main-img { font-size:small; text-align:center; } + .summary {font-style:italic; color:#202020; } + ''' def parse_index(self): - soup = self.index_to_soup('https://www.businesstoday.in/magazine') - issue = soup.find(attrs={'class': 'view-id-latest_issue_magzine'}) - a = issue.findAll('a', href=lambda x: x and x.startswith('/magazine/issue/'))[1] + self.log( + '\n***\nif this recipe fails, report it on: ' + 'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n' + ) + soup = self.index_to_soup('https://www.businesstoday.in') + a = soup.findAll('a', attrs={'class':'mag_sld_img'})[1] + self.cover_url = a.img['data-src'].split('?')[0] url = a['href'] self.log('issue =', url) - soup = self.index_to_soup('https://www.businesstoday.in' + url) - tag = soup.find(attrs={'class': 'issue-image'}) - if tag: - self.cover_url = tag.find('img')['src'] + self.timefmt = ' [' + url.split('/')[-1] + ']' + soup = self.index_to_soup(url) section = None sections = {} @@ -62,7 +74,7 @@ class BT(BasicNewsRecipe): # Insert feeds in specified order, if available - feedSort = ['Editors Note'] + feedSort = ['Editor\'s Note'] for i in feedSort: if i in sections: feeds.append((i, sections[i])) @@ -80,6 +92,20 @@ class BT(BasicNewsRecipe): return feeds def preprocess_html(self, soup): + auth = soup.find(**classes('brand-detial-main')) + if auth: + ul = auth.find('ul') + if ul: + ul.decompose() + for vid in soup.findAll('a', attrs={ + 'href': lambda x: x and 'businesstoday.in/videos' in x + }): + vid.decompose() + summ = soup.find(**classes('summary')) + if summ: + h2 = 
summ.find('h2') + if h2: + h2.name = 'p' for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'].split('?')[0] return soup diff --git a/recipes/calcalist.recipe b/recipes/calcalist.recipe index 271633fc73..4f888b0bd9 100644 --- a/recipes/calcalist.recipe +++ b/recipes/calcalist.recipe @@ -3,8 +3,8 @@ import re class AdvancedUserRecipe1283848012(BasicNewsRecipe): - description = 'This is a recipe of Calcalist.co.il. The recipe downloads the article page to not hurt the sites advertising income.' - cover_url = 'http://ftp5.bizportal.co.il/web/giflib/news/calcalist.JPG' + description = 'This is a recipe of Calcalist.co.il' + cover_url = 'https://images1.calcalist.co.il//picserver3/wcm_upload_dev/2022/09/15/Hk9OzwlWi/calcalistlogn.png' title = u'Calcalist' language = 'he' __author__ = 'marbs' @@ -16,39 +16,40 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe): max_articles_per_feed = 100 remove_attributes = ['width'] simultaneous_downloads = 5 - keep_only_tags = dict(name='div', attrs={'id': 'articleContainer'}) + keep_only_tags = [ + dict(name='h1', attrs={'class': 'mainTitle'}), + dict(name='h2', attrs={'class': 'subTitle'}), + dict(name='div', attrs={'class': 'ArticleBodyComponent'}), + ] remove_tags = [dict(name='p', attrs={'text': [' ']})] max_articles_per_feed = 100 preprocess_regexps = [ (re.compile(r'

     

    ', re.DOTALL | re.IGNORECASE), lambda match: '') ] - feeds = [(u'\u05d3\u05e3 \u05d4\u05d1\u05d9\u05ea', u'http://www.calcalist.co.il/integration/StoryRss8.xml'), - (u'24/7', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'), - (u'\u05d1\u05d0\u05d6\u05d6', - u'http://www.calcalist.co.il/integration/StoryRss3674.xml'), - (u'\u05de\u05d1\u05d6\u05e7\u05d9\u05dd', - u'http://www.calcalist.co.il/integration/StoryRss184.xml'), - (u'\u05d4\u05e9\u05d5\u05e7', - u'http://www.calcalist.co.il/integration/StoryRss2.xml'), - (u'\u05d1\u05d0\u05e8\u05e5', - u'http://www.calcalist.co.il/integration/StoryRss14.xml'), - (u'\u05d4\u05db\u05e1\u05e3', - u'http://www.calcalist.co.il/integration/StoryRss9.xml'), - (u'\u05e0\u05d3\u05dc"\u05df', - u'http://www.calcalist.co.il/integration/StoryRss7.xml'), - (u'\u05e2\u05d5\u05dc\u05dd', - u'http://www.calcalist.co.il/integration/StoryRss13.xml'), - (u'\u05e4\u05e8\u05e1\u05d5\u05dd \u05d5\u05e9\u05d9\u05d5\u05d5\u05e7', - u'http://www.calcalist.co.il/integration/StoryRss5.xml'), - (u'\u05e4\u05e0\u05d0\u05d9', - u'http://www.calcalist.co.il/integration/StoryRss3.xml'), - (u'\u05d8\u05db\u05e0\u05d5\u05dc\u05d5\u05d2\u05d9', - u'http://www.calcalist.co.il/integration/StoryRss4.xml'), - (u'\u05e2\u05e1\u05e7\u05d9 \u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.calcalist.co.il/integration/StoryRss18.xml')] - - def print_version(self, url): - split1 = url.split("-") - print_url = 'http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + \ - split1[1] - return print_url + feeds = [ + (u" דף הבית", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-8,00.xml"), + (u" 24/7", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3674,00.xml"), + (u" באזז", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3673,00.xml"), + (u" משפט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3772,00.xml"), + (u" רכב", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3783,00.xml"), + (u" אחריות וסביבה", 
u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3781,00.xml"), + (u" דעות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3791,00.xml"), + (u" תיירות ותעופה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3784,00.xml"), + (u" קריירה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3782,00.xml"), + (u" אחד העם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3768,00.xml"), + (u" המלצות ואזהרות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3771,00.xml"), + (u" הייטק והון סיכון", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3928,00.xml"), + (u" חדשות טכנולוגיה", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3778,00.xml"), + (u" תקשורת", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-4471,00.xml"), + (u" אינטרנט", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3773,00.xml"), + (u" מכשירים וגאדג'טים", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3777,00.xml"), + (u" המדריך", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3880,00.xml"), + (u" אפליקציות", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3998,00.xml"), + (u" Play", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3792,00.xml"), + (u" הכסף", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-9,00.xml"), + (u" עולם", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-13,00.xml"), + (u" פרסום ושיווק", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-5,00.xml"), + (u" פנאי", u"http://www.calcalist.co.il/GeneralRSS/0,16335,L-3,00.xml"), + (u" עסקי ספורט", u"http://WallaNewsw.calcalist.co.il/GeneralRSS/0,16335,L-18,00.xml") + ] diff --git a/recipes/caravan_magazine.recipe b/recipes/caravan_magazine.recipe index 1d5ffab35a..3ed3135d33 100644 --- a/recipes/caravan_magazine.recipe +++ b/recipes/caravan_magazine.recipe @@ -7,6 +7,7 @@ import json from mechanize import Request from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag def classes(classes): @@ -15,10 +16,17 @@ def classes(classes): 'class': lambda x: x and 
frozenset(x.split()).intersection(q)}) +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + class CaravanMagazine(BasicNewsRecipe): title = 'Caravan Magazine' - __author__ = 'Kovid Goyal, Gobelinus' + __author__ = 'Kovid Goyal, Gobelinus, unkn0wn' description = 'An Indian Journal of politics and culture' language = 'en_IN' timefmt = ' [%b, %Y]' @@ -27,16 +35,24 @@ class CaravanMagazine(BasicNewsRecipe): no_stylesheets = True - keep_only_tags = [ - classes('post-title short-desc author-details cover'), - dict(itemprop='articleBody'), - ] + remove_attributes = ['style', 'height', 'width'] + ignore_duplicate_articles = {'url'} + resolve_internal_links = True + + extra_css = ''' + blockquote {color:#202020;} + #fig-c {text-align:center; font-size:small;} + em {color:#202020;} + .article-footer {font-size:small;} + .date, .pre-title {font-size:small; color:#404040;} + .authors {font-size:small; font-weight:bold;} + ''' remove_tags = [ + classes('related-articles'), dict(name='meta'), dict(attrs={'class': ['share-with', 'img-wrap abs']}), ] - remove_attributes = ['style'] def get_browser(self, *args, **kw): br = BasicNewsRecipe.get_browser(self, *args, **kw) @@ -68,6 +84,8 @@ class CaravanMagazine(BasicNewsRecipe): def parse_index(self): base_url = 'https://www.caravanmagazine.in/' soup = self.index_to_soup('{0}magazine'.format(base_url)) + if magdate := soup.find('h6', attrs={'class':'magazine-date'}): + self.timefmt = ' [' + self.tag_to_string(magdate).strip() + ']' # find current issue cover feeds = [] @@ -94,10 +112,43 @@ class CaravanMagazine(BasicNewsRecipe): return feeds + def get_cover_url(self): + soup = self.index_to_soup( + 'https://www.readwhere.com/magazine/delhi-press/The-Caravan/5326' + ) + for citem in soup.findAll( + 'meta', content=lambda s: s and s.endswith('/magazine/300/new') + ): + return 
citem['content'].replace('300', '600') + + def print_version(self, url): + if not self.username or not self.password: + return url.replace('.in/','.in/amp/') + return url + def preprocess_html(self, soup): - for div in soup.findAll(itemprop='image'): - for img in div.findAll('img'): - img['src'] = div['content'] - for img in soup.findAll(attrs={'data-src': True}): - img['src'] = img['data-src'] + if not self.username or not self.password: + keep_only_tags = [classes('main-content')] + for fc in soup.findAll('figcaption'): + fc['id'] = 'fig-c' + for img in soup.findAll('amp-img'): + img.name = 'img' + if h6 := soup.find('h6'): + h6.name = 'h4' + else: + keep_only_tags = [ + classes('post-title short-desc author-details cover'), + dict(itemprop='articleBody'), + ] + for div in soup.findAll(itemprop='image'): + for img in div.findAll('img'): + img['src'] = div['content'] + for img in soup.findAll(attrs={'data-src': True}): + img['src'] = img['data-src'] + + body = new_tag(soup, 'body') + for spec in keep_only_tags: + for tag in soup.find('body').findAll(**spec): + body.insert(len(body.contents), tag) + soup.find('body').replaceWith(body) return soup diff --git a/recipes/cbc_canada.recipe b/recipes/cbc_canada.recipe index 763389a259..13a4867afb 100644 --- a/recipes/cbc_canada.recipe +++ b/recipes/cbc_canada.recipe @@ -9,11 +9,12 @@ class AdvancedUserRecipe1384137533(BasicNewsRecipe): max_articles_per_feed = 50 auto_cleanup = True - feeds = [(u'Top Stories', u'http://rss.cbc.ca/lineup/topstories.xml'), - (u'World', u'http://rss.cbc.ca/lineup/world.xml'), - (u'National', u'http://rss.cbc.ca/lineup/canada.xml'), - (u'Toronto', u'http://rss.cbc.ca/lineup/canada-toronto.xml'), - (u'Business', u'http://rss.cbc.ca/lineup/business.xml'), - (u'Politics', u'http://rss.cbc.ca/lineup/politics.xml'), - (u'Sci & Tech', u'http://rss.cbc.ca/lineup/technology.xml'), - (u'Offbeat', u'http://rss.cbc.ca/lineup/offbeat.xml')] + feeds = [(u'Top Stories', 
u'https://www.cbc.ca/webfeed/rss/rss-topstories'), + (u'World', u'https://www.cbc.ca/webfeed/rss/rss-world'), + (u'Canada', u'https://www.cbc.ca/webfeed/rss/rss-canada'), + (u'Politics', u'https://www.cbc.ca/webfeed/rss/rss-politics'), + (u'Business', u'https://www.cbc.ca/webfeed/rss/rss-business'), + (u'Arts', u'https://www.cbc.ca/webfeed/rss/rss-arts'), + (u'Health', u'https://www.cbc.ca/webfeed/rss/rss-health'), + (u'Technology', u'https://www.cbc.ca/webfeed/rss/rss-technology'), + (u'Indigenous', u'https://www.cbc.ca/webfeed/rss/rss-Indigenous'),] diff --git a/recipes/chowk.recipe b/recipes/chowk.recipe deleted file mode 100644 index eaab142724..0000000000 --- a/recipes/chowk.recipe +++ /dev/null @@ -1,45 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class ChowkRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en_IN' - version = 1 - - title = u'Chowk' - publisher = u'chowk.com' - category = u'Opinion, South Asia' - description = u'Ideas & Identities of South Asia' - - use_embedded_content = False - remove_empty_feeds = True - oldest_article = 30 - max_articles_per_feed = 100 - - remove_javascript = True - encoding = 'utf-8' - - feeds = [] - feeds.append(('Chowk Articles', 'http://www.chowk.com/rss')) - - keep_only_tags = [] - keep_only_tags.append(dict(name='div', attrs={'id': 'content'})) - - conversion_options = {'comments': description, 'tags': category, 'language': 'en', - 'publisher': publisher} - - extra_css = ''' - body{font-family:verdana,arial,helvetica,geneva,sans-serif;} - a {text-decoration: none; color: blue;} - div.pgtitle {font-size: x-large; font-weight: bold;} - div.wname, div.date {font-size: x-small; color: #696969;} - div.wname {margin-top: 1em;} - div.date {margin-bottom: 1em;} - div.title {font-weight: bold;} - ''' - - def print_version(self, url): - main, sep, id = url.rpartition('/') - - return main + '/print/' + id diff --git a/recipes/climate_progress.recipe 
b/recipes/climate_progress.recipe index b66b2ea6a4..f8c8467927 100644 --- a/recipes/climate_progress.recipe +++ b/recipes/climate_progress.recipe @@ -30,7 +30,7 @@ class ClimateProgress(BasicNewsRecipe): encoding = 'utf-8' language = 'en' - lang = 'en-US' + lang = 'en' direction = 'ltr' html2lrf_options = [ diff --git a/recipes/cnn.recipe b/recipes/cnn.recipe index 72af0e0a82..cb6ef666b6 100644 --- a/recipes/cnn.recipe +++ b/recipes/cnn.recipe @@ -4,8 +4,7 @@ __copyright__ = '2008, Kovid Goyal ' Profile to download CNN ''' -import re -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes class CNN(BasicNewsRecipe): @@ -18,38 +17,14 @@ class CNN(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False - oldest_article = 15 + oldest_article = 2 ignore_duplicate_articles = {'url'} - # recursions = 1 - # match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html'] max_articles_per_feed = 25 - compress_news_images = True - compress_news_images_auto_size = 12 - - extra_css = ''' - h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} - .cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} - .cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} - .cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} - .cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;} - ''' - - preprocess_regexps = [ - (re.compile(r'', re.DOTALL), lambda m: ''), - (re.compile(r'', re.DOTALL), lambda m: ''), - (re.compile(r'', re.DOTALL), lambda m: ''), - ] - + remove_attributes = ['style', 'height', 'width'] keep_only_tags = [ - dict(id=['body-text', 'storycontent']), - dict(attrs={'class': ['pg-headline', 'metadata']}), - ] - - remove_tags = [ - dict(attrs={'class': lambda x: x and bool({ - 
'video__end-slate', 'owl-filmstrip', 'el-embed-instagram', - }.intersection(set(x.split())))}), + classes('headline__wrapper headline__sub-container article__main'), ] + remove_tags = [classes('video-inline_carousel')] feeds = [ ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'), @@ -68,15 +43,6 @@ class CNN(BasicNewsRecipe): ('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss') ] - def preprocess_html(self, soup): - body = soup.find('body') - for h2 in soup.findAll(attrs={'class': 'pg-headline'}): - h2.extract() - body.insert(0, h2) - for img in soup.findAll('img', attrs={'data-src-medium': True}): - img['src'] = img['data-src-medium'] - return soup - def get_article_url(self, article): ans = BasicNewsRecipe.get_article_url(self, article) ans = ans.partition('?')[0] @@ -93,3 +59,8 @@ class CNN(BasicNewsRecipe): self.log("\nCover unavailable") masthead = None return masthead + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'src':lambda x: x and x.endswith('.svg')}): + img.extract() + return soup diff --git a/recipes/cumhuriyet.recipe b/recipes/cumhuriyet.recipe index b0c411d684..6b706c0f07 100644 --- a/recipes/cumhuriyet.recipe +++ b/recipes/cumhuriyet.recipe @@ -1,68 +1,42 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -from __future__ import unicode_literals -# based on a recipe by Darko Miletic -# -# Cumhuriyet Gazetesi'nin köşe yazıları okuyuculara cumhuriyet.com.tr -# adresi üzerinden ücretsiz olarak sunulmaktadır. -# Calibre yazılımıyla kullanılabilen bu reçete Cumhuriyet Gazetesi'nin -# günlük köşe yazılarını hızlıca derleyip e-okuyucunuzda kolayca okunabilir -# hale getirir. Yazıların yayınlanma saati sabah olduğu için reçeteyi -# 7:00-24:00 arasında çizelgelemeniz gerekmektedir. -# -# 2014-02-10: Yenilenmiş Cumhuriyet Gazetesi web sitesine göre değiştirildi. 
-# 2013-08-28: İlk sürüm - -__license__ = 'GPL v3' -__copyright__ = '2012, Sethi Eksi ' -''' -cumhuriyet.com.tr -''' - from calibre.web.feeds.news import BasicNewsRecipe -class Cumhuriyet_tr(BasicNewsRecipe): - title = 'Cumhuriyet - Yazarlar' - __author__ = 'Cumhuriyet Gazetesi Yazarları' - description = 'Günlük Cumhuriyet Gazetesi Köşe Yazıları' +class Cumhuriyet(BasicNewsRecipe): + title = 'Cumhuriyet' + __author__ = 'ims' + description = 'News from Turkey' + language = 'tr' publisher = 'Cumhuriyet' - category = 'news, politics, Turkey' oldest_article = 1 - max_articles_per_feed = 150 + max_articles_per_feed = 10 no_stylesheets = True encoding = 'utf-8' use_embedded_content = False - masthead_url = 'http://www.cumhuriyet.com.tr/image/template/Cumhuriyet_logo_300x60px.png' - cover_url = 'http://www.cumhuriyet.com.tr/image/template/Cumhuriyet_logo_300x60px.png' - language = 'tr' - extra_css = """ .name {display: block;width:100%;font-size:120%;} - #article-title {display: block;margin-top: 15px;width:100%;font-size:140%;} - #publish-date {display: block;width:100%;font-size:80%;} - """ -# extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} -# .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif} -# """ + timefmt = ' [%d %b %Y]' + ignore_duplicate_articles = {'title', 'url'} - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [dict(attrs={'class': 'links'}), dict( - attrs={'id': 'share-bar'}), dict(attrs={'id': 'font-adjust'})] - remove_tags_before = dict(attrs={'id': 'content'}) - remove_tags_after = dict(attrs={'id': 'content'}) - - feeds = [ - (u'Yazarlar', u'http://www.cumhuriyet.com.tr/rss/2') + keep_only_tags = [ + dict(name='h1', attrs={'class': 'baslik'}), + dict(name='h2', attrs={'class': 'spot'}), + dict(name='div', attrs={'class': 'haberKaynagi'}), + dict(name='div', attrs={'class': 'yayin-tarihi'}), 
+ dict(name='div', attrs={'class': 'haberMetni'}), ] -# def print_version(self, url): -# articleid = url.rpartition('hn=')[2] -# return 'http://www.cumhuriyet.com.tr/?hn=' + articleid + remove_tags = [ + dict(name=['button', 'svg']), + dict(name='ul', attrs={'class': 'breadcrumbs'}), + dict(name='div', attrs={'class': 'google-news'}), + dict(name='div', attrs={'class': 'iliskiliHaberler'}), + ] - def get_masthead_title(self): - return self.title + "(" + self.end_date + ")" - - def preprocess_html(self, soup): - return self.adeify_images(soup) + feeds = [ + ('Gundem', 'https://www.cumhuriyet.com.tr/rss/9999'), + ('Dünya', 'https://www.cumhuriyet.com.tr/rss/4'), + ('Türkiye', 'https://www.cumhuriyet.com.tr/rss/3'), + ('Ekonomi', 'https://www.cumhuriyet.com.tr/rss/5'), + ('Kultur Sanat', 'https://www.cumhuriyet.com.tr/rss/6'), + ('Siyaset', 'https://www.cumhuriyet.com.tr/rss/2'), + ('Bilim ve Teknoloji', 'https://www.cumhuriyet.com.tr/rss/10'), + ('Tarım', 'https://www.cumhuriyet.com.tr/rss/19'), + ] diff --git a/recipes/deccan_herald.recipe b/recipes/deccan_herald.recipe new file mode 100644 index 0000000000..bb8d69e3db --- /dev/null +++ b/recipes/deccan_herald.recipe @@ -0,0 +1,65 @@ +from calibre.ptempfile import PersistentTemporaryFile +from calibre.web.feeds.news import BasicNewsRecipe, classes + +class herald(BasicNewsRecipe): + title = 'Deccan Herald' + __author__ = 'unkn0wn' + description = 'Deccan Herald is an Indian English language daily newspaper published from the Indian state of Karnataka.' 
+ language = 'en_IN' + no_stylesheets = True + remove_attributes = ['height', 'width', 'style'] + ignore_duplicate_articles = {'url', 'title'} + encoding = 'utf-8' + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/bengaluru-crime/', '/metrolife/', + '/karnataka-districts/', '/brandspot/', '/entertainment/', + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping section') + + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + keep_only_tags = [ + classes('article-title article-author__name'), + dict(name='div', attrs={'id':'main-content'}) + + ] + + remove_tags = [ + classes( + 'storyShare social-media-icons in_article_video static_text' + ' nl-optin-mobile dk_only article-banner-adver-wrapper wb_holder' + ' field-name-field-tags section-full strip--business' + ) + ] + + feeds = [ + ('Nation', 'https://news.google.com/rss/search?q=when:27h+allinurl:deccanherald.com%2Fnational%2F&hl=en-IN&gl=IN&ceid=IN:en'), + ('Karnataka', 'https://news.google.com/rss/search?q=when:27h+allinurl:deccanherald.com%2Fstate%2F&hl=en-IN&gl=IN&ceid=IN:en'), + ('Opinion', 'https://news.google.com/rss/search?q=when:27h+allinurl:deccanherald.com%2Fopinion%2F&hl=en-IN&gl=IN&ceid=IN:en'), + ('City', + 'https://news.google.com/rss/search?q=when:27h+allinurl:deccanherald.com%2Fcity%2F&hl=en-IN&gl=IN&ceid=IN:en'), + ('Business', 'https://news.google.com/rss/search?q=when:27h+allinurl:deccanherald.com%2Fbusiness%2F&hl=en-IN&gl=IN&ceid=IN:en'), + ('World', + 
'https://news.google.com/rss/search?q=when:27h+allinurl:deccanherald.com%2Finternational%2F&hl=en-IN&gl=IN&ceid=IN:en'), + ('Sports', + 'https://news.google.com/rss/search?q=when:27h+allinurl:deccanherald.com%2Fsports%2F&hl=en-IN&gl=IN&ceid=IN:en'), + ('Others', 'https://news.google.com/rss/search?q=when:27h+allinurl:deccanherald.com&hl=en-IN&gl=IN&ceid=IN:en'), + ] \ No newline at end of file diff --git a/recipes/deutsche_welle_bs.recipe b/recipes/deutsche_welle_bs.recipe index 0b852c94c8..65ccd31cb9 100644 --- a/recipes/deutsche_welle_bs.recipe +++ b/recipes/deutsche_welle_bs.recipe @@ -1,73 +1,44 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -dw-world.de -''' - -import re -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe, classes class DeutscheWelle_bs(BasicNewsRecipe): title = 'Deutsche Welle' - __author__ = 'Darko Miletic' + __author__ = 'unkn0wn' description = 'Vijesti iz Njemacke i svijeta' publisher = 'Deutsche Welle' category = 'news, politics, Germany' - oldest_article = 1 + oldest_article = 2 max_articles_per_feed = 100 use_embedded_content = False no_stylesheets = True language = 'bs' publication_type = 'newsportal' remove_empty_feeds = True + remove_javascript = True masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif' - extra_css = """ - @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} - body{font-family: Arial,sans1,sans-serif} - img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} - .caption{font-size: x-small; display: block; margin-bottom: 0.4em} - """ - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - + + ignore_duplicate_articles = {'title', 'url'} + remove_attributes = ['height', 'width', 'style'] + + keep_only_tags = [ + dict(name='article') + ] + 
remove_tags = [ - dict(name=['iframe', 'embed', 'object', 'form', 'base', 'meta', 'link']), dict( - attrs={'class': 'actionFooter'}) + dict(name=['footer', 'source']), + dict(attrs={'data-tracking-name':'sharing-icons-inline'}), + classes('kicker advertisement vjs-wrapper') ] - keep_only_tags = [dict(attrs={'class': 'ArticleDetail detail'})] - remove_attributes = ['height', 'width', 'onclick', 'border', 'lang'] - + feeds = [ - - (u'Politika', u'http://rss.dw-world.de/rdf/rss-bos-pol'), - (u'Evropa', u'http://rss.dw-world.de/rdf/rss-bos-eu'), - (u'Kiosk', u'http://rss.dw-world.de/rdf/rss-bos-eu'), - (u'Ekonomija i Nuka', u'http://rss.dw-world.de/rdf/rss-bos-eco'), - (u'Kultura', u'http://rss.dw-world.de/rdf/rss-bos-cul'), - (u'Sport', u'http://rss.dw-world.de/rdf/rss-bos-sp') + (u'Politika', u'http://rss.dw-world.de/rdf/rss-bos-pol'), + (u'Evropa', u'http://rss.dw-world.de/rdf/rss-bos-eu'), + (u'Kiosk', u'http://rss.dw-world.de/rdf/rss-bos-eu'), + (u'Ekonomija i Nuka', u'http://rss.dw-world.de/rdf/rss-bos-eco'), + (u'Kultura', u'http://rss.dw-world.de/rdf/rss-bos-cul'), + (u'Sport', u'http://rss.dw-world.de/rdf/rss-bos-sp') ] - def print_version(self, url): - artl = url.rpartition('/')[2] - return 'http://www.dw-world.de/popups/popup_printcontent/' + artl - def preprocess_html(self, soup): - for item in soup.findAll('a'): - limg = item.find('img') - if item.string is not None: - str = item.string - item.replaceWith(str) - else: - if limg: - item.name = 'div' - del item['href'] - item['target'] = '' - del item['target'] - else: - str = self.tag_to_string(item) - item.replaceWith(str) + for img in soup.findAll('img', srcset=True): + img['src'] = img['srcset'].split()[6] return soup diff --git a/recipes/deutsche_welle_de.recipe b/recipes/deutsche_welle_de.recipe index 2d988f7c86..4ee5bd9c67 100644 --- a/recipes/deutsche_welle_de.recipe +++ b/recipes/deutsche_welle_de.recipe @@ -1,21 +1,12 @@ -from calibre.web.feeds.news import BasicNewsRecipe -# History: -# 1: Base 
Version -# 2: Added rules for wdr.de, ndr.de, br-online.de -# 3: Added rules for rbb-online.de, boerse.ard.de, sportschau.de -# 4: New design of tagesschau.de implemented. Simplified. -# 5: Taken out the pictures. - +from calibre.web.feeds.news import BasicNewsRecipe, classes class DeutscheWelle(BasicNewsRecipe): title = 'Deutsche Welle' description = 'Nachrichten der Deutschen Welle (DW)' publisher = 'DW - info@dw.com' language = 'de' - version = 1 - cover_url = 'https://pbs.twimg.com/profile_images/900269457976823808/nkod9w_m_400x400.jpg' - __author__ = 'VoHe' - oldest_article = 3 + __author__ = 'unkn0wn' + oldest_article = 2 max_articles_per_feed = 200 no_stylesheets = True remove_javascript = True @@ -23,26 +14,32 @@ class DeutscheWelle(BasicNewsRecipe): remove_javascript = True remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} + remove_attributes = ['height', 'width', 'style'] - remove_tags_before = dict(name='h4', attrs={'class':'artikel'}) - - remove_tags_after = dict(name='div', attrs={'class':'col1 dim'}) + keep_only_tags = [ + dict(name='article') + ] remove_tags = [ - dict(name='div', attrs={'class':'footerSection'}), - dict(name='div', attrs={'class':'sharing-bar'}), - dict(name='div', attrs={'class':'coll dim'}), - dict(name='div', attrs={'class':'languageSection'}), + dict(name=['footer', 'source']), + dict(attrs={'data-tracking-name':'sharing-icons-inline'}), + classes('kicker advertisement vjs-wrapper') ] + # watch out https://www.dw.com/de/service/rss/s-9773 for description of possible rss feeds feeds = [ - ('Thema des Tages', 'http://rss.dw.com/xml/rss-de-top'), - # ('Nachrichten', 'http://rss.dw.com/xml/rss-de-news'), + ('Nachrichten', 'http://rss.dw.com/xml/rss-de-news'), ('Wissenschaft', 'http://rss.dw.com/xml/rss-de-wissenschaft'), - # ('Sport', 'http://rss.dw.com/xml/rss-de-sport'), + ('Sport', 'http://rss.dw.com/xml/rss-de-sport'), ('Deuschland entdecken', 'http://rss.dw.com/xml/rss-de-deutschlandentdecken'), ('Presse', 
'http://rss.dw.com/xml/presse'), ('Politik', 'http://rss.dw.com/xml/rss_de_politik'), ('Wirtschaft', 'http://rss.dw.com/xml/rss-de-eco'), ('Kultur und Leben', 'http://rss.dw.com/xml/rss-de-cul'), + ('Thema des Tages', 'http://rss.dw.com/xml/rss-de-top'), ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', srcset=True): + img['src'] = img['srcset'].split()[6] + return soup diff --git a/recipes/deutsche_welle_en.recipe b/recipes/deutsche_welle_en.recipe index 3cde7e7418..faa02a6183 100644 --- a/recipes/deutsche_welle_en.recipe +++ b/recipes/deutsche_welle_en.recipe @@ -1,34 +1,31 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import unicode_literals, division, absolute_import, print_function - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' - -''' -Deutsche Welle (english) - dw.com/en -''' - -import re -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe, classes class DeutscheWelle_en(BasicNewsRecipe): title = 'Deutsche Welle' - __author__ = 'Darko Miletic' + __author__ = 'unkn0wn' description = 'News from Germany and the world' publisher = 'Deutsche Welle' language = 'en' - oldest_article = 1 + oldest_article = 2 max_articles_per_feed = 50 no_stylesheets = True remove_javascript = True remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} - + remove_attributes = ['height', 'width', 'style'] + + keep_only_tags = [ + dict(name='article') + ] + + remove_tags = [ + dict(name=['footer', 'source']), + dict(attrs={'data-tracking-name':'sharing-icons-inline'}), + classes('kicker advertisement vjs-wrapper') + ] + feeds = [ - ('Top Stories', 'http://rss.dw-world.de/rdf/rss-en-top'), ('World', 'http://rss.dw.de/rdf/rss-en-world'), ('Germany', 'http://rss.dw.de/rdf/rss-en-ger'), ('Europe', 'http://rss.dw.de/rdf/rss-en-eu'), @@ -36,40 +33,11 @@ class DeutscheWelle_en(BasicNewsRecipe): ('Culture & Lifestyle', 'http://rss.dw.de/rdf/rss-en-cul'), 
('Sports', 'http://rss.dw.de/rdf/rss-en-sports'), ('Visit Germany', 'http://rss.dw.de/rdf/rss-en-visitgermany'), - ('Asia', 'http://rss.dw.de/rdf/rss-en-asia') + ('Asia', 'http://rss.dw.de/rdf/rss-en-asia'), + ('Top Stories', 'http://rss.dw-world.de/rdf/rss-en-top'), ] - - keep_only_tags = [ - dict(name='div', attrs={'class': 'col3'}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': 'group'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': 'col1'}), - dict(name='div', attrs={'class': re.compile('gallery')}), - dict(name='div', attrs={'class': re.compile('audio')}), - dict(name='div', attrs={'class': re.compile('video')}) - ] - - remove_attributes = ['height', 'width', - 'onclick', 'border', 'lang', 'link'] - - extra_css = ''' - h1 {font-size: 1.6em; margin-top: 0em} - .artikel {font-size: 1em; text-transform: uppercase; margin: 0em} - ''' - + def preprocess_html(self, soup): - # convert local hyperlinks - for a in soup.findAll('a', href=True): - if a['href'].startswith('/'): - a['href'] = 'http://www.dw.com' + a['href'] - elif a['href'].startswith('#'): - del a['href'] - # remove all style attributes with an effect on font size - for item in soup.findAll(attrs={'style': re.compile('font-size')}): - del item['style'] + for img in soup.findAll('img', srcset=True): + img['src'] = img['srcset'].split()[6] return soup diff --git a/recipes/deutsche_welle_es.recipe b/recipes/deutsche_welle_es.recipe index 1300fea96d..a036b2a96b 100644 --- a/recipes/deutsche_welle_es.recipe +++ b/recipes/deutsche_welle_es.recipe @@ -1,21 +1,8 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import unicode_literals, division, absolute_import, print_function - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' - -''' -Deutsche Welle (español) - dw.com/es -''' - -import re -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe, classes class DeutscheWelle_es(BasicNewsRecipe): title = 
'Deutsche Welle' - __author__ = 'Darko Miletic' + __author__ = 'unkn0wn' description = 'Noticias desde Alemania y mundo' publisher = 'Deutsche Welle' language = 'es' @@ -27,6 +14,18 @@ class DeutscheWelle_es(BasicNewsRecipe): remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} + remove_attributes = ['height', 'width', 'style'] + + keep_only_tags = [ + dict(name='article') + ] + + remove_tags = [ + dict(name=['footer', 'source']), + dict(attrs={'data-tracking-name':'sharing-icons-inline'}), + classes('kicker advertisement vjs-wrapper') + ] + feeds = [ ('Titulares', 'http://rss.dw-world.de/rdf/rss-sp-top'), ('Noticias de Alemania', 'http://rss.dw-world.de/rdf/rss-sp-ale'), @@ -40,37 +39,8 @@ class DeutscheWelle_es(BasicNewsRecipe): ('Conozca Alemania', 'http://rss.dw-world.de/rdf/rss-sp-con') ] - keep_only_tags = [ - dict(name='div', attrs={'class': 'col3'}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': 'group'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': 'col1'}), - dict(name='div', attrs={'class': re.compile('gallery')}), - dict(name='div', attrs={'class': re.compile('audio')}), - dict(name='div', attrs={'class': re.compile('video')}) - ] - - remove_attributes = ['height', 'width', - 'onclick', 'border', 'lang', 'link'] - - extra_css = ''' - h1 {font-size: 1.6em; margin-top: 0em} - .artikel {font-size: 1em; text-transform: uppercase; margin: 0em} - ''' - + def preprocess_html(self, soup): - # convert local hyperlinks - for a in soup.findAll('a', href=True): - if a['href'].startswith('/'): - a['href'] = 'http://www.dw.com' + a['href'] - elif a['href'].startswith('#'): - del a['href'] - # remove all style attributes with an effect on font size - for item in soup.findAll(attrs={'style': re.compile('font-size')}): - del item['style'] + for img in soup.findAll('img', srcset=True): + img['src'] = img['srcset'].split()[6] return soup diff --git a/recipes/deutsche_welle_hr.recipe b/recipes/deutsche_welle_hr.recipe index 
906e4a1d39..73264fc635 100644 --- a/recipes/deutsche_welle_hr.recipe +++ b/recipes/deutsche_welle_hr.recipe @@ -1,20 +1,12 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -dw-world.de -''' - -import re -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe, classes class DeutscheWelle_hr(BasicNewsRecipe): title = 'Deutsche Welle' - __author__ = 'Darko Miletic' + __author__ = 'unkn0wn' description = 'Vesti iz Njemacke i svijeta' publisher = 'Deutsche Welle' category = 'news, politics, Germany' - oldest_article = 1 + oldest_article = 2 max_articles_per_feed = 100 use_embedded_content = False no_stylesheets = True @@ -22,50 +14,29 @@ class DeutscheWelle_hr(BasicNewsRecipe): publication_type = 'newsportal' remove_empty_feeds = True masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif' - extra_css = """ - @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} - body{font-family: Arial,sans1,sans-serif} - img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} - .caption{font-size: x-small; display: block; margin-bottom: 0.4em} - """ - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } + remove_javascript = True + + ignore_duplicate_articles = {'title', 'url'} + remove_attributes = ['height', 'width', 'style'] + keep_only_tags = [ + dict(name='article') + ] + remove_tags = [ - dict(name=['iframe', 'embed', 'object', 'form', 'base', 'meta', 'link']), dict( - attrs={'class': 'actionFooter'}) + dict(name=['footer', 'source']), + dict(attrs={'data-tracking-name':'sharing-icons-inline'}), + classes('kicker advertisement vjs-wrapper') ] - keep_only_tags = [dict(attrs={'class': 'ArticleDetail detail'})] - remove_attributes = ['height', 'width', 'onclick', 'border', 'lang'] - + feeds = [ - - (u'Svijet', 
u'http://rss.dw-world.de/rdf/rss-cro-svijet'), - (u'Europa', u'http://rss.dw-world.de/rdf/rss-cro-eu'), - (u'Njemacka', u'http://rss.dw-world.de/rdf/rss-cro-ger'), - (u'Vijesti', u'http://rss.dw-world.de/rdf/rss-cro-all') + (u'Svijet', u'http://rss.dw-world.de/rdf/rss-cro-svijet'), + (u'Europa', u'http://rss.dw-world.de/rdf/rss-cro-eu'), + (u'Njemacka', u'http://rss.dw-world.de/rdf/rss-cro-ger'), + (u'Vijesti', u'http://rss.dw-world.de/rdf/rss-cro-all') ] - def print_version(self, url): - artl = url.rpartition('/')[2] - return 'http://www.dw-world.de/popups/popup_printcontent/' + artl - def preprocess_html(self, soup): - for item in soup.findAll('a'): - limg = item.find('img') - if item.string is not None: - str = item.string - item.replaceWith(str) - else: - if limg: - item.name = 'div' - del item['href'] - item['target'] = '' - del item['target'] - else: - str = self.tag_to_string(item) - item.replaceWith(str) + for img in soup.findAll('img', srcset=True): + img['src'] = img['srcset'].split()[6] return soup diff --git a/recipes/deutsche_welle_pt.recipe b/recipes/deutsche_welle_pt.recipe index 4b9a9ea9dc..aff42efd0a 100644 --- a/recipes/deutsche_welle_pt.recipe +++ b/recipes/deutsche_welle_pt.recipe @@ -1,19 +1,12 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -dw-world.de -''' - -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe, classes class DeutscheWelle_pt(BasicNewsRecipe): title = 'Deutsche Welle' - __author__ = 'Darko Miletic' + __author__ = 'unkn0wn' description = 'Noticias desde Alemania y mundo' publisher = 'Deutsche Welle' category = 'news, politics, Germany' - oldest_article = 1 + oldest_article = 2 max_articles_per_feed = 100 use_embedded_content = False no_stylesheets = True @@ -21,42 +14,25 @@ class DeutscheWelle_pt(BasicNewsRecipe): publication_type = 'newsportal' remove_empty_feeds = True masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif' - 
extra_css = """ - body{font-family: Arial,sans-serif} - img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} - .caption{font-size: x-small; display: block; margin-bottom: 0.4em} - """ + + + remove_javascript = True + ignore_duplicate_articles = {'title', 'url'} + remove_attributes = ['height', 'width', 'style'] + + def preprocess_html(self, soup): + for img in soup.findAll('img', srcset=True): + img['src'] = img['srcset'].split()[6] + return soup - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['iframe', 'embed', 'object', 'form', 'base', 'meta', 'link']), dict( - attrs={'class': 'actionFooter'}) + keep_only_tags = [ + dict(name='article') + ] + + remove_tags = [ + dict(name=['footer', 'source']), + dict(attrs={'data-tracking-name':'sharing-icons-inline'}), + classes('kicker advertisement vjs-wrapper') ] - keep_only_tags = [dict(attrs={'class': 'ArticleDetail detail'})] - remove_attributes = ['height', 'width', 'onclick', 'border', 'lang'] feeds = [(u'Noticias', u'http://rss.dw-world.de/rdf/rss-br-all')] - - def print_version(self, url): - artl = url.rpartition('/')[2] - return 'http://www.dw-world.de/popups/popup_printcontent/' + artl - - def preprocess_html(self, soup): - for item in soup.findAll('a'): - limg = item.find('img') - if item.string is not None: - str = item.string - item.replaceWith(str) - else: - if limg: - item.name = 'div' - del item['href'] - item['target'] = '' - del item['target'] - else: - str = self.tag_to_string(item) - item.replaceWith(str) - return soup diff --git a/recipes/deutsche_welle_ru.recipe b/recipes/deutsche_welle_ru.recipe index 0fbbcc6327..ec4e838af0 100644 --- a/recipes/deutsche_welle_ru.recipe +++ b/recipes/deutsche_welle_ru.recipe @@ -1,24 +1,36 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 - -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes class 
DeutscheWelle(BasicNewsRecipe): title = u'Deutsche Welle \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u043E\u043C' description = u'\u0420\u0443\u0441\u0441\u043A\u0430\u044F \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u044F Deutsche Welle: \u043D\u043E\u0432\u043E\u0441\u0442\u0438, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430, \u043A\u043E\u043C\u043C\u0435\u043D\u0442\u0430\u0440\u0438\u0438 \u0438 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 \u0438\u0437 \u0413\u0435\u0440\u043C\u0430\u043D\u0438\u0438 \u0438 \u0415\u0432\u0440\u043E\u043F\u044B, \u043D\u0435\u043C\u0435\u0446\u043A\u0438\u0439 \u0438 \u0435\u0432\u0440\u043E\u043F\u0435\u0439\u0441\u043A\u0438\u0439 \u0432\u0437\u0433\u043B\u044F\u0434 \u043D\u0430 \u0441\u043E\u0431\u044B\u0442\u0438\u044F \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435, \u043F\u0440\u0430\u043A\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u0441\u043E\u0432\u0435\u0442\u044B \u0434\u043B\u044F \u0442\u0443\u0440\u0438\u0441\u0442\u043E\u0432 \u0438 \u0442\u0435\u0445, \u043A\u0442\u043E \u0436\u0435\u043B\u0430\u0435\u0442 \u0443\u0447\u0438\u0442\u044C\u0441\u044F \u0438\u043B\u0438 \u0440\u0430\u0431\u043E\u0442\u0430\u0442\u044C \u0432 \u0413\u0435\u0440\u043C\u0430\u043D\u0438\u0438 \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u0441\u0442\u0440\u0430\u043D\u0430\u0445 \u0415\u0432\u0440\u043E\u0441\u043E\u044E\u0437\u0430.' 
# noqa - __author__ = 'bugmen00t' + __author__ = 'bugmen00t, unkn0wn' publication_type = 'newspaper' - oldest_article = 14 + oldest_article = 2 max_articles_per_feed = 100 language = 'ru' - cover_url = 'https://www.dw.com/cssi/dwlogo-print.gif' - auto_cleanup = False - no_stylesheets = False + # cover_url = 'https://www.dw.com/cssi/dwlogo-print.gif' - remove_tags_before = dict(name='h1') + remove_javascript = True + no_stylesheets = True + remove_empty_feeds = True + ignore_duplicate_articles = {'title', 'url'} + remove_attributes = ['height', 'width', 'style'] + + def preprocess_html(self, soup): + for img in soup.findAll('img', srcset=True): + img['src'] = img['srcset'].split()[6] + return soup - remove_tags_after = dict(name='div', attrs={'class': 'longText'}) + keep_only_tags = [ + dict(name='article') + ] + + remove_tags = [ + dict(name=['footer', 'source']), + dict(attrs={'data-tracking-name':'sharing-icons-inline'}), + classes('kicker advertisement vjs-wrapper') + ] feeds = [ ( diff --git a/recipes/deutsche_welle_sr.recipe b/recipes/deutsche_welle_sr.recipe index b9c67e4976..7f1b1717c0 100644 --- a/recipes/deutsche_welle_sr.recipe +++ b/recipes/deutsche_welle_sr.recipe @@ -1,20 +1,12 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -dw-world.de -''' - -import re -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe, classes class DeutscheWelle_sr(BasicNewsRecipe): title = 'Deutsche Welle' - __author__ = 'Darko Miletic' + __author__ = 'unkn0wn' description = 'Vesti iz Nemacke i sveta' publisher = 'Deutsche Welle' category = 'news, politics, Germany' - oldest_article = 1 + oldest_article = 2 max_articles_per_feed = 100 use_embedded_content = False no_stylesheets = True @@ -22,55 +14,34 @@ class DeutscheWelle_sr(BasicNewsRecipe): publication_type = 'newsportal' remove_empty_feeds = True masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif' - extra_css = """ - 
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} - body{font-family: Arial,sans1,sans-serif} - img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} - .caption{font-size: x-small; display: block; margin-bottom: 0.4em} - """ - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['iframe', 'embed', 'object', 'form', 'base', 'meta', 'link']), dict( - attrs={'class': 'actionFooter'}) - ] - keep_only_tags = [dict(attrs={'class': 'ArticleDetail detail'})] - remove_attributes = ['height', 'width', 'onclick', 'border', 'lang'] - - feeds = [ - - (u'Politika', u'http://rss.dw-world.de/rdf/rss-ser-pol'), - (u'Srbija', u'http://rss.dw-world.de/rdf/rss-ser-pol-ser'), - (u'Region', u'http://rss.dw-world.de/rdf/rss-ser-pol-region'), - (u'Evropa', u'http://rss.dw-world.de/rdf/rss-ser-pol-eu'), - (u'Nemacka', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'), - (u'Svet', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'), - (u'Pregled stampe', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'), - (u'Nauka Tehnika Medicina', u'http://rss.dw-world.de/rdf/rss-ser-science'), - (u'Kultura', u'feed:http://rss.dw-world.de/rdf/rss-ser-cul') - ] - - def print_version(self, url): - artl = url.rpartition('/')[2] - return 'http://www.dw-world.de/popups/popup_printcontent/' + artl - + remove_javascript = True + ignore_duplicate_articles = {'title', 'url'} + remove_attributes = ['height', 'width', 'style'] + def preprocess_html(self, soup): - for item in soup.findAll('a'): - limg = item.find('img') - if item.string is not None: - str = item.string - item.replaceWith(str) - else: - if limg: - item.name = 'div' - del item['href'] - item['target'] = '' - del item['target'] - else: - str = self.tag_to_string(item) - item.replaceWith(str) + for img in soup.findAll('img', srcset=True): + img['src'] = 
img['srcset'].split()[6] return soup + + keep_only_tags = [ + dict(name='article') + ] + + remove_tags = [ + dict(name=['footer', 'source']), + dict(attrs={'data-tracking-name':'sharing-icons-inline'}), + classes('kicker advertisement vjs-wrapper') + ] + + feeds = [ + (u'Politika', u'http://rss.dw-world.de/rdf/rss-ser-pol'), + (u'Srbija', u'http://rss.dw-world.de/rdf/rss-ser-pol-ser'), + (u'Region', u'http://rss.dw-world.de/rdf/rss-ser-pol-region'), + (u'Evropa', u'http://rss.dw-world.de/rdf/rss-ser-pol-eu'), + (u'Nemacka', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'), + (u'Svet', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'), + (u'Pregled stampe', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'), + (u'Nauka Tehnika Medicina', u'http://rss.dw-world.de/rdf/rss-ser-science'), + (u'Kultura', u'feed:http://rss.dw-world.de/rdf/rss-ser-cul') + ] + diff --git a/recipes/deutschland_funk.recipe b/recipes/deutschland_funk.recipe new file mode 100644 index 0000000000..aefe587276 --- /dev/null +++ b/recipes/deutschland_funk.recipe @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +#from __future__ import unicode_literals, division, absolute_import, print_function +from calibre.web.feeds.news import BasicNewsRecipe + +__license__ = 'GPL v3' +__copyright__ = '2014, 2023 Armin Geller' + +''' +Fetch Deutschlandfunk & Deutschlandfunk Kultur +''' +## +## Written: 2014-08-29 +## Last Edited: 2023-04-26 +## Version:1.6 +## New RSS source: https://www.deutschlandfunk.de/rss-angebot-102.html + +class AdvancedUserRecipe1432200863(BasicNewsRecipe): + + title = 'Deutschlandfunk & Deutschlandfunk Kultur' + __author__ = 'Armin Geller' + publisher = 'Deutschlandfunk' + category = 'Radio, News, Politics, Social, Culture, Nature, Environmental' + timefmt = ' [%a, %d %b %Y]' + language = 'de' + encoding = 'UTF-8' + publication_type = 'News feed' + oldest_article = 2 + max_articles_per_feed = 100 + auto_cleanup = False + + extra_css = ''' + h1, h2 {font-size: 1.6em; text-align: left} + 
.article-header-description {font-size: 1em; font-style: italic; font-weight: normal;margin-bottom: 1em} + .b-image-figure, .caption-figure.is-left, .b-image-credits {font-size: .75em; font-weight: normal;margin-bottom: .75em} + ''' + + + + feeds = [ + ('DLF Nachrichten', 'https://www.deutschlandfunk.de/nachrichten-100.rss'), + ('DLF Politikportal', 'https://www.deutschlandfunk.de/politikportal-100.rss'), + ('DLF Wirtschaft', 'https://www.deutschlandfunk.de/wirtschaft-106.rss'), + ('DLF Wissen', 'https://www.deutschlandfunk.de/wissen-106.rss'), + ('DLF Kulturportal', 'https://www.deutschlandfunk.de/kulturportal-100.rss'), + ('DLF Europa', 'https://www.deutschlandfunk.de/europa-112.rss'), + ('DLF Gesellschaft', 'https://www.deutschlandfunk.de/gesellschaft-106.rss'), + ('DLF Sportportal', 'https://www.deutschlandfunk.de/sportportal-100.rss'), + ('DLF-Kultur Politik', 'https://www.deutschlandfunkkultur.de/politik-114.rss'), + ('DLF-Kultur Bücher', 'https://www.deutschlandfunkkultur.de/buecher-108.rss'), + ('DLF-Kultur Musikportal', 'https://www.deutschlandfunkkultur.de/musikportal-100.rss'), + ('DLF-Kultur Wissenschaft', 'https://www.deutschlandfunkkultur.de/wissenschaft-108.rss'), + ('DLF-Kultur Meinung / Debatte', 'https://www.deutschlandfunkkultur.de/meinung-debatte-100.rss'), + ('DLF-Kultur Umwelt', 'https://www.deutschlandfunkkultur.de/umwelt-104.rss'), + ('DLF-Kultur Philosophie', 'https://www.deutschlandfunkkultur.de/philosophie-104.rss'), + ('DLF-Kultur Psychologie', 'https://www.deutschlandfunkkultur.de/psychologie-100.rss'), + ('DLF-Kultur Geschichte', 'https://www.deutschlandfunkkultur.de/geschichte-136.rss'), + ('DLF-Kultur Leben', 'https://www.deutschlandfunkkultur.de/leben-108.rss'), + ('DLF-Kultur Bühne', 'https://www.deutschlandfunkkultur.de/buehne-100.rss'), + ('DLF-Kultur Film / Serie', 'https://www.deutschlandfunkkultur.de/film-serie-100.rss'), + ] + keep_only_tags = [ + dict(name='nav', attrs={'class':'b-breadcrumbs'}), # DLF articles + 
dict(name='article', attrs={'class':'b-article'}), # DLF articles + dict(name='div', attrs={'class':[ + 'b-section-article-head-area', + 'b-section-editor-content', + ]}), # DLF Kultur articles + ] + + remove_tags = [ + dict(name='div', attrs={'class':[ + # 'article-header-actions', + 'b-article-extended-emphasis is-teaser-list u-space-bottom-xl', + 'article-extended-emphasis-teaser-group', + 'b-embed-opt-in js-embed-opt-in', + ]}), # DLF articles + dict(name='ul', attrs={'class':['b-social-icons']}), # DLF articles + + dict(name='ul', attrs={'class':['b-social-icons']}), # DLF Kultur articles + dict(name='div', attrs={'class':'b-footer-area-series'}), # DLF Kultur articles + dict(name='div', attrs={'id':'weekender'}) + ] diff --git a/recipes/dr_dk.recipe b/recipes/dr_dk.recipe index 2ba55a2e36..a9cd6e6bb6 100644 --- a/recipes/dr_dk.recipe +++ b/recipes/dr_dk.recipe @@ -1,58 +1,131 @@ #!/usr/bin/env python # vim:fileencoding=utf-8 -from __future__ import unicode_literals, division, absolute_import, print_function +# License: GPLv3 Copyright: 2023, Joel Davies + from calibre.web.feeds.news import BasicNewsRecipe -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -DR.dk -''' - - class DRNyheder(BasicNewsRecipe): - title = 'DR Nyheder' - __author__ = 'Darko Miletic' - publisher = 'DR Nyheder' - description = 'Her finder du nyheder fra DR og alle vores TV og Radio kanaler live og on demand - når du har lyst.' 
- category = 'news, politics, money, culture, sport, science, Denmark' - oldest_article = 2 - max_articles_per_feed = 50 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'da' - auto_cleanup = False + + # Feeds are found here: https://www.dr.dk/nyheder/dr-nyheder-som-rss-feed + feeds = [ + ('Seneste nyt', 'https://www.dr.dk/nyheder/service/feeds/senestenyt'), + ('Indland', 'https://www.dr.dk/nyheder/service/feeds/indland'), + ('Udland', 'https://www.dr.dk/nyheder/service/feeds/udland'), + ('Penge', 'https://www.dr.dk/nyheder/service/feeds/penge'), + ('Politik', 'https://www.dr.dk/nyheder/service/feeds/politik'), + #('Sporten', 'https://www.dr.dk/nyheder/service/feeds/sporten'), + #('Seneste sport', 'https://www.dr.dk/nyheder/service/feeds/senestesport'), + ('Viden', 'https://www.dr.dk/nyheder/service/feeds/viden'), + ('Kultur', 'https://www.dr.dk/nyheder/service/feeds/kultur'), + ('Musik', 'https://www.dr.dk/nyheder/service/feeds/musik'), + ('Mit Liv', 'https://www.dr.dk/nyheder/service/feeds/mitliv'), + ('Mad', 'https://www.dr.dk/nyheder/service/feeds/mad'), + ('Vejret', 'https://www.dr.dk/nyheder/service/feeds/vejret'), + ('Regionale', 'https://www.dr.dk/nyheder/service/feeds/regionale'), + ('DR Hovedstadsområdet', 'https://www.dr.dk/nyheder/service/feeds/regionale/kbh'), + ('DR Bornholm', 'https://www.dr.dk/nyheder/service/feeds/regionale/bornholm'), + ('DR Syd og Sønderjylland', 'https://www.dr.dk/nyheder/service/feeds/regionale/syd'), + ('DR Fyn', 'https://www.dr.dk/nyheder/service/feeds/regionale/fyn'), + ('DR Midt- og Vestjylland', 'https://www.dr.dk/nyheder/service/feeds/regionale/vest'), + ('DR Nordjylland', 'https://www.dr.dk/nyheder/service/feeds/regionale/nord'), + ('DR Trekantområdet', 'https://www.dr.dk/nyheder/service/feeds/regionale/trekanten'), + ('DR Sjælland', 'https://www.dr.dk/nyheder/service/feeds/regionale/sjaelland'), + ('DR Østjylland', 'https://www.dr.dk/nyheder/service/feeds/regionale/oestjylland') + 
] + + title = 'DR Nyheder' + __author__ = 'Joel Davies' + publisher = 'DR Nyheder' + description = 'Her finder du nyheder fra DR.' + category = 'news, politics, money, culture, sport, science, Denmark' + publication_type = 'newspaper' + encoding = 'utf8' + language = 'da' + oldest_article = 4 # 2 might be best + max_articles_per_feed = 50 # 100 better, this is just for testing + no_stylesheets = True + use_embedded_content = False + auto_cleanup = False + remove_empty_feeds = True + ignore_duplicate_articles = {'title', 'url'} + simultaneous_downloads = 20 + compress_news_images = True + masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/18/DR_logo.svg/1024px-DR_logo.svg.png' + + extra_css = ''' + .dre-byline__contributions { + margin-bottom: 10px; + } + + .dre-byline__contributions div { + display: inline; + } + + .dre-byline__contribution + .dre-byline__contribution:before { + display: inline; + content: ", "; + } + + .dre-standard-article__figure { + margin-bottom: 30px; + text-align: center; + } + + .dre-picture { + margin-bottom: 10px; + } + + .dre-picture__image { + max-width: 100%; + height: auto; + } + + .dre-standard-article__figure-caption { + font-size: .85em; + color: #575757; + } + ''' + + # Skip articles with /stories/ URL as these are Instagram story-style interactive pieces that play videos + # Also DRTV as these are just links to the live TV channel + def preprocess_raw_html(self, raw_html, url): + if '/stories/' in url or '/drtv/' in url: + self.abort_article('Skipping unsupported article type') + return raw_html + + # Generate cover from the first image on the dr.dk homepage + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup('https://www.dr.dk/') + main_content = soup.find('ul', attrs={'class': 'dre-grid-layout'}) + cover_item = main_content.find('img') + if cover_item: + cover_url = cover_item['src'] + return cover_url + keep_only_tags = [ - dict(name="h1", attrs={'id': 'access-content'}), - 
dict(name="p", attrs={'class': 'summary'}), - dict(name="span", attrs={'itemprop': 'datePublished'}), - dict(name="div", attrs={'class': 'wcms-article-content'}), + + dict(name="h1", attrs={'class': 'dre-article-title__heading'}), # Title + dict(name="div", attrs={'class': 'dre-article-byline'}), # Author + dict(name="figure", attrs={'class': 'dre-standard-article__figure'}), # Comment out to remove images + dict(name="p", attrs={'class': 'dre-article-body-paragraph'}), # All body text of the article + dict(name="article", attrs={'itemtype': 'http://schema.org/NewsArticle'}), + #dict(name="h1", attrs={'class': 'hydra-latest-news-page-short-news__title'}), + #dict(name="p", attrs={'class': 'hydra-latest-news-page-short-news__paragraph'}), + #dict(name="div", attrs={'class': 'dre-speech'}), + #dict(name="div", attrs={'itemprop': 'author'}) ] remove_tags = [ - dict(name='menu', attrs={'class': 'share'}), - dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}), - ] - - # Feed are found here: http://www.dr.dk/nyheder/dr-nyheder-som-rss-feed - feeds = [ - ('Indland', 'http://www.dr.dk/nyheder/service/feeds/indland'), - ('Udland', 'http://www.dr.dk/nyheder/service/feeds/udland'), - ('Penge', 'http://www.dr.dk/nyheder/service/feeds/penge'), - ('Politik', 'http://www.dr.dk/nyheder/service/feeds/politik'), - ('Kultur', 'http://www.dr.dk/nyheder/service/feeds/kultur'), - ('Sporten', 'http://www.dr.dk/nyheder/service/feeds/sporten'), - ('Viden', 'http://www.dr.dk/nyheder/service/feeds/viden'), - ('Lev Nu', 'http://www.dr.dk/nyheder/service/feeds/levnu'), - ('DR Hovedstadsområdet', 'http://www.dr.dk/Nyheder/Service/feeds/regionale/kbh/'), - ('DR Bornholm', 'http://www.dr.dk/Nyheder/Service/feeds/regionale/bornholm/'), - ('DR Syd og Sønderjylland', 'http://www.dr.dk/Nyheder/Service/feeds/regionale/syd/'), - ('DR Fyn', 'http://www.dr.dk/Nyheder/Service/feeds/regionale/fyn/'), - ('DR Nordjylland', 'http://www.dr.dk/Nyheder/Service/feeds/regionale/nord/'), - ('DR 
Trekantområdet', 'http://www.dr.dk/Nyheder/Service/feeds/regionale/trekanten/'), - ('DR Sjælland', 'http://www.dr.dk/Nyheder/Service/feeds/regionale/sjaelland/'), - ('DR Østjylland', 'http://www.dr.dk/Nyheder/Service/feeds/regionale/oestjylland/'), + dict(name='ol', attrs={'class': 'hydra-latest-news-page__list'}), + dict(name='div', attrs={'class': [ + 'hydra-latest-news-page-short-news__share', 'hydra-latest-news-page-short-news__a11y-container', + 'hydra-latest-news-page-short-news__meta', 'hydra-latest-news-page-short-news__image-slider', 'dre-byline__dates']}), + dict(name="source"), + #dict(name='menu', attrs={'class': 'share'}), + #dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}), ] + # Fixes images having the wrong aspect ratio + remove_attributes = ['width', 'height'] diff --git a/recipes/dunyahalleri.recipe b/recipes/dunyahalleri.recipe index 2cb4ad86f9..8d7df9ba4d 100644 --- a/recipes/dunyahalleri.recipe +++ b/recipes/dunyahalleri.recipe @@ -10,6 +10,7 @@ from shutil import copyfile from calibre import strftime from calibre.ebooks.BeautifulSoup import Tag from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.utils.resources import get_path from PIL import Image, ImageDraw, ImageFont __license__ = 'GPL v3' @@ -168,7 +169,7 @@ class DunyaHalleri(BasicNewsRecipe): self.cover_img_path = None def draw_text(self, draw, text, text_size, top): - font_path = P('fonts/liberation/LiberationSerif-Bold.ttf') + font_path = get_path('fonts/liberation/LiberationSerif-Bold.ttf') font = ImageFont.truetype(font_path, text_size) width, height = draw.textsize(text, font=font) left = max(int((self.COVER_WIDTH - width) / 2.), 0) diff --git a/recipes/dunyahalleri_haftaninozeti.recipe b/recipes/dunyahalleri_haftaninozeti.recipe index 808976910c..ffee94b494 100644 --- a/recipes/dunyahalleri_haftaninozeti.recipe +++ b/recipes/dunyahalleri_haftaninozeti.recipe @@ -10,6 +10,7 @@ from shutil import copyfile from contextlib import closing from 
calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.utils.resources import get_path from PIL import Image, ImageDraw, ImageFont __license__ = 'GPL v3' @@ -232,7 +233,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe): self.cover_img_path = None def draw_text(self, draw, text, text_size, top): - font_path = P('fonts/liberation/LiberationSerif-Bold.ttf') + font_path = get_path('fonts/liberation/LiberationSerif-Bold.ttf') font = ImageFont.truetype(font_path, text_size) width, height = draw.textsize(text, font=font) left = max(int((self.COVER_WIDTH - width) / 2.), 0) @@ -262,7 +263,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe): logo = Image.open(logo_file, 'r') width, height = logo.size logo = logo.resize( - (self.COVER_WIDTH, (self.COVER_WIDTH * height / width)), Image.ANTIALIAS) + (self.COVER_WIDTH, (self.COVER_WIDTH * height / width)), Image.Resampling.LANCZOS) width, height = logo.size left = max(int((self.COVER_WIDTH - width) / 2.), 0) top = max(int((self.COVER_HEIGHT - height) / 2.), 0) diff --git a/recipes/dw_de.recipe b/recipes/dw_de.recipe deleted file mode 100644 index 6e3abc2b45..0000000000 --- a/recipes/dw_de.recipe +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1398527969(BasicNewsRecipe): - title = u'DW-Deutsch XXL' - language = 'de_DE' - __author__ = 'xav' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - no_stylesheets = True - use_embedded_content = False - remove_javascript = True - feeds = [(u'Nachrichten', u'http://rss.dw.com/xml/rss-de-news'), - (u'Themen des Tages', u'http://rss.dw.com/xml/rss-de-top'), - (u'Langsam gesprochene Nachrichten', - u'https://rss.dw.com/rdf/DKfeed_lgn_de'), - (u'Wissenschaft', u'https://rss.dw.com/xml/rss-de-wissenschaft'), - (u'Wirtschaft', u'https://rss.dw.com/xml/rss-de-eco'), - (u'Wort der Woche', - 
u'https://rss.dw.com/xml/DKpodcast_wortderwoche_de'), - (u'Deutschland entdecken', - u'http://rss.dw.com/xml/rss-de-deutschlandentdecken')] - - def print_version(self, url): - target = url.rpartition('/')[2] - print_url = 'https://www.dw-world.de/popups/popup_printcontent/' + target - return print_url diff --git a/recipes/echo_moskvy.recipe b/recipes/echo_moskvy.recipe index 5b208aa289..866fb95d90 100644 --- a/recipes/echo_moskvy.recipe +++ b/recipes/echo_moskvy.recipe @@ -1,26 +1,34 @@ -# vim:fileencoding=utf-8 +from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe +class EchoMsk(BasicNewsRecipe): + title = '\u042D\u0425\u041E' + __author__ = 'bugmen00t' + description = ('\u042D\u0425\u041E - \u043A\u0430\u043A \u043D\u0430 \u0441\u0442\u0430\u0440\u043E\u043C' + ' \u0434\u043E\u0431\u0440\u043E\u043C \u0440\u0430\u0434\u0438\u043E') + publisher = 'Radio Echo GmbH' + category = 'news' + cover_url = u'https://echofm.online/logo.png' + language = 'ru' + no_stylesheets = True + remove_javascript = False + auto_cleanup = False + oldest_article = 7 + max_articles_per_feed = 50 -class AdjectiveSpecies(BasicNewsRecipe): - title = u'Эхо Москвы' - __author__ = 'bug_me_not' - cover_url = u'http://echo.msk.ru/i/logo.png' - description = 'Радиостанция Эхо Москвы' - publisher = 'Эхо Москвы' - category = 'news' - language = 'ru' - no_stylesheets = True - remove_javascript = True - oldest_article = 300 - max_articles_per_feed = 100 + remove_tags_before = dict(name='article') - remove_tags_before = dict(name='div', attrs={'class': 'topic'}) - remove_tags_after = dict(name='div', attrs={'class': 'typical'}) - remove_tags = [dict(name='div', attrs={'class': 'addInNetBlock'}), - dict(name='div', attrs={'class': 'flash'})] + remove_tags_after = dict(name='article') + + remove_tags = [ + dict(name='span', attrs={'class': 'sc-7b4cbb79-0 guzUFC'}), + dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}), 
+ dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}) + ] feeds = [ - (u'Интервью и передачи', u'http://echo.msk.ru/interview/rss-fulltext.xml'), - (u'Блоги', u'http://echo.msk.ru/blog/rss.xml') + ('\u0413\u043B\u0430\u0432\u043D\u043E\u0435', 'https://echofm.online/feed'), + ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://echofm.online/news/feed'), + ('\u041C\u043D\u0435\u043D\u0438\u044F', 'https://echofm.online/opinions/feed'), + ('\u0414\u043E\u043A\u0443\u043C\u0435\u043D\u0442\u044B', 'https://echofm.online/documents/feed') ] diff --git a/recipes/economist.recipe b/recipes/economist.recipe index 43063f2c4b..cff4313a3c 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -1,11 +1,6 @@ #!/usr/bin/env python # License: GPLv3 Copyright: 2008, Kovid Goyal -try: - from http.cookiejar import Cookie -except ImportError: - from cookielib import Cookie - import json from html5_parser import parse from lxml import etree @@ -126,6 +121,7 @@ class Economist(BasicNewsRecipe): title = 'The Economist' language = 'en' encoding = 'utf-8' + masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' __author__ = "Kovid Goyal" description = ( @@ -156,6 +152,7 @@ class Economist(BasicNewsRecipe): font-size: smaller; color: red; } + img {display:block; margin:0 auto;} ''' oldest_article = 7.0 resolve_internal_links = True @@ -198,36 +195,20 @@ class Economist(BasicNewsRecipe): self.web2disk_options.compress_news_images_auto_size = 5 self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - # Add a cookie indicating we have accepted Economist's cookie - # policy (needed when running from some European countries) - ck = Cookie( - version=0, - name='notice_preferences', - value='2:', - port=None, - port_specified=False, - domain='.economist.com', - domain_specified=False, - domain_initial_dot=True, - 
path='/', - path_specified=False, - secure=False, - expires=None, - discard=False, - comment=None, - comment_url=None, - rest={'HttpOnly': None}, - rfc2109=False - ) - br.cookiejar.set_cookie(ck) - br.set_handle_gzip(True) + def get_browser(self, *args, **kwargs): + # Needed to bypass cloudflare + kwargs['user_agent'] = 'common_words/based' + br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br def preprocess_raw_html(self, raw, url): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) root = parse(raw) + if '/interactive/' in url: + return '

    ' + root.xpath('//h1')[0].text + '

    ' \ + + 'This article is supposed to be read in a browser' \ + + '
    ' script = root.xpath('//script[@id="__NEXT_DATA__"]') if script: try: @@ -251,6 +232,11 @@ class Economist(BasicNewsRecipe): x.text = x.text.upper() x.tag = 'span' x.set('style', 'font-variant: small-caps') + for x in root.xpath('//figcaption'): + x.set('style', 'text-align:center; font-size:small;') + for x in root.xpath('//cite'): + x.tag = 'blockquote' + x.set('style', 'color:#404040;') raw = etree.tostring(root, encoding='unicode') return raw @@ -269,6 +255,7 @@ class Economist(BasicNewsRecipe): self.timefmt = ' [' + edition_date + ']' else: url = 'https://www.economist.com/printedition' + # raw = open('/t/raw.html').read() raw = self.index_to_soup(url, raw=True) # with open('/t/raw.html', 'wb') as f: # f.write(raw) @@ -293,18 +280,21 @@ class Economist(BasicNewsRecipe): script_tag = soup.find("script", id="__NEXT_DATA__") if script_tag is not None: data = json.loads(script_tag.string) + # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) + # self.title = 'The Economist | ' + safe_dict(data, "props", "pageProps", "content", "image", "main", "headline") + self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "datePublishedString") + ']' self.cover_url = safe_dict(data, "props", "pageProps", "content", "image", "main", "url", "canonical") self.log('Got cover:', self.cover_url) feeds_dict = defaultdict(list) for part in safe_dict(data, "props", "pageProps", "content", "hasPart", "parts"): section = safe_dict(part, "print", "section", "headline") or '' - title = safe_dict(part, "print", "headline") or '' + title = safe_dict(part, "headline") or '' url = safe_dict(part, "url", "canonical") or '' if not section or not title or not url: continue - desc = safe_dict(part, "print", "description") or '' - sub = safe_dict(part, "print", "subheadline") or '' + desc = safe_dict(part, "description") or '' + sub = safe_dict(part, "subheadline") or '' if sub and section != sub: desc = sub + ' :: ' + desc 
feeds_dict[section].append({"title": title, "url": url, "description": desc}) diff --git a/recipes/economist_espresso.recipe b/recipes/economist_espresso.recipe new file mode 100644 index 0000000000..997e8fb81d --- /dev/null +++ b/recipes/economist_espresso.recipe @@ -0,0 +1,73 @@ +''' +https://www.economist.com/the-world-in-brief +''' + +from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.ebooks.BeautifulSoup import Tag + + +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + +class Espresso(BasicNewsRecipe): + title = 'The Economist Espresso' + language = 'en' + __author__ = 'unkn0wn' + description = ( + 'Espresso is a rich, full-flavoured shot of daily global analysis' + ' from the editors of The Economist to get you up to speed, fast.' + 'Maximise your understanding of the most significant business, ' + 'economic, political and cultural developments globally.' 
+ ) + cover_url = 'https://downloadr2.apkmirror.com/wp-content/uploads/2021/10/75/615777cc6611b.png' + no_stylesheets = True + remove_attributes = ['height', 'width', 'style'] + use_embedded_content = False + masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' + + extra_css = ''' + h1 { text-align:center; } + ._main-image, ._description, .sub { text-align:center; font-size:small; } + ._quote-container { font-size:x-large; font-style:italic; color:#202020; } + ''' + + keep_only_tags = [ + dict(name='main', attrs={'id':'content'}) + ] + + remove_tags = [ + classes('_podcast-promo _newsletter-promo-container _time-last-updated') + ] + + def parse_index(self): + return [ + ('Espresso', + [ + { + 'title': 'The World in Brief', + 'url': 'https://www.economist.com/the-world-in-brief', + 'description': 'Catch up quickly on the global stories that matter' + }, + ] + ), + ] + + def preprocess_html(self, soup): + if h1 := soup.find('h1'): + if p := h1.find_next_sibling('p'): + p['class'] = 'sub' + for hr in soup.findAll(attrs={'class':['_gobbet', '_article']}): + nt = new_tag(soup, 'hr') + hr.append(nt) + return soup + + def get_browser(self, *args, **kwargs): + # Needed to bypass cloudflare + kwargs['user_agent'] = 'common_words/based' + br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] + return br diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index 43063f2c4b..cff4313a3c 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -1,11 +1,6 @@ #!/usr/bin/env python # License: GPLv3 Copyright: 2008, Kovid Goyal -try: - from http.cookiejar import Cookie -except ImportError: - from cookielib import Cookie - import json from html5_parser import parse from lxml import etree @@ -126,6 +121,7 @@ class Economist(BasicNewsRecipe): title = 'The Economist' language = 'en' encoding = 'utf-8' + masthead_url = 
'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' __author__ = "Kovid Goyal" description = ( @@ -156,6 +152,7 @@ class Economist(BasicNewsRecipe): font-size: smaller; color: red; } + img {display:block; margin:0 auto;} ''' oldest_article = 7.0 resolve_internal_links = True @@ -198,36 +195,20 @@ class Economist(BasicNewsRecipe): self.web2disk_options.compress_news_images_auto_size = 5 self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - # Add a cookie indicating we have accepted Economist's cookie - # policy (needed when running from some European countries) - ck = Cookie( - version=0, - name='notice_preferences', - value='2:', - port=None, - port_specified=False, - domain='.economist.com', - domain_specified=False, - domain_initial_dot=True, - path='/', - path_specified=False, - secure=False, - expires=None, - discard=False, - comment=None, - comment_url=None, - rest={'HttpOnly': None}, - rfc2109=False - ) - br.cookiejar.set_cookie(ck) - br.set_handle_gzip(True) + def get_browser(self, *args, **kwargs): + # Needed to bypass cloudflare + kwargs['user_agent'] = 'common_words/based' + br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br def preprocess_raw_html(self, raw, url): # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) root = parse(raw) + if '/interactive/' in url: + return '

    ' + root.xpath('//h1')[0].text + '

    ' \ + + 'This article is supposed to be read in a browser' \ + + '
    ' script = root.xpath('//script[@id="__NEXT_DATA__"]') if script: try: @@ -251,6 +232,11 @@ class Economist(BasicNewsRecipe): x.text = x.text.upper() x.tag = 'span' x.set('style', 'font-variant: small-caps') + for x in root.xpath('//figcaption'): + x.set('style', 'text-align:center; font-size:small;') + for x in root.xpath('//cite'): + x.tag = 'blockquote' + x.set('style', 'color:#404040;') raw = etree.tostring(root, encoding='unicode') return raw @@ -269,6 +255,7 @@ class Economist(BasicNewsRecipe): self.timefmt = ' [' + edition_date + ']' else: url = 'https://www.economist.com/printedition' + # raw = open('/t/raw.html').read() raw = self.index_to_soup(url, raw=True) # with open('/t/raw.html', 'wb') as f: # f.write(raw) @@ -293,18 +280,21 @@ class Economist(BasicNewsRecipe): script_tag = soup.find("script", id="__NEXT_DATA__") if script_tag is not None: data = json.loads(script_tag.string) + # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) + # self.title = 'The Economist | ' + safe_dict(data, "props", "pageProps", "content", "image", "main", "headline") + self.timefmt = ' [' + safe_dict(data, "props", "pageProps", "content", "datePublishedString") + ']' self.cover_url = safe_dict(data, "props", "pageProps", "content", "image", "main", "url", "canonical") self.log('Got cover:', self.cover_url) feeds_dict = defaultdict(list) for part in safe_dict(data, "props", "pageProps", "content", "hasPart", "parts"): section = safe_dict(part, "print", "section", "headline") or '' - title = safe_dict(part, "print", "headline") or '' + title = safe_dict(part, "headline") or '' url = safe_dict(part, "url", "canonical") or '' if not section or not title or not url: continue - desc = safe_dict(part, "print", "description") or '' - sub = safe_dict(part, "print", "subheadline") or '' + desc = safe_dict(part, "description") or '' + sub = safe_dict(part, "subheadline") or '' if sub and section != sub: desc = sub + ' :: ' + desc 
feeds_dict[section].append({"title": title, "url": url, "description": desc}) diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe new file mode 100644 index 0000000000..3e7ce68cdb --- /dev/null +++ b/recipes/economist_world_ahead.recipe @@ -0,0 +1,320 @@ +#!/usr/bin/env python +# License: GPLv3 Copyright: 2008, Kovid Goyal + +import json +from html5_parser import parse +from lxml import etree + +from calibre import replace_entities +from calibre.ebooks.BeautifulSoup import NavigableString, Tag +from calibre.web.feeds.news import BasicNewsRecipe + + +def E(parent, name, text='', **attrs): + ans = parent.makeelement(name, **attrs) + ans.text = text + parent.append(ans) + return ans + + +def process_node(node, html_parent): + ntype = node.get('type') + if ntype == 'tag': + c = html_parent.makeelement(node['name']) + c.attrib.update({k: v or '' for k, v in node.get('attribs', {}).items()}) + html_parent.append(c) + for nc in node.get('children', ()): + process_node(nc, c) + elif ntype == 'text': + text = node.get('data') + if text: + text = replace_entities(text) + if len(html_parent): + t = html_parent[-1] + t.tail = (t.tail or '') + text + else: + html_parent.text = (html_parent.text or '') + text + + +def safe_dict(data, *names): + ans = data + for x in names: + ans = ans.get(x) or {} + return ans + + +class JSONHasNoContent(ValueError): + pass + + +def load_article_from_json(raw, root): + # open('/t/raw.json', 'w').write(raw) + try: + data = json.loads(raw)['props']['pageProps']['content'] + except KeyError as e: + raise JSONHasNoContent(e) + if isinstance(data, list): + data = data[0] + body = root.xpath('//body')[0] + for child in tuple(body): + body.remove(child) + article = E(body, 'article') + E(article, 'h4', data['subheadline'], style='color: red; margin: 0') + E(article, 'h1', data['headline'], style='font-size: x-large') + E(article, 'div', data['description'], style='font-style: italic; color: #202020;') + E(article, 
'div', (data['datePublishedString'] or '') + ' | ' + (data['dateline'] or ''), style='color: gray; margin: 1em') + main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') + if main_image_url: + div = E(article, 'div') + try: + E(div, 'img', src=main_image_url) + except Exception: + pass + for node in data.get('text') or (): + process_node(node, article) + + +def cleanup_html_article(root): + main = root.xpath('//main')[0] + body = root.xpath('//body')[0] + for child in tuple(body): + body.remove(child) + body.append(main) + main.set('id', '') + main.tag = 'article' + for x in root.xpath('//*[@style]'): + x.set('style', '') + for x in root.xpath('//button'): + x.getparent().remove(x) + + +def classes(classes): + q = frozenset(classes.split(' ')) + return dict(attrs={ + 'class': lambda x: x and frozenset(x.split()).intersection(q)}) + + +def new_tag(soup, name, attrs=()): + impl = getattr(soup, 'new_tag', None) + if impl is not None: + return impl(name, attrs=dict(attrs)) + return Tag(soup, name, attrs=attrs or None) + + +class NoArticles(Exception): + pass + + +def process_url(url): + if url.startswith('/'): + url = 'https://www.economist.com' + url + return url + + +class Economist(BasicNewsRecipe): + + title = 'The Economist World Ahead' + language = 'en' + encoding = 'utf-8' + + __author__ = "Kovid Goyal" + description = ( + 'The World Ahead is The Economist’s future-gazing publication. It prepares audiences for what is to ' + 'come with mind-stretching insights and expert analysis—all in The Economist’s clear, elegant style.' + ' Best downloaded in late November.' 
+ ) + extra_css = ''' + .headline {font-size: x-large;} + h2 { font-size: small; } + h1 { font-size: medium; } + em.Bold {font-weight:bold;font-style:normal;} + em.Italic {font-style:italic;} + p.xhead {font-weight:bold;} + .pullquote { + float: right; + font-size: larger; + font-weight: bold; + font-style: italic; + page-break-inside:avoid; + border-bottom: 3px solid black; + border-top: 3px solid black; + width: 228px; + margin: 0px 0px 10px 15px; + padding: 7px 0px 9px; + } + .flytitle-and-title__flytitle { + display: block; + font-size: smaller; + color: red; + } + img {display:block; margin:0 auto;} + ''' + oldest_article = 7.0 + resolve_internal_links = True + remove_tags = [ + dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']), + dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={ + 'class': [ + 'dblClkTrk', 'ec-article-info', 'share_inline_header', + 'related-items', 'main-content-container', 'ec-topic-widget', + 'teaser', 'blog-post__bottom-panel-bottom', 'blog-post__comments-label', + 'blog-post__foot-note', 'blog-post__sharebar', 'blog-post__bottom-panel', + 'newsletter-form','share-links-header','teaser--wrapped', 'latest-updates-panel__container', + 'latest-updates-panel__article-link','blog-post__section' + ] + } + ), + dict(attrs={ + 'class': lambda x: x and 'blog-post__siblings-list-aside' in x.split()}), + classes( + 'share-links-header teaser--wrapped latest-updates-panel__container' + ' latest-updates-panel__article-link blog-post__section newsletter-form blog-post__bottom-panel' + ) + ] + keep_only_tags = [dict(name='article', id=lambda x: not x)] + no_stylesheets = True + remove_attributes = ['data-reactid', 'width', 'height'] + # economist.com has started throttling after about 60% of the total has + # downloaded with connection reset by peer (104) errors. 
+ delay = 1 + + needs_subscription = False + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + # Reduce image sizes to get file size below amazon's email + # sending threshold + self.web2disk_options.compress_news_images = True + self.web2disk_options.compress_news_images_auto_size = 5 + self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') + + + def get_browser(self, *args, **kwargs): + # Needed to bypass cloudflare + kwargs['user_agent'] = 'common_words/based' + br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] + return br + + def preprocess_raw_html(self, raw, url): + # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) + root = parse(raw) + if '/interactive/' in url: + return '

    ' + root.xpath('//h1')[0].text + '

    ' \ + + 'This article is supposed to be read in a browser' \ + + '
    ' + script = root.xpath('//script[@id="__NEXT_DATA__"]') + if script: + try: + load_article_from_json(script[0].text, root) + except JSONHasNoContent: + cleanup_html_article(root) + for div in root.xpath('//div[@class="lazy-image"]'): + noscript = list(div.iter('noscript')) + if noscript and noscript[0].text: + img = list(parse(noscript[0].text).iter('img')) + if img: + p = noscript[0].getparent() + idx = p.index(noscript[0]) + p.insert(idx, p.makeelement('img', src=img[0].get('src'))) + p.remove(noscript[0]) + for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'): + x.getparent().remove(x) + # the economist uses for small caps with a custom font + for x in root.xpath('//small'): + if x.text and len(x) == 0: + x.text = x.text.upper() + x.tag = 'span' + x.set('style', 'font-variant: small-caps') + for x in root.xpath('//figcaption'): + x.set('style', 'text-align:center; font-size:small;') + for x in root.xpath('//cite'): + x.tag = 'blockquote' + x.set('style', 'color:#404040;') + raw = etree.tostring(root, encoding='unicode') + return raw + + def parse_index(self): + # return [('Articles', [{'title':'test', + # 'url':'https://www.economist.com/interactive/briefing/2022/06/11/huge-foundation-models-are-turbo-charging-ai-progress' + # }])] + url = 'https://www.economist.com/the-world-ahead' + # raw = open('/t/raw.html').read() + raw = self.index_to_soup(url, raw=True) + # with open('/t/raw.html', 'wb') as f: + # f.write(raw) + soup = self.index_to_soup(raw) + # nav = soup.find(attrs={'class':'navigation__wrapper'}) + # if nav is not None: + # a = nav.find('a', href=lambda x: x and '/printedition/' in x) + # if a is not None: + # self.log('Following nav link to current edition', a['href']) + # soup = self.index_to_soup(process_url(a['href'])) + ans = self.economist_parse_index(soup) + if not ans: + raise NoArticles( + 'Could not find any articles, either the ' + 'economist.com server is having trouble and you should ' + 
'try later or the website format has changed and the ' + 'recipe needs to be updated.' + ) + return ans + + def economist_parse_index(self, soup): + script_tag = soup.find("script", id="__NEXT_DATA__") + if script_tag is not None: + data = json.loads(script_tag.string) + # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) + self.title = safe_dict(data, "props", "pageProps", "content", "headline") + # self.cover_url = 'https://mma.prnewswire.com/media/2275620/The_Economist_The_World_Ahead_2024.jpg?w=600' + + feeds = [] + + for coll in safe_dict(data, "props", "pageProps", "content", "collections"): + section = safe_dict(coll, "headline") or '' + self.log(section) + articles = [] + for part in safe_dict(coll, "hasPart", "parts"): + title = safe_dict(part, "headline") or '' + url = safe_dict(part, "url", "canonical") or '' + if not title or not url: + continue + desc = safe_dict(part, "description") or '' + sub = safe_dict(part, "subheadline") or '' + if sub: + desc = sub + ' :: ' + desc + self.log('\t', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'description':desc, 'url': url}) + if articles: + feeds.append((section, articles)) + return feeds + + def eco_find_image_tables(self, soup): + for x in soup.findAll('table', align=['right', 'center']): + if len(x.findAll('font')) in (1, 2) and len(x.findAll('img')) == 1: + yield x + + def postprocess_html(self, soup, first): + for img in soup.findAll('img', srcset=True): + del img['srcset'] + for table in list(self.eco_find_image_tables(soup)): + caption = table.find('font') + img = table.find('img') + div = new_tag(soup, 'div') + div['style'] = 'text-align:left;font-size:70%' + ns = NavigableString(self.tag_to_string(caption)) + div.insert(0, ns) + div.insert(1, new_tag(soup, 'br')) + del img['width'] + del img['height'] + img.extract() + div.insert(2, img) + table.replaceWith(div) + return soup + + def canonicalize_internal_url(self, url, is_link=True): + if 
url.endswith('/print'): + url = url.rpartition('/')[0] + return BasicNewsRecipe.canonicalize_internal_url(self, url, is_link=is_link) diff --git a/recipes/eenadu_ap.recipe b/recipes/eenadu_ap.recipe index c204b4e71c..657ccb4ed0 100644 --- a/recipes/eenadu_ap.recipe +++ b/recipes/eenadu_ap.recipe @@ -47,7 +47,7 @@ class eenadu_ap(BasicNewsRecipe): date.today().year ) + '/' + date.today().strftime('%m') + '/' + date.today( ).strftime('%d') + '/in/eenadu.750.jpg' - br = BasicNewsRecipe.get_browser(self) + br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False) try: br.open(cover) except: diff --git a/recipes/el_confidencial.recipe b/recipes/el_confidencial.recipe new file mode 100644 index 0000000000..3b5b2865cb --- /dev/null +++ b/recipes/el_confidencial.recipe @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +__license__ = 'GPL v3' +__author__ = 'hmeza' +__description__ = 'El Confidencial - El diario de los lectores influyentes' +__version__ = 'v1.00.000' +__date__ = '27, April 2023' +''' +https://www.elconfidencial.com/ +''' + + +from calibre.web.feeds.news import BasicNewsRecipe + + +class ElConfidencial(BasicNewsRecipe): + title = u'El Confidencial' + oldest_article = 7.0 + __author__ = 'hmeza' + description = 'El Confidencial - El diario de los lectores influyentes' + timefmt = ' [%d %b, %Y]' + publication_type = 'newspaper' + language = 'es' + encoding = 'utf-8' + no_stylesheets = True + remove_javascript = True + + feeds = [ + (u'ACTUALIDAD - España', 'https://rss.elconfidencial.com/espana/'), + (u'ACTUALIDAD - Mundo', 'https://rss.elconfidencial.com/mundo/'), + (u'ACTUALIDAD - Comunicación', 'https://rss.elconfidencial.com/comunicacion/'), + (u'ACTUALIDAD - Sociedad', 'https://rss.elconfidencial.com/sociedad/'), + (u'OPINIÓN - A.casado', 'https://rss.blogs.elconfidencial.com/espana/al-grano/'), + (u'OPINIÓN - J.a.zarzalejos', 'https://rss.blogs.elconfidencial.com/espana/notebook/'), + (u'OPINIÓN - C.sánchez', 
'https://rss.blogs.elconfidencial.com/espana/mientras-tanto/'), + (u'OPINIÓN - El confidente', 'https://rss.blogs.elconfidencial.com/espana/el-confidente/'), + (u'COTIZALIA - Mercados', 'https://rss.elconfidencial.com/mercados/'), + (u'COTIZALIA - Economía', 'https://rss.elconfidencial.com/economia/'), + (u'COTIZALIA - Empresas', 'https://rss.elconfidencial.com/empresas/'), + (u'COTIZALIA - Finanzas personales', 'https://rss.elconfidencial.com/mercados/finanzas-personales/'), + (u'COTIZALIA - Vivienda', 'https://rss.elconfidencial.com/vivienda'), + (u'COTIZALIA - Fondos de inversión', 'https://rss.elconfidencial.com/mercados/fondos-de-inversion/'), + (u'TEKNAUTAS - Aplicaciones', 'https://rss.elconfidencial.com/tags/temas/apps-9337/'), + (u'TEKNAUTAS - Emprendedores', 'https://rss.elconfidencial.com/tags/economia/emprendedores-4800/'), + (u'TEKNAUTAS - Gadgets', 'https://rss.elconfidencial.com/tags/temas/gadgets-9340/'), + (u'TEKNAUTAS - Hardware', 'https://rss.elconfidencial.com/tags/temas/hardware-9341/'), + (u'TEKNAUTAS - Internet', 'https://rss.elconfidencial.com/tags/temas/internet-9342/'), + (u'TEKNAUTAS - Móviles', 'https://rss.elconfidencial.com/tags/otros/moviles-8601/'), + (u'TEKNAUTAS - Redes sociales', 'https://rss.elconfidencial.com/tags/temas/redes-sociales-9344/'), + (u'DEPORTES - Fútbol', 'https://rss.elconfidencial.com/deportes/futbol/'), + (u'DEPORTES - Baloncesto', 'https://rss.elconfidencial.com/deportes/baloncesto/'), + (u'DEPORTES - Fórmula 1', 'https://rss.elconfidencial.com/deportes/formula-1/'), + (u'DEPORTES - Motociclismo', 'https://rss.elconfidencial.com/deportes/motociclismo/'), + (u'DEPORTES - Tenis', 'https://rss.elconfidencial.com/deportes/tenis/'), + (u'DEPORTES - Ciclismo', 'https://rss.elconfidencial.com/deportes/ciclismo/'), + (u'DEPORTES - Golf', 'https://rss.elconfidencial.com/deportes/golf/'), + (u'DEPORTES - Otros deportes', 'https://rss.elconfidencial.com/deportes/otros-deportes/'), + (u'ACV - Alimentación', 
'https://rss.elconfidencial.com/tags/otros/alimentacion-5601/'), + (u'ACV - Bienestar', 'https://rss.elconfidencial.com/tags/temas/bienestar-9331/'), + (u'ACV - Educación', 'https://rss.elconfidencial.com/tags/temas/educacion-9332/'), + (u'ACV - Psicología', 'https://rss.elconfidencial.com/tags/temas/psicologia-9333/'), + (u'ACV - Salud', 'https://rss.elconfidencial.com/tags/otros/salud-6110/'), + (u'ACV - Sexualidad', 'https://rss.elconfidencial.com/tags/temas/sexualidad-6986/'), + (u'ACV - Trabajo', 'https://rss.elconfidencial.com/tags/economia/trabajo-5284/'), + (u'CULTURA - Libros', 'https://rss.elconfidencial.com/tags/otros/libros-5344/'), + (u'CULTURA - Arte', 'https://rss.elconfidencial.com/tags/otros/arte-6092/'), + (u'CULTURA - Cine', 'https://rss.elconfidencial.com/tags/otros/cine-7354/'), + (u'CULTURA - Música', 'https://rss.elconfidencial.com/tags/otros/musica-5272/'), + (u'VANITATIS - Actualidad', 'https://rss.vanitatis.elconfidencial.com/noticias/'), + (u'VANITATIS - Tendencias', 'https://rss.vanitatis.elconfidencial.com/estilo/'), + (u'VANITATIS - Televisión', 'https://rss.vanitatis.elconfidencial.com/television/'), + (u'VANITATIS - Casas reales', 'https://rss.vanitatis.elconfidencial.com/casas-reales/'), + (u'VANITATIS - Blogs', 'https://rss.blogs.vanitatis.elconfidencial.com/'), + (u'ALIMENTE - Nutrición', 'https://rss.alimente.elconfidencial.com/nutricion/'), + (u'ALIMENTE - Consumo', 'https://rss.alimente.elconfidencial.com/consumo/'), + (u'ALIMENTE - Gastronomía', 'https://rss.alimente.elconfidencial.com/gastronomia-y-cocina/'), + (u'ALIMENTE - Bienestar', 'https://rss.alimente.elconfidencial.com/bienestar/'), + (u'ALIMENTE - Recetas', 'https://rss.alimente.elconfidencial.com/recetas/'), + (u'GENTLEMAN - Gentlemanía', 'https://rss.gentleman.elconfidencial.com/gentlemania/'), + (u'GENTLEMAN - Nombres propios', 'https://rss.gentleman.elconfidencial.com/personajes/'), + (u'GENTLEMAN - Style', 
'https://rss.gentleman.elconfidencial.com/estilo-hombre/'), + (u'GENTLEMAN - Gourmet', 'https://rss.gentleman.elconfidencial.com/gourmet/'), + (u'GENTLEMAN - Relojes', 'https://rss.gentleman.elconfidencial.com/relojes/') + ] diff --git a/recipes/el_correo.recipe b/recipes/el_correo.recipe index 3854eb5074..7af992a5c6 100644 --- a/recipes/el_correo.recipe +++ b/recipes/el_correo.recipe @@ -130,7 +130,7 @@ class elcorreo(BasicNewsRecipe): # Controlamos si el artículo ha sido incluido en otro feed para eliminarlo - if not (link in self._processed_links): + if link not in self._processed_links: self._processed_links.append(link) else: link = None diff --git a/recipes/el_diplo.recipe b/recipes/el_diplo.recipe index d7e6ee9bb6..c9c44e26f0 100644 --- a/recipes/el_diplo.recipe +++ b/recipes/el_diplo.recipe @@ -1,102 +1,147 @@ # -*- mode: python; coding: utf-8; -*- # vim: set syntax=python fileencoding=utf-8 -__license__ = 'GPL v3' -__copyright__ = '2021, Darko Miletic ' +__license__ = "GPL v3" +__copyright__ = "2023, Tomás Di Domenico " -''' +""" www.eldiplo.org -''' +""" from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ptempfile import PersistentTemporaryFile -class ElDiplo2020(BasicNewsRecipe): - title = 'Le Monde Diplomatique - cono sur' - __author__ = 'Darko Miletic' - description = 'Publicación de Le Monde Diplomatique para el cono sur.' - publisher = 'Le Monde Diplomatique' - category = 'news, politics, Argentina, Uruguay, Paraguay, South America, World' - oldest_article = 31 - no_stylesheets = True - encoding = 'utf8' +class ElDiplo2023(BasicNewsRecipe): + title = "Le Monde Diplomatique - cono sur" + __author__ = "Tomás Di Domenico" + description = "Publicación de Le Monde Diplomatique para el cono sur." 
+ publisher = "Capital Intelectual" + category = "News, Politics, Argentina, Uruguay, Paraguay, South America, World" + oldest_article = 31 + no_stylesheets = True + encoding = "utf8" use_embedded_content = False - language = 'es_AR' - remove_empty_feeds = True - publication_type = 'magazine' - auto_cleanup = True - auto_cleanup_keep = '//div[contains(@class, "autor")] | //div[@class="edicion"]' - delay = 1 + language = "es_AR" + remove_empty_feeds = True + publication_type = "magazine" + delay = 1 simultaneous_downloads = 1 - timeout = 8 - needs_subscription = 'optional' - ignore_duplicate_articles = {'url'} - articles_are_obfuscated = True - temp_files = [] - fetch_retries = 10 + timeout = 8 + needs_subscription = True + ignore_duplicate_articles = {"url"} + temp_files = [] + fetch_retries = 10 handle_gzip = True compress_news_images = True scale_news_images_to_device = True - masthead_url = 'https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png' - INDEX = 'https://www.eldiplo.org/' + masthead_url = ( + "https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png" + ) + INDEX = "https://www.eldiplo.org/" - extra_css = """ - body{font-family: "GT Super", serif} - .autor{font-family: Inter, serif} + conversion_options = {"series": "El Dipló", "publisher": publisher, "base_font_size": 8, "tags": category} + + keep_only_tags = [dict(name=["article"])] + + remove_tags = [dict(name=["button"])] + + extra_css = """ + .entry-title { + text-align: center; + } + .text-right { + text-align: right; + } + .bajada { + display: block; + font-family: sans-serif; + text-align: center; + font-size: 110%; + padding: 2%; + } + .Destacado{ + display: block; + font-size: 120%; + font-weight: bold; + font-style: italic; + padding-left: 10%; + padding-right: 10%; + } """ - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - def get_browser(self): br = BasicNewsRecipe.get_browser(self) 
br.open(self.INDEX) if self.username is not None and self.password is not None: - br.select_form(id='loginform') - br['log'] = self.username - br['pwd'] = self.password + br.select_form(id="loginform") + br["log"] = self.username + br["pwd"] = self.password br.submit() return br - def parse_index(self): - articles = [] - soup = self.index_to_soup(self.INDEX) - mylink = soup.find('span', text='Sumario') - if mylink is None: - return None - indexurl = "https://www.eldiplo.org" + mylink.parent['href'] - self.log(indexurl) - parts = indexurl.split('www.eldiplo.org/', 1) - series = parts[1].split('-', 1)[0] - self.conversion_options.update({'series' : self.title}) - self.conversion_options.update({'series_index' : series}) - soupindex = self.index_to_soup(indexurl) - totalfeeds = [] - articles = [] - for article in soupindex.findAll('a', href=True, attrs={'class':'title'}): - url = article['href'] - title = self.tag_to_string(article) - articles.append({'title': title, 'url': url, 'description': '', 'date': ''}) - self.log('title: ', title, ' url: ', url) - totalfeeds.append(('Articles',articles)) - return totalfeeds + def get_cover_url(self): + soup_index = self.index_to_soup(self.INDEX) + tag_sumario = soup_index.find("span", text="Sumario") + url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"] - def get_obfuscated_article(self, url): - result = None - count = 0 - while (count < self.fetch_retries): - try: - response = self.browser.open(url, timeout=self.timeout) - html = response.read() - count = self.fetch_retries - tfile = PersistentTemporaryFile('_fa.html') - tfile.write(html) - tfile.close() - self.temp_files.append(tfile) - result = tfile.name - except: - self.info("Retrying download...") - count += 1 - return result + soup = self.index_to_soup(url_sumario) + + container = soup.find("div", class_="px-16") + url = container.find("img")["src"] + + return getattr(self, "cover_url", url) + + def _process_article(self, article): + url = 
article.find("a", href=True, attrs={"class": "title"})["href"] + title = self.tag_to_string(article).replace("Editorial", "Editorial: ") + try: + title, authors = title.split(", por") + authors = f"por {authors}" + except ValueError: + authors = "" + self.log("title: ", title, " url: ", url) + return {"title": title, "url": url, "description": authors, "date": ""} + + def preprocess_html(self, soup): + font_size = "90%" + + # make the footnotes smaller + for p in soup.find("div", id="nota_pie").findChildren("p", recursive=False): + p["style"] = f"font-size: {font_size};" + + return soup + + def parse_index(self): + soup_index = self.index_to_soup(self.INDEX) + + tag_sumario = soup_index.find("span", text="Sumario") + + if tag_sumario is None: + return None + + url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"] + self.log(url_sumario) + + soup_sumario = self.index_to_soup(url_sumario) + + feeds = [] + articles = [] + dossiers = [] + + sumario = soup_sumario.find("div", class_="sumario") + + for section in sumario.find_all("div", recursive=False): + classes = section.attrs["class"] + + if "dossier" in classes: + dtitle = self.tag_to_string(section.find("h3")) + darticles = [] + for article in section.find_all("div", recursive=False): + darticles.append(self._process_article(article)) + dossiers.append((dtitle, darticles)) + else: + articles.append(self._process_article(section)) + feeds.append(("Artículos", articles)) + feeds += dossiers + + return feeds diff --git a/recipes/el_economista.recipe b/recipes/el_economista.recipe new file mode 100644 index 0000000000..97d11712cb --- /dev/null +++ b/recipes/el_economista.recipe @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +__license__ = 'GPL v3' +__author__ = 'hmeza' +__description__ = 'El Economista - El diario de los lectores influyentes' +__version__ = 'v1.00.000' +__date__ = '27, April 2023' +''' +https://www.elconfidencial.com/ +''' + + +from calibre.web.feeds.news import 
BasicNewsRecipe + + +class ElConfidencial(BasicNewsRecipe): + title = u'elEconomista.es' + oldest_article = 7.0 + __author__ = 'hmeza' + description = 'elEconomista.es' + timefmt = ' [%d %b, %Y]' + publication_type = 'newspaper' + language = 'es' + encoding = 'utf-8' + no_stylesheets = True + remove_javascript = True + + feeds = [ + (u'elEconomista economía', u'https://www.eleconomista.es/rss/rss-economia.php'), + (u'elEconomista gestion', u'https://www.eleconomista.es/rss/rss-gestion.php'), + (u'elEconomista tecnología', u'https://www.eleconomista.es/rss/rss-category.php?category=tecnologia'), + (u'Selección elEconomista', u'https://www.eleconomista.es/rss/rss-seleccion-ee.php'), + (u'Mercados', u'https://www.eleconomista.es/rss/rss-mercados.php'), + ] diff --git a/recipes/el_pais.recipe b/recipes/el_pais.recipe index c3195a2a8f..0bf6fb5ce3 100644 --- a/recipes/el_pais.recipe +++ b/recipes/el_pais.recipe @@ -1,8 +1,8 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal' +__author__ = 'Alvaro Beiro, improving Jordi Balcells work based on an earlier version by Lorenzo Vigentini & Kovid Goyal' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' -description = 'Main daily newspaper from Spain - v1.04 (19, October 2010)' +description = 'Main daily newspaper from Spain - v1.05 (13, March 2023)' __docformat__ = 'restructuredtext en' ''' @@ -13,12 +13,13 @@ from calibre.web.feeds.news import BasicNewsRecipe class ElPais(BasicNewsRecipe): - __author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells' + __author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells & Alvaro Beiro' description = 'Main daily newspaper from Spain' - title = u'El Pais' + title = u'El Pa\xeds' publisher = u'Ediciones El Pa\xeds SL' category = 'News, politics, culture, economy, general interest' + publication_type = 'newspaper' language = 'es' timefmt = '[%a, %d %b, %Y]' @@ -32,6 +33,13 @@ class 
ElPais(BasicNewsRecipe): remove_javascript = True no_stylesheets = True + extra_css = ''' +span._db {max-width: 100%; height: auto;} +.a_m_p {font-size: .75rem;} +.a_m_m {text-transform: uppercase; padding-top: 0.5rem;} +div.a_md_a {text-align: center; text-transform: uppercase; font-size: .8rem;} +''' + keep_only_tags = [ dict(attrs={'class': [ 'article_header', @@ -41,6 +49,8 @@ class ElPais(BasicNewsRecipe): 'articulo-titulares', 'articulo-apertura', 'articulo__contenedor' + 'a_e_m', + 'a_md_a', ]}), dict(name='div', attrs={'class': 'a_c',}), @@ -57,6 +67,9 @@ class ElPais(BasicNewsRecipe): 'more_info', 'articulo-apoyos', 'top10', + 'a_ei', + 'w-cta', + 'ph-v_b', ] }, ), @@ -64,13 +77,16 @@ class ElPais(BasicNewsRecipe): dict(name='svg'), ] + remove_attributes = ['width', 'height'] + feeds = [ (u'Espa\xf1a', u'https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/section/espana/portada'), (u'Internacional', u'https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/section/internacional/portada'), - (u'Opini\xf3n', u'https://elpais.com/rss/elpais/opinion.xml'), + (u'Economía', u'https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/section/economia/portada'), + (u'Opinión', u'http://ep00.epimg.net/rss/elpais/opinion.xml'), (u'Ciencia', u'https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/section/ciencia/portada'), - (u'Tecnolog\xeda', + (u'Tecnología', u'https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/section/tecnologia/portada'), (u'Cultura', u'https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/section/cultura/portada'), (u'Estilo', u'https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/section/estilo/portada'), @@ -79,3 +95,21 @@ class ElPais(BasicNewsRecipe): (u'Sociedad', u'https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/section/sociedad/portada'), (u'Blogs', u'http://ep01.epimg.net/rss/elpais/blogs.xml'), ] + + def get_cover_url(self): + from datetime import date + cover = 
('https://srv00.epimg.net/pdf/elpais/snapshot/' + + str(date.today().year) + '/' + date.today().strftime('%m') + '/elpais/' + + str(date.today().year) + date.today().strftime('%m') + date.today().strftime('%d') + 'Big.jpg') + br = BasicNewsRecipe.get_browser(self) + try: + br.open(cover) + except: + self.log("\nCover unavailable") + cover = None + return cover + + def image_url_processor(cls, baseurl, url): + splitUrl = url.split("cloudfront-") + parsedUrl = 'https://cloudfront-' + splitUrl[1] + return parsedUrl diff --git a/recipes/elpais_impreso.recipe b/recipes/elpais_impreso.recipe index fb67faedcb..8e288527e7 100644 --- a/recipes/elpais_impreso.recipe +++ b/recipes/elpais_impreso.recipe @@ -81,7 +81,7 @@ class ElPais_RSS(BasicNewsRecipe): def get_article_url(self, article): url = BasicNewsRecipe.get_article_url(self, article) - if url and (not('/album/' in url) and not('/futbol/partido/' in url)): + if url and ('/album/' not in url and '/futbol/partido/' not in url): return url self.log('Skipping non-article', url) return None diff --git a/recipes/en_globes_co_il.recipe b/recipes/en_globes_co_il.recipe new file mode 100644 index 0000000000..2ad5aac6af --- /dev/null +++ b/recipes/en_globes_co_il.recipe @@ -0,0 +1,35 @@ +from calibre.web.feeds.news import BasicNewsRecipe + + +class En_Globes_Recipe(BasicNewsRecipe): + description = 'This is en.globes.co.il.' 
+ cover_url = 'https://www.globes.co.il/images/GlobesEN-144x40.gif' + title = u'Globes in English' + language = 'en' + __author__ = 'barakplasma' + extra_css = 'img {max-width:100%;}' + simultaneous_downloads = 5 + remove_javascript = True + keep_only_tags = [ + dict(name='h1', attrs={'id': 'F_Title'}), + dict(name='h2', attrs={'id': 'coteret_SubCoteret'}), + dict(name='p', attrs={'id': None}), + ] + max_articles_per_feed = 100 + + feeds = [ + (u"Main Headlines", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederNode?iID=942"), + (u"Israeli stocks on Wall Street", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1392"), + (u"All news", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1725"), + (u"Macro economics", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1389"), + (u"Aerospace and defense", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1380"), + (u"Real estate", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederKeyword?iID=1385"), + (u"Energy and water", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1382"), + (u"Start-ups and venture capital", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1397"), + (u"Financial services", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1383"), + (u"Tel Aviv markets", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1404"), + (u"Healthcare", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1377"), + (u"Telecommunications", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1386"), + (u"Information technology", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1376"), + (u"Transport and infrastructure", u"https://www.globes.co.il/WebService/Rss/RssFeeder.asmx/FeederKeyword?iID=1388"), + ] diff --git 
a/recipes/endgadget.recipe b/recipes/endgadget.recipe index a948ef03b3..90dc10bedd 100644 --- a/recipes/endgadget.recipe +++ b/recipes/endgadget.recipe @@ -1,62 +1,86 @@ #!/usr/bin/env python -__license__ = 'GPL v3' +__license__ = 'GPL v3' __copyright__ = 'Copyright 2011 Starson17' ''' engadget.com ''' -from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.web.feeds.news import BasicNewsRecipe + +def classes(classes): + q = frozenset(classes.split(' ')) + return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}) class Engadget(BasicNewsRecipe): - title = u'Engadget' - __author__ = 'Starson17, modified by epubli' - __version__ = 'v2.0' - __date__ = '14, Sep 2022' - description = 'Tech news' - language = 'en' - oldest_article = 7 + title = u'Engadget' + __author__ = 'Starson17, modified by epubli' + __version__ = 'v2.1' + __date__ = '2023-09-19' + description = 'Tech news' + language = 'en' + oldest_article = 7 max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - remove_javascript = True - remove_empty_feeds = True + no_stylesheets = True + use_embedded_content = False + remove_javascript = True + remove_empty_feeds = True compress_news_images = True scale_news_images_to_device = True cover_url = 'https://upload.wikimedia.org/wikipedia/commons/b/bb/Engadget-logo.svg' - keep_only_tags = [ - dict(name='figure', attrs={'data-component': 'DefaultLede'}), - dict(name='div', attrs={'data-component': 'ArticleHeader'}), - dict( - name='div', - attrs={'class': ['article-text', 'article-text c-gray-1 no-review']} - ), + keep_only_tags = [ + dict(name='div', attrs={'class':'caas-content-wrapper'}), + dict(name='figure', attrs={'data-component':'DefaultLede'}), + dict(name='div', attrs={'data-component':'ArticleHeader'}), + dict(name='div', attrs={'class':['article-text','article-text c-gray-1 no-review']}), dict(name='figure') ] remove_tags = [ - dict(name='div', attrs={'data-component': 
'ArticleAuthorInfo'}), - classes('notification-upsell-push article-slideshow D(f) rapid-with-clickid athena-button') + dict(name='div', attrs={'class':'caas-content-byline-wrapper'}), + dict(name='div', attrs={'data-component':'ArticleAuthorInfo'}), + classes('caas-3p-blocked commerce-disclaimer notification-upsell-push article-slideshow athena-button email-form') ] feeds = [(u'Posts', u'https://www.engadget.com/rss.xml')] def parse_feeds(self): - # Call parent's method. - feeds = BasicNewsRecipe.parse_feeds(self) - # Loop through all feeds. - for feed in feeds: - # Loop through all articles in feed. - for article in feed.articles[:]: - # Remove articles with '...' in the title. - if 'best tech deals' in article.title: - print('Removing:', article.title) - feed.articles.remove(article) - elif 'Podcast' in article.title: - print('Removing:', article.title) - feed.articles.remove(article) - elif 'The Morning After' in article.title: - print('Removing:', article.title) - feed.articles.remove(article) - return feeds + # Call parent's method. + feeds = BasicNewsRecipe.parse_feeds(self) + # Loop through all feeds. + for feed in feeds: + # Loop through all articles in feed. + for article in feed.articles[:]: + # Remove articles with '...' in the title. 
+ if 'best tech deals' in article.title: + print('Removing:',article.title) + feed.articles.remove(article) + elif 'Podcast' in article.title: + print('Removing:',article.title) + feed.articles.remove(article) + elif 'The Morning After' in article.title: + print('Removing:',article.title) + feed.articles.remove(article) + return feeds + + def preprocess_html(self, soup): + for attr in 'data-src data-src-mobile'.split(): + for img in soup.findAll('img'): + try: + ds = img[attr].split()[0] + del img[attr] + except KeyError: + continue + if ds: + img['src'] = ds + for divs in soup.findAll('div'): + try: + if divs['style'].split()[0].startswith('padding'): + print('Removing padding') + del divs['style'] + except KeyError: + continue + + return soup + diff --git a/recipes/eos_wetenschap.recipe b/recipes/eos_wetenschap.recipe index e32686ac9c..adc302f41f 100644 --- a/recipes/eos_wetenschap.recipe +++ b/recipes/eos_wetenschap.recipe @@ -8,7 +8,7 @@ class EOSWetenschap(BasicNewsRecipe): __author__ = u'erkfuizfeuadjfjzefzfuzeff' description = u'Wetenschapsnieuws' oldest_article = 7 - language = 'nl_NL' + language = 'nl' max_articles_per_feed = 100 no_stylesheets = True diff --git a/recipes/epoch_times.recipe b/recipes/epoch_times.recipe index 150dc7651d..cf2c0cc8b8 100644 --- a/recipes/epoch_times.recipe +++ b/recipes/epoch_times.recipe @@ -7,40 +7,45 @@ class EpochTimes(BasicNewsRecipe): title = 'The Epoch Times' __author__ = 'Kovid Goyal' description = 'US general news' - language = 'en_US' + language = 'en' encoding = 'utf-8' - oldest_article = 2 - max_articles_per_feed = 10 - extra_css = """ - body{font-family: Arial,sans-serif } - .featured_caption{font-size: small} - .author,.date{font-size: small} - """ + oldest_article = 1.2 + max_articles_per_feed = 20 + ignore_duplicate_articles = {'url'} + remove_attributes = ['height', 'width', 'style'] + remove_empty_feeds = True + no_stylesheets = True + resolve_internal_links = True + masthead_url = 
'https://epochtimes-ny.newsmemory.com/eeLayout/epochtimes/1.0.a/images/webapp/banner.png' + extra_css = '.post_caption, .text-sm, .uppercase {font-size:small;}' + keep_only_tags = [ - classes('post_title featured_image pricat_name author date post_content'), + dict(name='article') ] remove_tags = [ - classes('author_wrapper bottom_row'), + classes('print:hidden h-header shortcode aspect-square'), + dict(name=['button', 'svg']), + dict(name='img', attrs={'src':lambda x: x and x.endswith('svg')}) ] + # feeds can be found at https://www.theepochtimes.com/rssfeeds feeds = [ - ('US', 'https://www.theepochtimes.com/c-us/feed/'), - ('World', 'https://www.theepochtimes.com/c-world/feed/'), - ('General', 'https://www.theepochtimes.com/feed/'), - ('Opinion', 'https://www.theepochtimes.com/c-opinion/feed/'), - ('Business and Economy', 'https://www.theepochtimes.com/c-business/feed/'), - ('Science', 'https://www.theepochtimes.com/c-science/feed/'), - ('Tech', 'https://www.theepochtimes.com/c-tech/feed/'), - ('Health', 'https://www.theepochtimes.com/c-wellness/feed/'), - ('Entertainment', 'https://www.theepochtimes.com/c-entertainment/feed/'), + ('Special Series', 'https://feed.theepochtimes.com/health/special-series/feed'), + ('US', 'https://feed.theepochtimes.com/us/feed'), + ('China News', 'https://feed.theepochtimes.com/china/feed'), + ('World', 'https://feed.theepochtimes.com/world/feed'), + ('Opinion', 'https://feed.theepochtimes.com/opinion/feed'), + ('Business & Markets', 'https://feed.theepochtimes.com/business/feed'), + ('Science', 'https://feed.theepochtimes.com/science/feed'), + ('Tech', 'https://feed.theepochtimes.com/tech/feed'), + ('Health & Wellness', 'https://feed.theepochtimes.com/wellness/feed'), + ('Epoch Taste', 'https://feed.theepochtimes.com/epoch-taste/feed'), + ('Entertainment', 'https://feed.theepochtimes.com/entertainment/feed'), ] def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'] - 
title = soup.find(attrs={'class': 'post_title'}) - fi = soup.find(attrs={'class': 'featured_image'}) - if title is not None and fi is not None: - title.extract() - fi.insert_before(title) + for fig_c in soup.findAll('figcaption'): + fig_c['class'] = 'post_caption' return soup diff --git a/recipes/equestria_daily.recipe b/recipes/equestria_daily.recipe index 247dbf37af..75a49416c9 100644 --- a/recipes/equestria_daily.recipe +++ b/recipes/equestria_daily.recipe @@ -11,7 +11,7 @@ class AdvancedUserRecipe1639926896(BasicNewsRecipe): title = "Equestria Daily" description = "Everything new in Equestria and beyond!" - language = 'en_US' + language = 'en' # Max. supported by website: 50 max_articles_per_feed = 30 diff --git a/recipes/expansion_spanish.recipe b/recipes/expansion_spanish.recipe index 74fec85199..7ba76a1723 100644 --- a/recipes/expansion_spanish.recipe +++ b/recipes/expansion_spanish.recipe @@ -147,7 +147,7 @@ class expansion_spanish(BasicNewsRecipe): # Eliminar artículos duplicados en otros feeds - if not (link in self._processed_links): + if link not in self._processed_links: self._processed_links.append(link) else: link = None diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe index 66e71b8f77..c105cfa725 100644 --- a/recipes/financial_times.recipe +++ b/recipes/financial_times.recipe @@ -1,8 +1,10 @@ import json import re +from calibre import browser from urllib.parse import quote +from html5_parser import parse -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes class ft(BasicNewsRecipe): @@ -16,9 +18,36 @@ class ft(BasicNewsRecipe): remove_javascript = True remove_empty_feeds = True ignore_duplicate_articles = {'url'} + resolve_internal_links = True remove_attributes = ['style', 'width', 'height'] masthead_url = 'https://im.ft-static.com/m/img/masthead_main.jpg' - extra_css = '#fig-cap{font-style:italic; text-align:left; font-size:small;}' + simultaneous_downloads = 1 
+ + extra_css = ''' + .article-info__time-byline {font-size:small; font-weight:bold; } + .o-topper__visual, #fig, .main-image, .n-content-image { text-align:center; font-size:small; } + blockquote, i { color:#5c5c5c; } + .o-topper__standfirst { font-style:italic; color:#202020; } + .o-topper__topic { font-size:small; color:#5c5c5c; } + ''' + + keep_only_tags = [ + classes( + 'body_json o-topper__topic o-topper__headline o-topper__standfirst o-topper__visual article-info__time-byline main-image' + ), + dict(name='article', attrs={'id':'article-body'}) + ] + + remove_tags = [ + dict(name=['source', 'svg', 'button', 'aside']), + dict(name='aside', attrs={'class':'n-content-recommended--single-story'}), + dict(attrs={'data-layout-name':'card'}), + classes('in-article-advert flourish-disclaimer') + ] + + def get_cover_url(self): + soup = self.index_to_soup('https://www.frontpages.com/financial-times/') + return 'https://www.frontpages.com' + soup.find('img', attrs={'id':'giornale-img'})['src'] # needs_subscription = 'optional' # @@ -35,45 +64,44 @@ class ft(BasicNewsRecipe): # br.submit() # return br - def get_browser(self, *args, **kw): - br = super().get_browser(*args, **kw) - br.set_current_header('Referer', 'https://www.google.com/') - return br + def get_browser(self, *args, **kwargs): + return self - def get_cover_url(self): - from datetime import date - cover = 'http://img.kiosko.net/' + str( - date.today().year - ) + '/' + date.today().strftime('%m') + '/' + date.today( - ).strftime('%d') + '/uk/ft_uk.750.jpg' - br = BasicNewsRecipe.get_browser(self) - try: - br.open(cover) - except: - index = 'https://en.kiosko.net/uk/np/ft_uk.html' - soup = self.index_to_soup(index) - for image in soup.findAll('img', src=True): - if image['src'].endswith('750.jpg'): - return image['src'] - self.log("\nCover unavailable") - cover = None - return cover + def clone_browser(self, *args, **kwargs): + return self.get_browser() + + def open_novisit(self, *args, **kwargs): + br = 
browser(user_agent='Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)') + br.addheaders += [ + ('Referer', 'https://www.google.com/'), + ('X-Forwarded-For', '66.249.66.1') + ] + return br.open_novisit(*args, **kwargs) + + open = open_novisit feeds = [ + ('FirstFT', 'https://www.ft.com/firstft?format=rss'), + ('Opinion', 'https://www.ft.com/opinion?format=rss'), ('World', 'https://www.ft.com/world?format=rss'), ('US', 'https://www.ft.com/us?format=rss'), ('Companies', 'https://www.ft.com/companies?format=rss'), ('Tech', 'https://www.ft.com/technology?format=rss'), ('Markets', 'https://www.ft.com/markets?format=rss'), ('Climate', 'https://www.ft.com/climate-capital?format=rss'), - ('Opinion', 'https://www.ft.com/opinion?format=rss'), ('Life & Arts', 'https://www.ft.com/life-arts?format=rss'), ('How to spend it', 'https://www.ft.com/htsi?format=rss'), + ('Others', 'https://www.ft.com/rss/home/uk') ] def preprocess_raw_html(self, raw, *a): # with open('/t/raw.html', 'w') as f: # f.write(raw) + root = parse(raw) + if root.xpath('//article[@id="article-body"]'): + self.log('**has article content') + return raw + self.log('**no article content') m = re.search(r'type="application/ld\+json">[^<]+?"@type":"NewsArticle"', raw) raw = raw[m.start():] raw = raw.split('>', 1)[1] @@ -114,12 +142,11 @@ class ft(BasicNewsRecipe): body = re.sub(r'\[https://\S+?\]', insert_image, body) if data.get('description'): desc = '

    ' + data['description'] + '

    ' - html = '

    ' + title + '

    ' + desc + '

    ' + author + '

    ' + image + '

    ' + body + html = '

    ' + title + '

    ' + desc + '

    ' + author + '

    ' + image + '

    ' + body return html def preprocess_html(self, soup): - for span in soup.findAll('span'): - p = span.findParent('p') - if p: - p['id'] = 'fig-cap' + for con in soup.findAll(attrs={'class':'n-content-layout__slot'}): + if con.find('figure'): + con['id'] = 'fig' return soup diff --git a/recipes/financial_times_print_edition.recipe b/recipes/financial_times_print_edition.recipe deleted file mode 100644 index 8a2e4440fb..0000000000 --- a/recipes/financial_times_print_edition.recipe +++ /dev/null @@ -1,169 +0,0 @@ -import json -import re -from datetime import date -from calibre.web.feeds.news import BasicNewsRecipe, classes -from urllib.parse import quote - - -class ft(BasicNewsRecipe): - title = 'Financial Times - Print Edition' - language = 'en' - __author__ = "Kovid Goyal" - description = 'The Financial Times is one of the world’s leading news organisations, recognised internationally for its authority, integrity and accuracy.' - no_stylesheets = True - remove_javascript = True - remove_empty_feeds = True - ignore_duplicate_articles = {'url'} - remove_attributes = ['style', 'width', 'height'] - masthead_url = 'https://im.ft-static.com/m/img/masthead_main.jpg' - extra_css = '#fig-cap{font-style:italic; text-align:left; font-size:small;}' - - # needs_subscription = 'optional' - # - # def get_browser(self, *args, **kw): - # br = super().get_browser(*args, **kw) - # if self.username and self.password: - # # ft.com uses a CAPTCHA on its login page so this sadly doesnt work - # br.open('https://accounts.ft.com/login?location=https%3A%2F%2Fwww.ft.com') - # br.select_form(id='email-form') - # br['email'] = self.username - # br.submit() - # br.select_form(id='login-form') - # br['password'] = self.password - # br.submit() - # return br - - def get_browser(self, *args, **kw): - br = super().get_browser(*args, **kw) - br.set_current_header('Referer', 'https://www.google.com/') - return br - - def get_cover_url(self): - from datetime import date - cover = 
'http://img.kiosko.net/' + str( - date.today().year - ) + '/' + date.today().strftime('%m') + '/' + date.today( - ).strftime('%d') + '/uk/ft_uk.750.jpg' - br = BasicNewsRecipe.get_browser(self) - try: - br.open(cover) - except: - index = 'https://en.kiosko.net/uk/np/ft_uk.html' - soup = self.index_to_soup(index) - for image in soup.findAll('img', src=True): - if image['src'].endswith('750.jpg'): - return image['src'] - self.log("\nCover unavailable") - cover = None - return cover - - def parse_index(self): - soup = self.index_to_soup('https://www.ft.com/todaysnewspaper/uk') - # International edition: https://www.ft.com/todaysnewspaper/international - ans = self.ft_parse_index(soup) - if not ans: - is_sunday = date.today().weekday() == 6 - if is_sunday: - raise ValueError( - 'The Financial Times Newspaper is not published on Sundays.' - ) - else: - raise ValueError( - 'The Financial Times Newspaper is not published today.' - ) - return ans - - def ft_parse_index(self, soup): - feeds = [] - for section in soup.findAll(**classes('o-teaser-collection')): - h2 = section.find('h2') - secname = self.tag_to_string(h2) - self.log(secname) - articles = [] - for a in section.findAll( - 'a', href=True, **classes('js-teaser-heading-link') - ): - url = a['href'] - url = 'https://www.ft.com' + url - title = self.tag_to_string(a) - desc = '' - desc_parent = a.findParent('div') - div = desc_parent.find_previous_sibling( - 'div', **classes('o-teaser__meta') - ) - if div is not None: - desc = div.find('a', **classes('o-teaser__tag')) - desc = self.tag_to_string(desc) - prefix = div.find('span', **classes('o-teaser__tag-prefix')) - if prefix is not None: - prefix = self.tag_to_string(prefix) - desc = prefix + ' ' + desc - articles.append({ - 'title': title, - 'url': url, - 'description': desc - }) - self.log('\t', desc) - self.log('\t', title) - self.log('\t\t', url) - if articles: - feeds.append((secname, articles)) - return feeds - - def preprocess_raw_html(self, raw, *a): - # with 
open('/t/raw.html', 'w') as f: - # f.write(raw) - m = re.search( - r'type="application/ld\+json">[^<]+?"@type":"NewsArticle"', raw - ) - raw = raw[m.start():] - raw = raw.split('>', 1)[1] - # with open('/t/raw.json', 'w') as f: - # f.write(raw) - data = json.JSONDecoder().raw_decode(raw)[0] - title = data['headline'] - body = data['articleBody'] - body = body.replace('\n\n', '

    ') - - author = '' - if 'author' in data: - try: - author = data['author']['name'] - except TypeError: - author = ' and '.join(x['name'] for x in data['author']) - - image = desc = title_image_url = '' - - def resize_img(img): - a = 'https://www.ft.com/__origami/service/image/v2/images/raw/' - b = quote(img, safe='') - c = '?dpr=2&fit=scale-down&quality=medium&source=next&width=400' - # use width = 200, 300, 400,.. 700... - return a + b + c - - if data.get('image'): - image_url = data['image']['url'] - if body.__contains__(image_url) is False: - title_image_url = resize_img(image_url) - image = '

    '.format(title_image_url) - # embedded image links - - def insert_image(m): - url = m.group()[1:-1] - if url.__contains__('studio') is False: - url = resize_img(url) - return '

    '.format(url) - - body = re.sub(r'\[https://\S+?\]', insert_image, body) - - if data.get('description'): - desc = '

    ' + data['description'] + '

    ' - html = '

    ' + title + '

    ' + desc + '

    ' + author + '

    ' + image + '

    ' + body - return html - - def preprocess_html(self, soup): - for span in soup.findAll('span'): - p = span.findParent('p') - if p: - p['id'] = 'fig-cap' - return soup diff --git a/recipes/firstpost.recipe b/recipes/firstpost.recipe new file mode 100644 index 0000000000..f81a19cb44 --- /dev/null +++ b/recipes/firstpost.recipe @@ -0,0 +1,83 @@ +from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.ptempfile import PersistentTemporaryFile + +# Firstpost feeds mix sections into other feeds, like explainers end up in opinion feed and opinions end up in India feed. +# change google_feeds to True to fetch right sections. +google_feeds = False + +class firstpost(BasicNewsRecipe): + title = 'Firstpost' + __author__ = 'unkn0wn' + description = ( + 'Firstpost.com will serve as a trusted guide to the crush of news and ideas around you.' + ' With thoughtful analysis and fearless views our team of editors and writers will track' + ' news in India and the world and provide a perspective that is reflective of a changing dynamic.' 
+ ) + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'en_IN' + remove_attributes = ['height', 'width', 'style'] + masthead_url = 'https://images.firstpost.com/wp-content/uploads/2016/03/FP-Logo.png?impolicy=website&width=600&height=60' + max_articles_per_feed = 25 + remove_empty_feeds = True + ignore_duplicate_articles = {'title', 'url'} + extra_css = ''' + .category-name, .author-info { font-size:small; color:#202020; } + .wp-caption-text { font-size:small; text-align:center; } + ''' + + keep_only_tags = [ + classes('article-sect') + ] + + remove_tags = [ + classes('art-rel-articles tags-wrap'), + dict(name='svg'), + ] + + feeds = [] + + sections = [ + 'india', 'politics', 'opinion', 'explainers', 'business', + 'world', 'web-stories', 'tech', 'artandculture', 'health', 'health-supplement', + # 'photos', 'entertainment', 'living', 'education', 'sports', 'firstcricket', + ] + if not google_feeds: + oldest_article = 1.2 # days + for sec in sections: + a = 'https://www.firstpost.com/rss/{}.xml' + feeds.append((sec.capitalize(), a.format(sec))) + else: + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/', '/vantage/' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:firstpost.com{}&hl=en-IN&gl=IN&ceid=IN:en' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + # feeds.append(('Others', a.format(''))) + + def preprocess_html(self, soup): + if h2 := soup.find('h2', 
attrs={'class':'category-name'}): + h2.name = 'p' + if h := soup.find('h2', attrs={'class':'inner-copy'}): + h.name = 'p' + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'] + return soup diff --git a/recipes/fluter_de.recipe b/recipes/fluter_de.recipe index 5a7912bc36..627437eeb4 100644 --- a/recipes/fluter_de.recipe +++ b/recipes/fluter_de.recipe @@ -1,4 +1,9 @@ -__license__ = 'GPL v3' +## +## Written: 2013-02-05 +## Version: v4.1 +## Last update: 2013-02-05 V3, 2020-07-05 v4, 2023-06-16 v4.1 +## +__license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -7,24 +12,31 @@ Fetch fluter.de from calibre.web.feeds.news import BasicNewsRecipe - class AdvancedUserRecipe1313693926(BasicNewsRecipe): - title = u'Fluter' + title = u' fluter. ' description = 'fluter.de Magazin der Bundeszentrale für politische Bildung/bpb' language = 'de' encoding = 'UTF-8' - __author__ = 'Armin Geller' # 2013-02-05 V3 + __author__ = 'Armin Geller' # 2013-02-05 V3 - oldest_article = 7 + oldest_article = 14 max_articles_per_feed = 50 + auto_cleanup = False - feeds = [ - (u'Inhalt:', u'http://www.fluter.de/de/?tpl=907'), - ] + feeds = [ + (u'Inhalt:', u'https://www.fluter.de/rss.xml') + ] - extra_css = '.cs_img {margin-right: 10pt;}' + keep_only_tags = [ + dict(name='article', attrs={'class':'node node-article block fullWidth stage'}) + ] - def print_version(self, url): - return url + '?tpl=1260' + remove_tags = [ + dict(name='h2', attrs={'class':'element-invisible'}) + ] + + extra_css = ''' + .field-group-format, .group_additional_info, .additional-info {display: inline-block; min-width: 8rem; text-align: center} + ''' diff --git a/recipes/focus_de.recipe b/recipes/focus_de.recipe index b969923727..e111f8cc1c 100644 --- a/recipes/focus_de.recipe +++ b/recipes/focus_de.recipe @@ -1,27 +1,35 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import unicode_literals, division, absolute_import, print_function - ''' focus.de ''' -from 
calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe, classes class AdvancedUserRecipe1305567197(BasicNewsRecipe): title = 'Focus (DE)' - __author__ = 'Anonymous' - description = 'RSS-Feeds von Focus.de' + __author__ = 'unkn0wn' + description = 'RSS-Feeds von Focus.de, best downloaded at the end of the week.' language = 'de' oldest_article = 7 - max_articles_per_feed = 100 + max_articles_per_feed = 25 no_stylesheets = True remove_javascript = True use_embedded_content = False remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} + remove_attributes = ['width', 'height', 'style'] + masthead_url = 'https://www.focus-magazin.de/img/Focus_Logo.jpg' + extra_css = ''' + .posMarker_oh { font-size:small; color:#404040; } + .posMarker_he { font-size:large; font-weight:bold; } + .leadIn { font-style:italic; color:#202020; } + .caption { text-align:center; font-size:small; } + .authorMeta, .displayDate { font-size:small; } + ''' + + def get_cover_url(self): + soup = self.index_to_soup('https://www.focus-magazin.de/') + return soup.find('img', attrs={'class':'main-cover'})['src'] feeds = [ ('Politik', 'http://rss.focus.de/politik/'), @@ -29,33 +37,25 @@ class AdvancedUserRecipe1305567197(BasicNewsRecipe): ('Gesundheit', 'http://rss.focus.de/gesundheit/'), ('Panorama', 'http://rss.focus.de/panorama/'), ('Digital', 'http://rss.focus.de/digital/'), - ('Reisen', 'http://rss.focus.de/reisen/') + ('Reisen', 'http://rss.focus.de/reisen/'), + ('Andere', 'http://rss.focus.de') ] keep_only_tags = [ - dict(name='div', attrs={'id': 'article'}) + classes('articleHead articleContent') ] remove_tags = [ - dict(name='div', attrs={'class': ['inimagebuttons', - 'kolumneHead clearfix']}) + dict(name=['svg', 'script']), + classes('socFbLikeShare video social_frame'), + dict(attrs={'id': 'article-social-holder'}) ] - remove_attributes = ['width', 'height'] - - extra_css = 'h1 {font-size: 1.6em; text-align: left; margin-top: 0em} \ - 
h2 {font-size: 1em; text-align: left} \ - .overhead {margin-bottom: 0em} \ - .caption {font-size: 0.6em}' - - def print_version(self, url): - return url + '?drucken=1' - def preprocess_html(self, soup): - # remove useless references to videos - for item in soup.findAll('h2'): - if item.string: - txt = item.string.upper() - if txt.startswith('IM VIDEO:') or txt.startswith('VIDEO:'): - item.extract() + if h1 := soup.find(attrs={'class':'articleIdentH1'}): + h1.name = 'h1' + if he := soup.find(**classes('posMarker_he')): + he.name = 'div' + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'] return soup diff --git a/recipes/footballua.recipe b/recipes/footballua.recipe new file mode 100644 index 0000000000..ea6ff53dd8 --- /dev/null +++ b/recipes/footballua.recipe @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + +class FootballUA(BasicNewsRecipe): + title = 'Football.UA' + __author__ = 'bugmen00t' + description = ('\u0421\u043F\u043E\u0440\u0442\u0438\u0432\u043D\u0438\u0439 \u043F\u043E\u0440\u0442\u0430\u043B' + ' \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456,' + ' \u043F\u0440\u0438\u0441\u0432\u044F\u0447\u0435\u043D\u0438\u0439 \u043B\u0438\u0448\u0435 \u0444\u0443\u0442\u0431\u043E\u043B\u0443.') + publisher = 'United Media Holding group' + category = 'news' + cover_url = u'https://s.ill.in.ua/i/news/570x380/212/212438.jpg' + language = 'uk' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + remove_empty_feeds = True + oldest_article = 3 + max_articles_per_feed = 20 + + remove_tags_before = dict(name='article') + + remove_tags_after = dict(name='article') + + remove_tags = [ + dict(name='div', attrs={'class': 'bottom-info'}), + dict(name='div', attrs={'class': 'social-buttons'}) + ] + + feeds = [ + ('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://football.ua/rss2.ashx'), + ] diff --git a/recipes/foreign_policy.recipe 
b/recipes/foreign_policy.recipe index 8a2bccc452..a702fa81e0 100644 --- a/recipes/foreign_policy.recipe +++ b/recipes/foreign_policy.recipe @@ -5,7 +5,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera __license__ = 'GPL v3' __copyright__ = '2015, Kovid Goyal ' -from collections import OrderedDict +from collections import defaultdict from calibre.web.feeds.news import BasicNewsRecipe @@ -23,25 +23,38 @@ class ForeignPolicy(BasicNewsRecipe): description = 'International News' no_stylesheets = True remove_javascript = True + remove_empty_feeds = True + resolve_internal_links = True + encoding = 'utf-8' + remove_attributes = ['style', 'height', 'width'] + + extra_css = ''' + img {display:block; margin:0 auto;} + .department-meta { font-size:small; color:#404040; } + .dek-heading, .author-bio, .date-time { font-size:small; color:#202020; } + .figure-image, .caption, .wp-caption { font-size:small; text-align:center; } + ''' keep_only_tags = [ - dict(name='h1'), - classes('dek-heading meta-data figure-image post-content-main bio-no-photo'), - dict(attrs={'class': lambda x: x and set(x.split()).intersection( - {'wide_header_bg', 'wide_header_text'})}), + dict(name='article', attrs={'class':'article'}) ] remove_tags = [ - dict(name=['meta', 'link']), - classes('share-links content-ungated -excerpt related-articles fp-lightbox--overlay more-text'), + dict(name=['meta', 'link', 'svg', 'button', 'iframe', 'aside']), + classes( + 'share-links content-ungated related-articles fp-lightbox--overlay more-text myfp-article ' + 'editors-note-in-post--v2 author-photo related-articles-carousel sidebar-box_right ' + ), ] remove_tags_after = [classes('post-content-main')] def parse_index(self): soup = self.index_to_soup('https://foreignpolicy.com/the-magazine') - img = soup.find('img', attrs={'data-lazy-src': lambda x: x and '-cover' in x}) - self.cover_url = img['data-lazy-src'] + img = soup.find('img', attrs={'src': lambda x: x and '-cover' in x}) + 
if img: + self.cover_url = img['src'].split('?')[0] + '?w=800?quality=90' current_section = None - amap = OrderedDict() + feeds_dict = defaultdict(list) + soup = soup.find('main') for x in soup.findAll(name=('h2', 'h3')): if x.name == 'h2': current_section = self.tag_to_string(x) @@ -52,31 +65,18 @@ class ForeignPolicy(BasicNewsRecipe): title = self.tag_to_string(x) a = x.parent url = a['href'] - self.log('\t', title, 'url') - amap.setdefault(current_section, []).append({'title': title, 'url': url}) - ans = [] - for sec_name in sorted(amap, key=lambda x: x.lower()): - articles = amap[sec_name] - if articles: - ans.append((sec_name, articles)) - return ans + desc = '' + meta = a.findNext(attrs={'class':'meta-data -excerpt'}) + if meta: + desc += self.tag_to_string(meta) + dek = a.findNext(attrs={'class':'dek-heading -excerpt'}) + if dek: + desc += ' | ' + self.tag_to_string(dek) + self.log('\t', title, url, '\n\t', desc) + feeds_dict[current_section].append({"title": title, "url": url, "description": desc}) + return [(section, articles) for section, articles in feeds_dict.items()] def preprocess_html(self, soup): - for img in soup.findAll('img', attrs={'data-srcset': True}): - img['src'] = img['data-srcset'].split()[0] - for img in soup.findAll('img', src=False, attrs={'data-src': True}): - img['src'] = img['data-src'] - body = soup.find('body') - div = soup.find( - attrs={'class': lambda x: x and 'wide_header_bg' in x.split()}) - if div is not None: - div.extract() - body.insert(0, div) - div = soup.find( - attrs={'class': lambda x: x and 'wide_header_text' in x.split()}) - if div is not None: - div.extract() - body.insert(0, div) - for div in soup.findAll(id='footer-logo'): - div.parent.extract() + for img in soup.findAll('img', attrs={'src':True}): + img['src'] = img['src'].split('?')[0] + '?w=600?quality=90' return soup diff --git a/recipes/foreignaffairs.recipe b/recipes/foreignaffairs.recipe index 04f2e48607..5836c68d05 100644 --- 
a/recipes/foreignaffairs.recipe +++ b/recipes/foreignaffairs.recipe @@ -6,13 +6,7 @@ import html5lib import mechanize from lxml import html -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +from calibre.web.feeds.news import BasicNewsRecipe, classes def as_article(source, log): @@ -155,9 +149,14 @@ class ForeignAffairsRecipe(BasicNewsRecipe): self.timefmt = u' [%s]' % date link = soup.find('link', rel='canonical', href=True)['href'] year, volnum, issue_vol = link.split('/')[-3:] - self.cover_url = soup.find(**classes('subscribe-callout-image'))['data-src'].split("|")[-1] - self.cover_url = self.cover_url.split('?')[0] - self.cover_url = self.cover_url.replace('_webp_issue_small_2x', '_webp_issue_large_2x') + self.cover_url = re.sub( + r"_webp_issue_small_\dx", + "_webp_issue_large_2x", + soup.find(class_="subscribe-callout-image")["srcset"] + .split(",")[0] + .strip() + .split(" ")[0], + ) cls = soup.find('body')['class'] if isinstance(cls, (list, tuple)): @@ -181,9 +180,8 @@ class ForeignAffairsRecipe(BasicNewsRecipe): return html.tostring(root, encoding='unicode') def preprocess_html(self, soup): - for attr in ('ng-src', 'data-blazy', 'data-src'): - for img in soup.findAll('img', attrs={attr: True}): - img['src'] = img[attr] + for img in soup.find_all('img', attrs={'srcset': True}): + img['src'] = img['srcset'].split(',')[-1].strip().split(' ')[0].strip() return soup def get_browser(self): diff --git a/recipes/frontline.recipe b/recipes/frontline.recipe index b3bd2de3a2..7a95524c8d 100644 --- a/recipes/frontline.recipe +++ b/recipes/frontline.recipe @@ -1,7 +1,6 @@ from collections import defaultdict from calibre.web.feeds.news import BasicNewsRecipe, classes - class Frontline(BasicNewsRecipe): title = u'Frontline' __author__ = 'unkn0wn' @@ -16,18 +15,21 @@ class Frontline(BasicNewsRecipe): remove_attributes = 
['height', 'width'] resolve_internal_links = True extra_css = ''' - .overline{ font-size:small; color:#404040; } - .person-name { font-size:small; font-weight:bold; } - .lead-img-caption, .caption-cont { font-size:small; text-align:center; } + .environment, .publish-time, .author { font-size:small; color:#404040; } + .caption { font-size:small; text-align:center; } + img { display:block; margin:0 auto; } + .question {font-weight:bold;} ''' keep_only_tags = [ - classes('article') + dict(name='div', attrs={'class':'container article-section'}) ] remove_tags = [ - classes('shareicon-article articleBottomLine secheader mobilesocialicons'), - dict(name='h2', attrs={'class':'title'}) + classes( + 'breadcrumb comments-shares share-page article-video ' + 'referpara slide-mobile title-patch hide-mobile related-stories' + ), ] def preprocess_html(self, soup): @@ -36,11 +38,11 @@ class Frontline(BasicNewsRecipe): source = img.findPrevious('source', srcset=True) img.extract() if source: - source['src'] = source['srcset'] + source['src'] = source['srcset'].replace('_320','_1200') source.name = 'img' else: img['src'] = img['data-original'] - for cap in soup.findAll(**classes('caption-cont')): + for cap in soup.findAll(**classes('caption')): cap.name = 'figcaption' return soup @@ -50,30 +52,32 @@ class Frontline(BasicNewsRecipe): return soup def parse_index(self): - soup = self.index_to_soup('https://frontline.thehindu.com/magazine/') - issue = soup.find(**classes('sptar-archive-item')).find('a')['href'] - self.log(issue) - soup = self.index_to_soup(issue) - time = soup.find(**classes('date')).findNext('h3') - if time: - self.timefmt = ' ' + self.tag_to_string(time) - self.log('Downloading Issue:', self.timefmt) - self.cover_url = soup.find(**classes('sptar-cover-item')).find('img')['data-original'].replace('FREE_320', 'FREE_810') + soup = self.index_to_soup('https://frontline.thehindu.com/current-issue/') + + if cover := soup.find('div', attrs={'class':'magazine'}): + 
self.cover_url = cover.find(**classes('sptar-image')).img['data-original'].replace('_320', '_1200') + self.log('Cover ', self.cover_url) + if desc := cover.find(**classes('sub-text')): + self.description = self.tag_to_string(desc) + feeds_dict = defaultdict(list) - for div in soup.findAll('div', attrs={'class':'brief-list-item'}): - a = div.find(**classes('brief-title')).find('a') + + mag = soup.find(**classes('section-magazine')) + for div in mag.findAll('div', attrs={'class':'content'}): + a = div.find(**classes('title')).find('a') url = a['href'] title = self.tag_to_string(a) section = 'Articles' - cat = div.find(**classes('brief-cat')) - if cat: + if cat := div.find(**classes('label')): section = self.tag_to_string(cat) desc = '' - art = div.find(**classes('artbody')) - if art: + + if art := div.find(**classes('sub-text')): desc = self.tag_to_string(art) + if auth := div.find(**classes('author')): + desc = self.tag_to_string(auth) + ' | ' + desc if not url or not title: continue self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url) - feeds_dict[section].append({"title": title, "url": url}) - return [(section, articles) for section, articles in feeds_dict.items()] + feeds_dict[section].append({"title": title, "url": url, "description": desc}) + return [(section, articles) for section, articles in feeds_dict.items()] \ No newline at end of file diff --git a/recipes/github.recipe b/recipes/github.recipe new file mode 100644 index 0000000000..4bb78a8fd5 --- /dev/null +++ b/recipes/github.recipe @@ -0,0 +1,42 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class GithubBlog(BasicNewsRecipe): + title = u'Github Blog' + language = 'en' + description = 'Updates, ideas, and inspiration from GitHub to help developers build and design software.' 
+ cover_url = 'https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png' + masthead_url = 'https://github.githubassets.com/assets/GitHub-Logo-ee398b662d42.png' + + oldest_article = 14 + + keep_only_tags = [ + dict(name='div', attrs={'class': 'col-12 offset-lg-1 col-lg-10 col-xl-7 mt-5 mt-lg-10 mb-6 mb-lg-8' }), + dict(name='section', attrs={'class': lambda x: x and 'post__content' in x.split(' ') }) + ] + + remove_tags = [ + dict(name='div', attrs={'class': lambda x: x and 'post-tags' in x.split(' ') }), + dict(name='ul', attrs={'class': lambda x: x and 'post-hero__categories' in x.split(' ') }) + ] + + preprocess_regexps = [ + # Styles the article description + (re.compile(r'(

    ]*>)([^<]*)(

    )'), + lambda m: '

    %s

    ' % (m.group(2))) + ] + + use_embedded_content = False + no_stylesheets = True + + feeds = [ + (u'Engineering', u'https://github.blog/category/engineering/feed/'), + (u'Product', u'https://github.blog/category/product/feed/'), + (u'Security', u'https://github.blog/category/security/feed/'), + (u'Open Source', u'https://github.blog/category/open-source/feed/'), + (u'Enterprise', u'https://github.blog/category/enterprise/feed/'), + (u'Community', u'https://github.blog/category/community/feed/'), + (u'Education', u'https://github.blog/category/education/feed/'), + (u'Company', u'https://github.blog/category/company/feed/'), + (u'Policy', u'https://github.blog/category/policy/feed/') + ] diff --git a/recipes/globes_co_il.recipe b/recipes/globes_co_il.recipe index 48b7db4fed..ffd5c71f9d 100644 --- a/recipes/globes_co_il.recipe +++ b/recipes/globes_co_il.recipe @@ -1,49 +1,41 @@ from calibre.web.feeds.news import BasicNewsRecipe -import re class AdvancedUserRecipe1283848012(BasicNewsRecipe): description = 'This is Globes.co.il.' 
- cover_url = 'http://www.the7eye.org.il/SiteCollectionImages/BAKTANA/arye_avnery_010709_377.jpg' + cover_url = 'https://images.globes.co.il/globes/logo-138-35-2.svg?ver=1' title = u'Globes' language = 'he' - __author__ = 'marbs' + __author__ = 'marbs & barakplasma' extra_css = 'img {max-width:100%;} body{direction: rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; }, a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }' # noqa simultaneous_downloads = 5 remove_javascript = True + keep_only_tags = [ + dict(name='h1', attrs={'id': 'F_Title'}), + dict(name='h2', attrs={'id': 'coteret_SubCoteretText'}), + dict(name='div', attrs={'class': 'articleInner'}), + ] timefmt = '[%a, %d %b, %Y]' oldest_article = 1 max_articles_per_feed = 100 remove_attributes = ['width', 'style'] - feeds = [(u'שוק ההון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585'), - (u'נדל"ן', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607'), - (u'וול סטריט ושווקי העולם', - u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225'), - (u'ניתוח טכני', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1294'), - (u'היי טק', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594'), - (u'נתח שוק וצרכנות', - u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821'), - (u'דין וחשבון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829'), - (u'רכב', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220'), - (u'דעות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845'), - (u'קניון המניות - טור שבועי', - u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3175'), - (u'סביבה', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3221')] - - def print_version(self, url): - split1 = url.split("=") - print_url = 
'http://www.globes.co.il/serve/globes/printwindow.asp?did=' + \ - split1[1] - return print_url - - def preprocess_html(self, soup): - soup.find('tr', attrs={'bgcolor': 'black'} - ).findPrevious('tr').extract() - soup.find('tr', attrs={'bgcolor': 'black'}).extract() - return soup - - def fixChars(self, string): - # Replace lsquo (\x91) - fixed = re.sub("■", "■", string) - return fixed + feeds = [ + (u"עידכוני RSS ", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3038"), + (u"כל הכתבות", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=2"), + (u"שוק ההון", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585"), + (u"בארץ", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=9917"), + (u"גלובלי ושוקי עולם", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225"), + (u"גלובסטק", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594"), + (u"דין וחשבון", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829"), + (u"דעות", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845"), + (u"וידאו", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=2007"), + (u"ליידי גלובס", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3314"), + (u"מגזין G", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3312"), + (u"nadlan", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607"), + (u"נתח שוק וצרכנות", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821"), + (u"מטבעות דיגיטליים", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=9758"), + (u"קריירה", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iid=3266"), + (u"תיירות", u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iid=9010"), + (u"רכב", 
u"https://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220") + ] diff --git a/recipes/go_comics.recipe b/recipes/go_comics.recipe index 26217ac8f1..740e4317c9 100644 --- a/recipes/go_comics.recipe +++ b/recipes/go_comics.recipe @@ -573,7 +573,7 @@ class GoComics(BasicNewsRecipe): fname = ascii_filename('%03d_%s' % (num, title)).replace(' ', '_') path = os.path.join(self.gocomics_dir, fname) html = '{h1}

    {date}

    {img}
    '.format(**data) - with lopen(path, 'wb') as f: + with open(path, 'wb') as f: f.write(html.encode('utf-8')) return {'title':'Page %d of %s' % ((num + 1), title), 'url': ('file:' if iswindows else 'file://') + path.replace(os.sep, '/')} diff --git a/recipes/google_news.recipe b/recipes/google_news.recipe index 7b0becf263..42c4cc666a 100644 --- a/recipes/google_news.recipe +++ b/recipes/google_news.recipe @@ -2,7 +2,7 @@ # vim:fileencoding=utf-8 from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe -from datetime import datetime +from calibre.ptempfile import PersistentTemporaryFile import json # a serarch topic, filled into the string below. You can change that to anything google news should be searched for... @@ -17,9 +17,9 @@ class google_news_de(BasicNewsRecipe): title = 'Google News' cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Google_News_icon.svg/500px-Google_News_icon.svg.png' # Author - __author__ = 'Volker Heggemann, VoHe' + __author__ = 'Volker Heggemann, VoHe, unkn0wn' # oldest article to download (in days) ---- can be edit by user - oldest_article = 2 + oldest_article = 1.25 # describes itself, ---- can be edit by user max_articles_per_feed = 200 # speed up the download on fast computers be careful (I test max.20) @@ -27,10 +27,6 @@ class google_news_de(BasicNewsRecipe): simultaneous_downloads = 10 # description, some Reader show this in titlepage description = u'Google News filter by your own recipe. Please read it in calibre software!' - # add date to description so for dayly downloads you can find them easier - # ---- can be edit by user - description = description + ' fetched: ' + \ - datetime.now().strftime("%Y-%m-%d") # %H:%M:%S") # What is the content of? 
category = u'NEWS' # describes itself, ---- can be edit by user @@ -41,6 +37,30 @@ class google_news_de(BasicNewsRecipe): # remove the rubbish (in ebook) auto_cleanup = True + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/', 'podcast-' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + + self.log('Found link: ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name # now the content description and URL follows # feel free to add, wipe out what you need ---- can be edit by user diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index c990354fac..bdc9425306 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -20,10 +20,11 @@ def classes(classes): class Guardian(BasicNewsRecipe): title = u'The Guardian and The Observer' + is_observer = False + base_url = "https://www.theguardian.com/uk" if date.today().weekday() == 6: + is_observer = True base_url = "https://www.theguardian.com/observer" - else: - base_url = "https://www.theguardian.com/uk" __author__ = 'Kovid Goyal' language = 'en_GB' @@ -89,41 +90,26 @@ class Guardian(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self, *a, **kw) return br - def get_cover_url(self): - coverdate = date.today() - if 'observer' in self.base_url: - cover = ( - 'https://www.thepaperboy.com/frontpages/archive/The_Observer_' + str(coverdate.day) + '_' + - str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg') - else: - cover = ( - 'https://www.thepaperboy.com/frontpages/archive/The_Guardian_' + str(coverdate.day) + '_' + - 
str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg') - - return cover - - def parse_section(self, url, title_prefix=''): - feeds = [] - soup = self.index_to_soup(url) + def parse_section(self, section_url): + soup = self.index_to_soup(section_url) for section in soup.findAll('section'): - title = title_prefix + self.tag_to_string(section.find( - attrs={'class': 'fc-container__header__title'})).strip().capitalize() - self.log('\nFound section:', title) - if 'Video' in title: - self.log('=======> Skip section:', title) + articles = [] + title = self.tag_to_string(section.find('h2')) + if not title: continue - feeds.append((title, [])) + self.log('Found section:', title) for li in section.findAll('li'): - for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True): - title = self.tag_to_string(a).strip() + a = li.find('a', attrs={'href': True, 'aria-label': True}) + if a: url = a['href'] - self.log(' ', title, url) - feeds[-1][1].append({'title': title, 'url': url}) - break - return feeds + if url.startswith('/'): + url = self.base_url.rpartition('/')[0] + url + self.log('\t', a['aria-label'], url) + articles.append({'title': a['aria-label'], 'url': url}) + if articles: + yield title, articles def parse_index(self): - feeds = self.parse_section(self.base_url) - feeds += self.parse_section( - 'https://www.theguardian.com/uk/sport', 'Sport - ') + feeds = list(self.parse_section(self.base_url)) + feeds += list(self.parse_section('https://www.theguardian.com/uk/sport')) return feeds diff --git a/recipes/hamilton_spectator.recipe b/recipes/hamilton_spectator.recipe index 7c0302c386..2fd7635782 100644 --- a/recipes/hamilton_spectator.recipe +++ b/recipes/hamilton_spectator.recipe @@ -1,4 +1,5 @@ -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.ptempfile import PersistentTemporaryFile ''' Hamilton Spectator Calibre Recipe @@ -7,10 +8,8 @@ Hamilton Spectator Calibre Recipe 
class HamiltonSpectator(BasicNewsRecipe): title = u'Hamilton Spectator' - oldest_article = 2 - max_articles_per_feed = 100 - auto_cleanup = True - __author__ = u'Eric Coolman' + max_articles_per_feed = 50 + __author__ = u'unkn0wn' publisher = u'thespec.com' description = u'Ontario Canada Newspaper' category = u'News, Ontario, Canada' @@ -19,47 +18,69 @@ class HamiltonSpectator(BasicNewsRecipe): no_stylesheets = True language = 'en_CA' encoding = 'utf-8' + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://bloximages.chicago2.vip.townnews.com/thespec.com/content/tncms/custom/image/c0094646-1108-11ee-8af0-b3954ce40e5e.png' - feeds = [ - (u'Top Stories', u'http://www.thespec.com/rss?query=/&assetType=Article'), - (u'All News', u'http://www.thespec.com/rss?query=/news&assetType=Article'), - (u'Local', u'http://www.thespec.com/rss?query=/local&assetType=Article'), - (u'Ontario', u'http://www.thespec.com/rss?query=/ontario&assetType=Article'), - (u'Canada', u'http://www.thespec.com/rss?query=/canada&assetType=Article'), - (u'World News', u'http://www.thespec.com/rss?query=/world&assetType=Article'), - (u'Business', u'http://www.thespec.com/rss?query=/business&assetType=Article'), - (u'Crime', u'http://www.thespec.com/rss?query=/crime&assetType=Article'), - (u'All Sports', u'http://www.thespec.com/rss?query=/sports&assetType=Article'), - (u'Ticats', u'http://www.thespec.com/rss?query=/sports/ticats&assetType=Article'), - (u'Bulldogs', u'http://www.thespec.com/rss?query=/sports/bulldogs&assetType=Article'), - (u'High School Sports', - u'http://www.thespec.com/rss?query=/sports/highschools&assetType=Article'), - (u'Local Sports', u'http://www.thespec.com/rss?query=/sports/local&assetType=Article'), - (u'What''s On', u'http://www.thespec.com/rss?query=/whatson&assetType=Article'), - (u'Arts and Entertainment', - u'http://www.thespec.com/rss?query=/whatson/artsentertainment&assetType=Article'), - (u'Books', 
u'http://www.thespec.com/rss?query=/whatson/books&assetType=Article'), - (u'Movies', u'http://www.thespec.com/rss?query=/whatson/movies&assetType=Article'), - (u'Music', u'http://www.thespec.com/rss?query=/whatson/music&assetType=Article'), - (u'Restaurant Reviews', - u'http://www.thespec.com/rss?query=/whatson/restaurants&assetType=Article'), - (u'Opinion', u'http://www.thespec.com/rss?query=/opinion&assetType=Article'), - (u'Opinion Columns', - u'http://www.thespec.com/rss?query=/opinion/columns&assetType=Article'), - (u'Cartoons', u'http://www.thespec.com/rss?query=/opinion/cartoons&assetType=Article'), - (u'Letters', u'http://www.thespec.com/rss?query=/opinion/letters&assetType=Article'), - (u'Editorial', u'http://www.thespec.com/rss?query=/opinion/editorial&assetType=Article'), - (u'Community', u'http://www.thespec.com/rss?query=/community&assetType=Article'), - (u'Education', u'http://www.thespec.com/rss?query=/community/education&assetType=Article'), - (u'Faith', u'http://www.thespec.com/rss?query=/community/faith&assetType=Article'), - (u'Contests', u'http://www.thespec.com/rss?query=/community/contests&assetType=Article'), - (u'Living', u'http://www.thespec.com/rss?query=/living&assetType=Article'), - (u'Food', u'http://www.thespec.com/rss?query=/living/food&assetType=Article'), - (u'Health and Fitness', - u'http://www.thespec.com/rss?query=/living/healthfitness&assetType=Article'), - (u'Your Home', u'http://www.thespec.com/rss?query=/living/home&assetType=Article'), - (u'Travel', u'http://www.thespec.com/rss?query=/living/travel&assetType=Article'), - (u'Family and Parenting', - u'http://www.thespec.com/rss?query=/living/familyparenting&assetType=Article'), - (u'Style', u'http://www.thespec.com/rss?query=/living/style&assetType=Article') + ignore_duplicate_articles = {'title'} + articles_are_obfuscated = True + + extra_css = ''' + .caption { font-size:small; text-align:center; } + .authorList, .endnote_contrib { font-size:small; } + ''' + + keep_only_tags 
= [ + classes( + 'headline asset-summary authorList articleMainArt asset-body' + ) ] + + remove_tags = [ + dict(name=['svg', 'button']), + dict(attrs={'id':['tncms-region-article_instory_top', 'tncms-region-article_bottom', 'asset-video-primary']}), + classes( + 'tnt-blurred-image share-container subscriber-offers access-offers-in-page ' + 'access-offers-wrapper tnt-ads-container adLabelWrapperManual shareIcons ' + 'articleFeedbackCTA comments-container card-image' + ) + ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-srcset':True}): + for x in img['data-srcset'].split(','): + if '640w' in x.split(): + img['src'] = x.split()[0] + return soup + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/', 'podcast' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + feeds = [] + + sections = [ + 'news', 'politics', 'opinion', 'business', 'sports', 'life', 'entertainment' + ] + + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:thespec.com{}&hl=en-CA&gl=IN&ceid=CA:en' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + feeds.append(('Others', a.format(''))) diff --git a/recipes/harpers.recipe b/recipes/harpers.recipe index 511a2a7316..059f4b5e08 100644 --- a/recipes/harpers.recipe +++ b/recipes/harpers.recipe @@ -17,22 +17,67 @@ class Harpers(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - masthead_url = 
'http://harpers.org/wp-content/themes/harpers/images/pheader.gif' conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } - extra_css = ''' - h1{ font-family:georgia ; color:#111111; font-size:large;} - .box-of-helpful{ font-family:arial ; font-size:x-small;} - p{font-family:georgia ;} - .caption{font-family:Verdana,sans-serif;font-size:x-small;color:#666666;} - ''' - keep_only_tags = [ - dict(name='div', attrs={'class': ['postdetailFull', 'articlePost']})] - remove_tags = [dict(name=['link', 'object', 'embed', 'meta', 'base'])] - remove_attributes = ['width', 'height'] + dict( + class_=[ + "article-content", + "template-index-archive", # harper's index + ] + ) + ] + remove_tags = [ + dict( + class_=[ + "component-newsletter-signup", + "sidebar", + "header-meta", + "component-from-author", + "from-issue", + "d-none", + "COA_roles_fix_space", + "section-tags", + "aria-font-adjusts", + "component-share-buttons", + "index-footer", + "index-prev-link", + "comma", + ] + ), + # for harper's index + dict( + class_=[ + "aria-font-adjusts", + "component-share-buttons", + "index-footer", + "index-prev-link", + ] + ), + ] + remove_attributes = ["style", "width", "height"] + + extra_css = """ + h1.article-title { font-size: x-large; margin-bottom: 0.4rem; } + .subheading, .post-subtitle { font-size: large; font-style: italic; margin-bottom: 1rem; } + .byline { margin-bottom: 1rem } + .article-hero-img img, .flex-section-image img, .wp-caption img { + display: block; margin-bottom: 0.3rem; max-width: 100%; height: auto; + box-sizing: border-box; + } + .wp-caption-text { font-size: small; margin-top: 0.3rem; } + + .author-bio { margin-top: 2.5rem; font-style: italic; } + .author-bio em { font-weight: bold; } + + .index-item { font-size: large; margin: 1rem 0; } + .index-statement > p { display: inline-block; margin: 0.5rem 0; } + .index-statement > span { display: inline-block; } + .index-statement .index-tooltip { 
font-size: small; } + """ + feeds = [(u"Harper's Magazine", u'https://harpers.org/feed/')] diff --git a/recipes/harpers_full.recipe b/recipes/harpers_full.recipe index 5a8e91fb0d..159419d623 100644 --- a/recipes/harpers_full.recipe +++ b/recipes/harpers_full.recipe @@ -3,107 +3,169 @@ # vi: set fenc=utf-8 ft=python : # kate: encoding utf-8; syntax python; -__license__ = 'GPL v3' -__copyright__ = '2008-2019, Darko Miletic ' -''' -harpers.org - paid subscription/ printed issue articles +__license__ = "GPL v3" +__copyright__ = "2008-2019, Darko Miletic " +""" +harpers.org - printed issue articles This recipe only get's article's published in text format images and pdf's are ignored -If you have institutional subscription based on access IP you do not need to enter -anything in username/password fields -''' +""" -import time -try: - from urllib.parse import urlencode -except ImportError: - from urllib import urlencode +from urllib.parse import urljoin + +from calibre import browser from calibre.web.feeds.news import BasicNewsRecipe - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +# overwrite this with a custom issue url, e.g. https://harpers.org/archive/2023/01/ +_issue_url = "" class Harpers_full(BasicNewsRecipe): title = "Harper's Magazine - articles from printed edition" - __author__ = 'Darko Miletic' + __author__ = "Darko Miletic, updated by ping" description = "Harper's Magazine, the oldest general-interest monthly in America, explores the issues that drive our national conversation, through long-form narrative journalism and essays, and such celebrated features as the iconic Harper's Index." 
# noqa publisher = "Harpers's" - category = 'news, politics, USA' - oldest_article = 30 + category = "news, politics, USA" + oldest_article = 31 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - delay = 1 - language = 'en' - encoding = 'utf8' - needs_subscription = 'optional' - publication_type = 'magazine' - LOGIN = 'https://harpers.org/wp-admin/admin-ajax.php' + language = "en" + encoding = "utf8" + publication_type = "magazine" + requires_version = (5, 0, 0) # py3 + ignore_duplicate_articles = {"url"} + base_url = "https://harpers.org" + keep_only_tags = [ - classes('article-header-text entry-content'), + dict( + class_=[ + "article-content", + "template-index-archive", # harper's index + ] + ) ] remove_tags = [ - classes('related-issue-tout section-tags component-from-author component-share-buttons') + dict( + class_=[ + "component-newsletter-signup", + "sidebar", + "header-meta", + "component-from-author", + "from-issue", + "d-none", + "COA_roles_fix_space", + "section-tags", + "aria-font-adjusts", + "component-share-buttons", + "index-footer", + "index-prev-link", + "comma", + ] + ), + # for harper's index + dict( + class_=[ + "aria-font-adjusts", + "component-share-buttons", + "index-footer", + "index-prev-link", + ] + ), ] + remove_attributes = ["style", "width", "height"] - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open('https://harpers.org/') - if self.username is not None and self.password is not None: - tt = time.localtime() * 1000 - data = urlencode({'action': 'cds_auth_user', 'm': self.username, 'p': self.password, 'rt': 'https://harpers.org/', 'tt': tt - }) - br.open(self.LOGIN, data) - return br + extra_css = """ + h1.article-title { font-size: x-large; margin-bottom: 0.4rem; } + .subheading, .post-subtitle { font-size: large; font-style: italic; margin-bottom: 1rem; } + .byline { margin-bottom: 1rem } + .article-hero-img img, .flex-section-image img, .wp-caption img { + display: block; 
margin-bottom: 0.3rem; max-width: 100%; height: auto; + box-sizing: border-box; + } + .wp-caption-text { font-size: small; margin-top: 0.3rem; } + + .author-bio { margin-top: 2.5rem; font-style: italic; } + .author-bio em { font-weight: bold; } + + .index-item { font-size: large; margin: 1rem 0; } + .index-statement > p { display: inline-block; margin: 0.5rem 0; } + .index-statement > span { display: inline-block; } + .index-statement .index-tooltip { font-size: small; } + """ + + # Send cookie-less requests to get full article + def get_browser(self, *args, **kwargs): + return self + + def clone_browser(self, *args, **kwargs): + return self.get_browser() + + def open_novisit(self, *args, **kwargs): + br = browser() + return br.open_novisit(*args, **kwargs) + + open = open_novisit + + def preprocess_html(self, soup): + # General UI tweaks + # move subheading to before byline (instead of where it is now, after) + subheading_ele = soup.find(class_="subheading") + byline_ele = soup.find(class_="byline") + if byline_ele and subheading_ele: + byline_ele.insert_before(subheading_ele.extract()) + + # strip extraneous stuff from author bio + for bio in soup.find_all(class_="author-bio"): + for dec_ele in bio.find_all("br"): + dec_ele.decompose() + for unwrap_ele in bio.find_all("p"): + unwrap_ele.unwrap() + + # remove extraneous hr + for hr in soup.select(".after-post-content hr"): + hr.decompose() + return soup def parse_index(self): - # find current issue - soup = self.index_to_soup('https://harpers.org/') - currentIssue_url = soup.find(attrs={'data-current-issue-url': True})['data-current-issue-url'] - self.log('Found issue at:', currentIssue_url) + if not _issue_url: + issues_soup = self.index_to_soup("https://harpers.org/issues/") + curr_issue_a_ele = issues_soup.select_one("div.issue-card a") + curr_issue_url = urljoin(self.base_url, curr_issue_a_ele["href"]) + else: + curr_issue_url = _issue_url - # go to the current issue - soup = 
self.index_to_soup(currentIssue_url) - self.timefmt = u' [%s]' % self.tag_to_string(soup.find('a', href=currentIssue_url)) + soup = self.index_to_soup(curr_issue_url) + self.timefmt = ( + f' [{self.tag_to_string(soup.find("h1", class_="issue-heading")).strip()}]' + ) + self.cover_url = soup.find("img", class_="cover-img")["src"] - # get cover - self.cover_url = soup.find(**classes('past-issue')).find('img')['src'] - self.log('Found cover at:', self.cover_url) - features = [] - - self.log('Features') - for item in soup.find(**classes('issue-features')).findAll(**classes('article-card')): - h = item.find(**classes('ac-title')) - a = h.parent - url = a['href'] - title = self.tag_to_string(h).strip() - h = item.find(**classes('ac-subtitle')) - if h is not None: - st = self.tag_to_string(h).strip() - if st: - title += ': ' + st - desc = '' - p = item.find(**classes('byline')) - if p is not None: - desc += self.tag_to_string(p) - self.log(' ', title, 'at', url) - features.append({'title': title, 'url': url, 'description': desc}) - - readings = [] - self.log('Readings') - for item in soup.find(**classes('issue-readings')).findAll(**classes('reading-item')): - a = item.find('a', **classes('ac-title')) - title = self.tag_to_string(a).strip() - url = a['href'] - desc = '' - a = item.find(**classes('ac-author')) - if a is not None: - desc = self.tag_to_string(a) - self.log(' ', title, 'at', url) - readings.append({'title': title, 'url': url, 'description': desc}) - - return [('Features', features), ('Readings', readings)] + articles = {} + for section_name in ("features", "readings", "articles"): + section = soup.find("section", class_=f"issue-{section_name}") + if not section: + continue + for card in section.find_all("div", class_="article-card"): + title_ele = card.find(class_="ac-title") + if not title_ele: + continue + article_url = card.find("a")["href"] + article_title = self.tag_to_string(title_ele) + article_description = ( + 
f'{self.tag_to_string(card.find(class_="ac-tax"))} ' + f'{self.tag_to_string(card.find(class_="ac-subtitle"))}' + ).strip() + byline = card.find(class_="byline") + if byline: + article_description += ( + f' {self.tag_to_string(byline).strip().strip(",")}' + ) + articles.setdefault(section_name.title(), []).append( + { + "url": article_url, + "title": article_title, + "description": article_description, + } + ) + return articles.items() diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe index f0b0c0218e..8f3081c48b 100644 --- a/recipes/hbr.recipe +++ b/recipes/hbr.recipe @@ -1,124 +1,182 @@ -from calibre.web.feeds.news import BasicNewsRecipe, classes -from datetime import datetime -from calibre import browser -from collections import OrderedDict +import json import re +from collections import OrderedDict +from urllib.parse import urlencode, urljoin + +from calibre import browser, random_user_agent +from calibre.web.feeds.news import BasicNewsRecipe, classes +from mechanize import Request + +_issue_url = "" # custom issue url class HBR(BasicNewsRecipe): - title = 'Harvard Business Review' - __author__ = 'unkn0wn' + title = "Harvard Business Review" + __author__ = "unkn0wn, updated by ping" description = ( - 'Harvard Business Review is the leading destination for smart management thinking.' - ' Through its flagship magazine, books, and digital content and tools published on HBR.org,' - ' Harvard Business Review aims to provide professionals around the world with rigorous insights' - ' and best practices to help lead themselves and their organizations more effectively and to make a positive impact.') - language = 'en' - use_embedded_content = False - no_stylesheets = True + "Harvard Business Review is the leading destination for smart management thinking. 
" + "Through its flagship magazine, books, and digital content and tools published on HBR.org, " + "Harvard Business Review aims to provide professionals around the world with rigorous insights " + "and best practices to help lead themselves and their organizations more effectively and to " + "make a positive impact." + ) + language = "en" + masthead_url = "https://hbr.org/resources/css/images/hbr_logo.svg" + publication_type = "magazine" + encoding = "utf-8" remove_javascript = True - masthead_url = 'http://hbr.org/resources/css/images/hbr_logo.svg' - remove_attributes = ['height', 'width', 'style'] - encoding = 'utf-8' - ignore_duplicate_articles = {'url'} - extra_css = ''' - article-sidebar{font-family:Georgia,"Times New Roman",Times,serif; border:ridge; text-align:left;} - [close-caption]{ border:ridge; font-size:small; text-align:center;} - article-ideainbrief{font-family:Georgia,"Times New Roman",Times,serif; text-align:left; font-style:italic; } - .article-byline-list{font-size:small;} - .credits--hero-image{font-size:small;} - .credits--inline-image{font-size:small;} - .caption--inline-image{font-size:small;} - .description-text{font-size:small; color:gray;} - .right-rail--container{font-size:small; color:#4c4c4c;} - .link--black{font-size:small;} - .article-callout{color:#4c4c4c; text-align:center;} - .slug-content{color:gray;} - ''' + no_stylesheets = True + auto_cleanup = False + compress_news_images = True + ignore_duplicate_articles = {"url"} + base_url = "https://hbr.org" + + remove_attributes = ["height", "width", "style"] + extra_css = """ + h1.article-hed { font-size: x-large; margin-bottom: 0.4rem; } + .article-dek { font-size: large; font-style: italic; margin-bottom: 1rem; } + .article-byline { margin-top: 0.7rem; font-size: medium; font-style: normal; font-weight: bold; } + .pub-date { font-size: small; margin-bottom: 1rem; } + img { + display: block; margin-bottom: 0.3rem; max-width: 100%; height: auto; + box-sizing: border-box; + } + 
.container--caption-credits-hero, .container--caption-credits-inline, span.credit { font-size: small; } + .question { font-weight: bold; } + .description-text { + margin: 1rem 0; + border-top: 1px solid gray; + padding-top: 0.5rem; + font-style: italic; + } + """ keep_only_tags = [ classes( - 'headline-container hero-image-content article-summary article-body standard-content' - ' article-dek-group article-dek slug-container' + "headline-container article-dek-group pub-date hero-image-content " + "article-body standard-content" ), - dict(name='article-sidebar'), ] remove_tags = [ classes( - 'left-rail--container translate-message follow-topic newsletter-container ' + "left-rail--container translate-message follow-topic " + "newsletter-container by-prefix related-topics--common" ), + dict(name=["article-sidebar"]), ] + def preprocess_raw_html(self, raw_html, article_url): + soup = self.soup(raw_html) + + # break author byline out of list + byline_list = soup.find("ul", class_="article-byline-list") + if byline_list: + byline = byline_list.parent + byline.append( + ", ".join( + [ + self.tag_to_string(author) + for author in byline_list.find_all(class_="article-author") + ] + ) + ) + byline_list.decompose() + + # Extract full article content + content_ele = soup.find( + "content", + attrs={ + "data-index": True, + "data-page-year": True, + "data-page-month": True, + "data-page-seo-title": True, + "data-page-slug": True, + }, + ) + endpoint_url = "https://hbr.org/api/article/piano/content?" 
+ urlencode( + { + "year": content_ele["data-page-year"], + "month": content_ele["data-page-month"], + "seotitle": content_ele["data-page-seo-title"], + } + ) + data = { + "contentKey": content_ele["data-index"], + "pageSlug": content_ele["data-page-slug"], + } + headers = { + "User-Agent": random_user_agent(), + "Pragma": "no-cache", + "Cache-Control": "no-cache", + "Content-Type": "application/json", + "Referer": article_url, + } + br = browser() + req = Request( + endpoint_url, + headers=headers, + data=json.dumps(data), + method="POST", + timeout=self.timeout, + ) + res = br.open(req) + article = json.loads(res.read()) + new_soup = self.soup(article["content"]) + # clear out existing partial content + for c in list(content_ele.children): + c.extract() # use extract() instead of decompose() because of strings + content_ele.append(new_soup.body) + return str(soup) + def parse_index(self): - soup = self.index_to_soup('https://hbr.org/magazine') - a = soup.find('a', href=lambda x: x and x.startswith('/archive-toc/')) - url = a['href'] - self.log('Downloading issue:', url) - cov_url = a.find('img', attrs={'src': True})['src'] - self.cover_url = 'https://hbr.org' + cov_url - soup = self.index_to_soup('https://hbr.org' + url) + if not _issue_url: + soup = self.index_to_soup(f"{self.base_url}/magazine") + a = soup.find("a", href=lambda x: x and x.startswith("/archive-toc/")) + cov_url = a.find("img", attrs={"src": True})["src"] + self.cover_url = urljoin(self.base_url, cov_url) + issue_url = urljoin(self.base_url, a["href"]) + else: + issue_url = _issue_url + mobj = re.search(r"archive-toc/(?P<issue>(BR)?\d+)\b", issue_url) + if mobj: + self.cover_url = f'https://hbr.org/resources/images/covers/{mobj.group("issue")}_500.png' + + self.log("Downloading issue:", issue_url) + soup = self.index_to_soup(issue_url) + issue_title = soup.find("h1") + if issue_title: + self.timefmt = f" [{self.tag_to_string(issue_title)}]" feeds = OrderedDict() + for h3 in soup.find_all("h3", 
attrs={"class": "hed"}): + article_link_ele = h3.find("a") + if not article_link_ele: + continue - for h3 in soup.findAll('h3', attrs={'class': 'hed'}): - articles = [] - d = datetime.today() - for a in h3.findAll( - 'a', href=lambda x: x.startswith('/' + d.strftime('%Y') + '/') - ): + article_ele = h3.find_next_sibling( + "div", attrs={"class": "stream-item-info"} + ) + if not article_ele: + continue - title = self.tag_to_string(a) - url = a['href'] - url = 'https://hbr.org' + url - div = h3.find_next_sibling('div', attrs={'class': 'stream-item-info'}) - if div: - aut = self.tag_to_string(div).replace('Magazine Article ', '') - auth = re.sub(r"(?<=\w)([A-Z])", r", \1", aut) - dek = h3.find_next_sibling('div', attrs={'class': 'dek'}) - if dek: - des = self.tag_to_string(dek) - desc = des + ' |' + auth.title() - sec = h3.findParent('li').find_previous_sibling('div', **classes('stream-section-label')).find('h4') - section_title = self.tag_to_string(sec).title() - self.log(section_title) - self.log('\t', title) - self.log('\t', desc) - self.log('\t\t', url) + title = self.tag_to_string(article_link_ele) + url = urljoin(self.base_url, article_link_ele["href"]) - articles.append({ - 'title': title, - 'url': url, - 'description': desc}) - if articles: - if section_title not in feeds: - feeds[section_title] = [] - feeds[section_title] += articles - ans = [(key, val) for key, val in feeds.items()] - return ans + authors_ele = article_ele.select("ul.byline li") + authors = ", ".join([self.tag_to_string(a) for a in authors_ele]) - def preprocess_html(self, soup): - for slug in soup.findAll(**classes('slug-content')): - del slug['href'] - for dek in soup.findAll(**classes('article-byline')): - for by in dek.findAll('span', attrs={'class':'by-prefix'}): - by.extract() - for li in dek.findAll('li'): - li.name = 'span' - for h2 in soup.findAll(('h2','h3')): - h2.name = 'h5' - return soup - - # HBR changes the content it delivers based on cookies, so the - # following ensures 
that we send no cookies - def get_browser(self, *args, **kwargs): - return self - - def clone_browser(self, *args, **kwargs): - return self.get_browser() - - def open_novisit(self, *args, **kwargs): - br = browser() - return br.open_novisit(*args, **kwargs) - - open = open_novisit + article_desc = "" + dek_ele = h3.find_next_sibling("div", attrs={"class": "dek"}) + if dek_ele: + article_desc = self.tag_to_string(dek_ele) + " | " + authors + section_ele = ( + h3.findParent("li") + .find_previous_sibling("div", **classes("stream-section-label")) + .find("h4") + ) + section_title = self.tag_to_string(section_ele).title() + feeds.setdefault(section_title, []).append( + {"title": title, "url": url, "description": article_desc} + ) + return feeds.items() diff --git a/recipes/high_country_news.recipe b/recipes/high_country_news.recipe index 547c8b728c..c9b564579e 100644 --- a/recipes/high_country_news.recipe +++ b/recipes/high_country_news.recipe @@ -11,9 +11,11 @@ from calibre.web.feeds.news import BasicNewsRecipe class HighCountryNews(BasicNewsRecipe): ## # Written: 2012-01-28 - # Last Edited: 2022-08-17 + # Last Edited: 2023-06-30 # - # Remark: Version 2.2 + # Remark: Version 2.3 + # Update language to fix how it appears in UI tree + # Version 2.2 # Update RSS feeds to hcn.org and keep the old feedburner feeds still in place # as there are some old articles available only at feedburner adress # 2019-07-04 @@ -39,7 +41,7 @@ class HighCountryNews(BasicNewsRecipe): publisher = 'High Country News' category = 'News, Politics, Social, Nature, Environmental, Western United States, Native American' timefmt = ' [%a, %d %b %Y]' - language = 'en-Us' + language = 'en' encoding = 'UTF-8' publication_type = 'newspaper' oldest_article = 30 @@ -48,6 +50,7 @@ class HighCountryNews(BasicNewsRecipe): auto_cleanup = False remove_javascript = True remove_empty_feeds = True + remove_attributes = ['width', 'height'] use_embedded_content = False masthead_url = 'http://www.hcn.org/logo.jpg' 
diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe index 7101a88bf1..c8a1a75e70 100644 --- a/recipes/hindu.recipe +++ b/recipes/hindu.recipe @@ -4,33 +4,50 @@ from collections import defaultdict from datetime import date from calibre.web.feeds.news import BasicNewsRecipe, classes - def absurl(url): if url.startswith('/'): url = 'https://www.thehindu.com' + url return url - -local_edition = None # Chennai is default edition, for other editions use 'th_hyderabad', 'th_bangalore', 'th_delhi', 'th_kolkata' etc +local_edition = None +# For past editions, set date to, for example, '2023-01-28' +past_edition = None + +is_monday = date.today().weekday() == 0 +is_friday = date.today().weekday() == 4 +is_saturday = date.today().weekday() == 5 +is_sunday = date.today().weekday() == 6 + +if past_edition: + year, month, day = (int(x) for x in past_edition.split('-')) + dt = date(year, month, day) + is_monday = dt.weekday() == 0 + is_saturday = dt.weekday() == 5 + is_sunday = dt.weekday() == 6 class TheHindu(BasicNewsRecipe): title = 'The Hindu' __author__ = 'unkn0wn' + description = 'Articles from The Hindu, Today\'s Paper.' 
language = 'en_IN' no_stylesheets = True masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg' remove_attributes = ['style', 'height', 'width'] - extra_css = '.caption{font-size:small; text-align:center;}'\ - '.author{font-size:small; font-weight:bold;}'\ - '.subhead, .subhead_lead {font-weight:bold;}'\ - 'img {display:block; margin:0 auto;}' + + extra_css = ''' + .caption {font-size:small; text-align:center;} + .author, .dateLine {font-size:small; font-weight:bold;} + .subhead, .subhead_lead, .bold {font-weight:bold;} + img {display:block; margin:0 auto;} + .italic {font-style:italic; color:#202020;} + ''' ignore_duplicate_articles = {'url'} keep_only_tags = [ - classes('article-section ') + classes('article-section') ] remove_tags = [ @@ -44,14 +61,42 @@ class TheHindu(BasicNewsRecipe): img['src'] = img['data-original'] return soup + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + if not past_edition: + self.title = 'The Hindu ' + date.today().strftime('%b %d, %Y') + else: + self.title = 'The Hindu ' + dt.strftime('%b %d, %Y') + def parse_index(self): - if local_edition: - yr = str(date.today().year) - mn = date.today().strftime('%m') - dy = date.today().strftime('%d') - url = 'https://www.thehindu.com/todays-paper/' + yr + '-' + mn + '-' + dy + '/' + local_edition + '/' + mag_url = None + global local_edition + if local_edition or past_edition: + if local_edition is None: + local_edition = 'th_chennai' + today = date.today().strftime('%Y-%m-%d') + if past_edition: + today = past_edition + self.log('Downloading past edition of', local_edition + ' from ' + today) + url = absurl('/todays-paper/' + today + '/' + local_edition + '/') + if is_monday: + mag_url = url + '?supplement=' + local_edition + '-epbs' + if is_saturday: + mag_url = url + '?supplement=' + local_edition + '-mp' + if is_sunday: + mag_url = url + '?supplement=' + 
local_edition + '-sm' else: url = 'https://www.thehindu.com/todays-paper/' + if is_monday: + mag_url = url + '?supplement=th_chennai-epbs' + if is_friday: + mag_url = url + '?supplement=th_chennai-fr' + if is_saturday: + mag_url = url + '?supplement=th_chennai-mp' + if is_sunday: + mag_url = url + '?supplement=th_chennai-sm' + raw = self.index_to_soup(url, raw=True) soup = self.index_to_soup(raw) ans = self.hindu_parse_index(soup) @@ -62,15 +107,23 @@ class TheHindu(BasicNewsRecipe): raise ValueError( 'The Hindu Newspaper is not published Today.' ) + if mag_url: + self.log('\nFetching Magazine') + soup = self.index_to_soup(mag_url) + ans2 = self.hindu_parse_index(soup) + if ans2: + return ans + ans2 + self.log('\tMagazine not Found') + return ans return ans def hindu_parse_index(self, soup): for script in soup.findAll('script'): - if not self.tag_to_string(script).strip().startswith('let grouped_articles = {}'): + if not self.tag_to_string(script).__contains__('grouped_articles = {"'): continue if script is not None: - art = re.search(r'grouped_articles = ({\"[^<]+?]})', self.tag_to_string(script)) - data = json.loads(art.group(1)) + art = re.search(r'grouped_articles = ({\".*)', self.tag_to_string(script)) + data = json.JSONDecoder().raw_decode(art.group(1))[0] feeds_dict = defaultdict(list) diff --git a/recipes/hindu_business_line_print_edition.recipe b/recipes/hindu_business_line_print_edition.recipe index 7a83251a9e..c61df77105 100644 --- a/recipes/hindu_business_line_print_edition.recipe +++ b/recipes/hindu_business_line_print_edition.recipe @@ -39,7 +39,7 @@ class BusinessLine(BasicNewsRecipe): ] remove_tags = [ - classes('hide-mobile comments-shares share-page editiondetails') + classes('hide-mobile comments-shares share-page editiondetails author-img') ] def preprocess_html(self, soup): @@ -50,13 +50,13 @@ class BusinessLine(BasicNewsRecipe): return soup def parse_index(self): + dt = date.today().strftime('%Y-%m-%d') + # For past editions, set date to, for 
example, '2023-01-28' + # dt = '2023-01-28' if local_edition: - yr = str(date.today().year) - mn = date.today().strftime('%m') - dy = date.today().strftime('%d') - url = absurl('/todays-paper/' + yr + '-' + mn + '-' + dy + '/' + local_edition + '/') + url = absurl('/todays-paper/' + dt + '/' + local_edition + '/') else: - url = 'https://www.thehindubusinessline.com/todays-paper/' + url = absurl('/todays-paper/' + dt + '/bl_chennai/') raw = self.index_to_soup(url, raw=True) soup = self.index_to_soup(raw) ans = self.hindu_parse_index(soup) @@ -74,8 +74,8 @@ class BusinessLine(BasicNewsRecipe): if not self.tag_to_string(script).strip().startswith('let grouped_articles = {}'): continue if script is not None: - art = re.search(r'grouped_articles = ({\"[^<]+?]})', self.tag_to_string(script)) - data = json.loads(art.group(1)) + art = re.search(r'grouped_articles = ({\".*)', self.tag_to_string(script)) + data = json.JSONDecoder().raw_decode(art.group(1))[0] feeds_dict = defaultdict(list) diff --git a/recipes/hindufeeds.recipe b/recipes/hindufeeds.recipe new file mode 100644 index 0000000000..9018824cc5 --- /dev/null +++ b/recipes/hindufeeds.recipe @@ -0,0 +1,86 @@ +from calibre.web.feeds.news import BasicNewsRecipe, classes +from datetime import date + +class TheHindufeeds(BasicNewsRecipe): + title = 'The Hindu (Feeds)' + __author__ = 'unkn0wn' + description = 'The Hindu, based on RSS feeds.' 
+ language = 'en_IN' + no_stylesheets = True + masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg' + remove_attributes = ['style', 'height', 'width'] + resolve_internal_links = True + remove_empty_feeds = True + max_articles_per_feed = 25 + oldest_article = 1.15 # days + + extra_css = ''' + .caption {font-size:small; text-align:center;} + .author, .dateLine, .publish-time {font-size:small; font-weight:bold;} + .subhead, .subhead_lead, .bold {font-weight:bold;} + img {display:block; margin:0 auto;} + .italic {font-style:italic; color:#202020;} + ''' + + ignore_duplicate_articles = {'url'} + + keep_only_tags = [ + classes('article-section') + ] + + remove_tags = [ + dict(name='button'), + dict(attrs={'target':'_self'}), + classes( + 'hide-mobile comments-shares share-page editiondetails breadcrumb' + ' related-topics related-stories also-read premium-label' + ) + ] + + def preprocess_html(self, soup): + for cap in soup.findAll('p', attrs={'class':'caption'}): + cap.name = 'figcaption' + for img in soup.findAll('img', attrs={'data-original':True}): + if img['data-original'].endswith('1x1_spacer.png'): + source = img.findPrevious('source', srcset=True) + img.extract() + if source: + source['src'] = source['srcset'].replace('_320','_1200') + source.name = 'img' + else: + img['src'] = img['data-original'] + for img in soup.findAll('img', attrs={'data-src-template':True}): + img['src'] = img['data-src-template'] + return soup + + def postprocess_html(self, soup, first_fetch): + for src in soup.findAll('source'): + src.extract() + return soup + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + self.title = 'The Hindu (Feeds) ' + date.today().strftime('%b %d, %Y') + + + def get_cover_url(self): + soup = self.index_to_soup('https://www.thehindu.com/todays-paper/') + if cover := soup.find(attrs={'class':'hindu-ad'}): + return cover.img['src'] + + # 
https://www.thehindu.com/rssfeeds/ + feeds = [ + ('India', 'https://www.thehindu.com/news/national/feeder/default.rss'), + ('States', 'https://www.thehindu.com/news/states/feeder/default.rss'), + # ('Cities', 'https://www.thehindu.com/news/cities/feeder/default.rss'), + ('Opinion', 'https://www.thehindu.com/opinion/feeder/default.rss'), + ('Business', 'https://www.thehindu.com/business/feeder/default.rss'), + ('World', 'https://www.thehindu.com/news/international/feeder/default.rss'), + # ('Sport', 'https://www.thehindu.com/sport/feeder/default.rss'), + ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'), + # ('Crossword', 'https://crossword.thehindu.com/?utm_source=thehindu&utm_medium=mainmenufeeder/default.rss'), + ('Science', 'https://www.thehindu.com/sci-tech/science/feeder/default.rss'), + ('Life and Style', 'https://www.thehindu.com/life-and-style/feeder/default.rss'), + ('thRead', 'https://www.thehindu.com/thread/feeder/default.rss') + ] diff --git a/recipes/hindustan_times_print.recipe b/recipes/hindustan_times_print.recipe new file mode 100644 index 0000000000..295aaf8660 --- /dev/null +++ b/recipes/hindustan_times_print.recipe @@ -0,0 +1,113 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import json +from datetime import date +from collections import defaultdict + +# figure out your local_edition from the fetch news log of this recipe +local_edition = 'Delhi' + +today = date.today().strftime('%d/%m/%Y') + +# for older edition, change today +# today = '22/12/2023' + +day, month, year = (int(x) for x in today.split('/')) +dt = date(year, month, day) +today = today.replace('/', '%2F') + +index = 'https://epaper.hindustantimes.com' + +class ht(BasicNewsRecipe): + title = 'Hindustan Times Print Edition' + language = 'en_IN' + __author__ = 'unkn0wn' + masthead_url = 'https://www.htmedia.in/wp-content/uploads/2020/08/HT-dot-com-logo-product.png' + timefmt = ' [' + dt.strftime('%b %d, %Y') + ']' + description = 'Articles from 
the Hindustan Times epaper, digital edition' + encoding = 'utf-8' + delay = 1 + ignore_duplicate_articles = {'title'} + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + self.title = 'HT Print Edition ' + dt.strftime('%b %d, %Y') + + extra_css = ''' + .cap { text-align:center; font-size:small; } + img { display:block; margin:0 auto; } + ''' + + def parse_index(self): + + self.log( + '\n***\nif this recipe fails, report it on: ' + 'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n' + ) + + get_edition = index + '/Home/GetEditionSupplementHierarchy?EditionDate=' + today + edi_data = json.loads(self.index_to_soup(get_edition, raw=True)) + cities = [] + for edi in edi_data: + cities.append(edi['EditionName']) + self.log('## For your local_edition, modify this recipe to match your city from the names below\n(', ', '.join(cities), ')\n') + for edi in edi_data: + if edi['EditionName'] == local_edition: + edi_name = edi['EditionName'] + edi_id = str(edi['EditionId']) + self.log('Downloading', edi_name, 'Edition') + + url = index + '/Home/GetAllpages?editionid=' + edi_id + '&editiondate=' + today + main_data = json.loads(self.index_to_soup(url, raw=True)) + + feeds_dict = defaultdict(list) + + for page in main_data: + page_no = page['PageNumber'] + sec_name = page['NewsProPageTitle'] + if sec_name == 'Full Page Ad': + continue + if sec_name.startswith('Front'): + self.cover_url = page['HighResolution'] + art = index + '/Home/getingRectangleObject?pageid=' + str(page['PageId']) + self.log(sec_name, ' ', page_no) + art_data = json.loads(self.index_to_soup(art, raw=True)) + for snaps in art_data: + section = sec_name + url = str(snaps['OrgId']) + title = ' '.join(snaps['StoryTitle'].split()[:15]) + if not title: + continue + desc = page_no + self.log('\t', title, ' ', desc) + feeds_dict[section].append({"title": title, "description": desc, "url": url}) + return 
[(section, articles) for section, articles in feeds_dict.items()] + + + def preprocess_raw_html(self, raw, *a): + data = json.loads(raw) + body = '' + for x in data['StoryContent']: + if x['Headlines']: + if len(x['Headlines']) > 0: + body += '<h1>' + x['Headlines'][0].replace('\n', ' ') + '</h1>' + for y in x['Headlines'][1:]: + body += '<h4>' + y.replace('\n', ' ') + '</h4>' + if data['LinkPicture']: + for pics in data['LinkPicture']: + if pics['fullpathlinkpic']: + body += '<div><img src="{}"></div>'.format(pics['fullpathlinkpic']) + if pics['caption']: + body += '<div class="cap">' + pics['caption'] + '</div><p>' + for x in data['StoryContent']: + if x['Body']: + body += x['Body'] + # if data['filepathstorypic']: # this gives you a snap image of the article from page + # body += '<div><img src="{}"></div>'.format(data['filepathstorypic'].replace('\\', '/')) + return '<html><body><div>' + body + '</div></body></html>' + + def print_version(self, url): + return index + '/User/ShowArticleView?OrgId=' + url + + def populate_article_metadata(self, article, soup, first): + article.url = '***' diff --git a/recipes/hindutamil.recipe b/recipes/hindutamil.recipe new file mode 100644 index 0000000000..02d831501a --- /dev/null +++ b/recipes/hindutamil.recipe @@ -0,0 +1,73 @@ +from calibre.ptempfile import PersistentTemporaryFile +from calibre.web.feeds.news import BasicNewsRecipe, classes + +class hindutamil(BasicNewsRecipe): + title = 'இந்து தமிழ் திசை' + __author__ = 'unkn0wn' + description = ( + 'Hindu Tamil Thisai stands differentiated from the rest of the language dailies in Tamil Nadu ' + 'through its unbiased news coverage, in-depth analysis of international, national and local issues.' + ) + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'ta' + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://static.hindutamil.in/hindu/static/store/images/logo.png' + + def get_browser(self): + return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') + + keep_only_tags = [ + classes('main-article') + ] + + remove_tags = [ + classes('newsbot-ads article-details-ads-inner art-follow-title1 dont-miss-it') + ] + + ignore_duplicate_articles = {'title'} + remove_empty_feeds = True + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return 
pt.name + + feeds = [] + + sections = [ + ('தமிழகம்', 'tamilnadu'), + ('இந்தியா', 'india'), + ('கருத்துப் பேழை', 'opinion'), + ('உலகம்', 'world'), + ('வணிகம்', 'business'), + # ('விளையாட்டு', 'sports'), + # ('தமிழ் சினிமா', 'cinema'), + ('தொழில்நுட்பம்', 'technology'), + # ('இணைப்பிதழ்கள்', 'supplements'), + ] + + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:hindutamil.in%2Fnews{}&hl=ta-IN&gl=IN&ceid=IN:ta' + feeds.append((sec[0], a.format('%2F' + sec[1] + '%2F'))) + # feeds.append(('Others', a.format(''))) diff --git a/recipes/history_today.recipe b/recipes/history_today.recipe index e536682f35..61f17128d8 100644 --- a/recipes/history_today.recipe +++ b/recipes/history_today.recipe @@ -23,13 +23,13 @@ class HistoryToday(BasicNewsRecipe): def get_browser(self): br = BasicNewsRecipe.get_browser(self) if self.username is not None and self.password is not None: - br.open('http://www.historytoday.com/user/login') + br.open('https://www.historytoday.com/user/login') br.select_form(nr=1) br['name'] = self.username br['pass'] = self.password res = br.submit() raw = res.read() - if 'Session limit exceeded' in raw: + if b'Session limit exceeded' in raw: br.select_form(nr=1) control = br.find_control('sid').items[1] sid = [] @@ -40,13 +40,13 @@ class HistoryToday(BasicNewsRecipe): def parse_index(self): # Find date - soup0 = self.index_to_soup('http://www.historytoday.com/') + soup0 = self.index_to_soup('https://www.historytoday.com/') dates = self.tag_to_string(soup0.find( 'div', attrs={'id': 'block-block-226'}).span) self.timefmt = u' [%s]' % dates # Go to issue - soup = self.index_to_soup('http://www.historytoday.com/contents') + soup = self.index_to_soup('https://www.historytoday.com/contents') cover = soup.find('div', attrs={ 'id': 'content-area'}).find('img', attrs={'src': re.compile('.*cover.*')})['src'] self.cover_url = cover @@ -69,12 +69,12 @@ class HistoryToday(BasicNewsRecipe): if len(subarticle) < 2: continue title = 
self.tag_to_string(subarticle[0]) - originalurl = "http://www.historytoday.com" + \ + originalurl = "https://www.historytoday.com" + \ subarticle[0].span.a['href'].strip() originalpage = self.index_to_soup(originalurl) printurl = originalpage.find( 'div', attrs={'id': 'ht-tools'}).a['href'].strip() - url = "http://www.historytoday.com" + printurl + url = "https://www.historytoday.com" + printurl desc = self.tag_to_string(subarticle[1]) articles.append({'title': title, 'url': url, 'description': desc, 'date': ''}) @@ -88,4 +88,4 @@ class HistoryToday(BasicNewsRecipe): return ans def cleanup(self): - self.browser.open('http://www.historytoday.com/logout') + self.browser.open('https://www.historytoday.com/logout') diff --git a/recipes/hna.recipe b/recipes/hna.recipe index b742143119..814476dd9e 100644 --- a/recipes/hna.recipe +++ b/recipes/hna.recipe @@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal ' Fetch Hessisch Niedersachsische Allgemeine. ''' -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes class hnaDe(BasicNewsRecipe): @@ -20,78 +20,28 @@ class hnaDe(BasicNewsRecipe): max_articles_per_feed = 40 no_stylesheets = True remove_javascript = True - auto_cleanup = True encoding = 'utf-8' + masthead_url = 'https://idcdn.de/west/assets/hna-de/img/logo--cf5324e1.svg' - remove_tags = [dict(id='topnav'), - dict(id='nav_main'), - dict(id='teaser'), - dict(id='suchen'), - dict(id='superbanner'), - dict(id='navigation'), - dict(id='skyscraper'), - dict(id='idHeaderSearchForm'), - dict(id='idHeaderSearchBar'), - dict(id='idLoginBarWrap'), - dict(id='idAccountButtons'), - dict(id='idHeadButtons'), - dict(id='idBoxesWrap'), - dict(id='idJSMainNavigation'), - dict(id=''), - dict(name='span'), - dict(name='ul', attrs={'class': 'linklist'}), - dict(name='ul', attrs={ - 'class': 'idMainNavi idJSActive idHeadHomeBtn'}), - dict(name='ul', attrs={ - 'class': 'idHiddenNavi idNaviSubcategories'}), - dict(name='a', 
attrs={'href': '#'}), - dict(name='a', attrs={'class': 'idImgLink'}), - dict(name='a', attrs={'class': 'idListLink'}), - dict(name='div', attrs={'class': 'hlist'}), - dict(name='div', attrs={'class': 'idTabWrap'}), - dict(name='li', attrs={ - 'class': 'idButton idIsLoginGroup idHeaderRegister '}), - dict(name='li', attrs={'class': 'idVideoBar idFirst'}), - dict(name='li', attrs={ - 'class': 'idSetStartPageLink idLast'}), - dict(name='li', attrs={'class': 'idKinderNetzBar idLast'}), - dict(name='li', attrs={'class': 'idFotoBar '}), - dict(name='div', attrs={'class': 'subc noprint'}), - dict(name='div', attrs={'class': 'idTxtLay'}), - dict(name='div', attrs={ - 'class': 'idLay idClStandard idStaticHtml'}), - dict(name='div', attrs={'class': 'idHeaderWrap'}), - dict(name='div', attrs={ - 'class': 'idLay idRss idClStandard'}), - dict(name='div', attrs={ - 'class': 'idLay idClStandard idLeadStoriesFocus idLeadStoriesFocusOverlay '}), - dict(name='div', attrs={ - 'class': 'idTeaserLay idTeaserFloat idMediaLeft idLast'}), - dict(name='div', attrs={ - 'class': 'idHeaderButtons idAccountButtons'}), - dict(name='div', attrs={ - 'class': 'idTeaserLay idTeaserWithImg idSize4 idMediaLeft'}), - dict(name='div', attrs={ - 'class': 'idHeaderButtons idHeadButtons'}), - dict(name='div', attrs={ - 'class': 'idHeaderButtons idSetStartPage'}), - dict(name='div', attrs={ - 'class': 'idLay idClHl idTeaserList '}), - dict(name='div', attrs={'class': 'idNavigationWrap'}), - dict(name='div', attrs={'class': 'idBreadcrumbWrap'}), - dict(name='div', attrs={'class': 'idBoxesWrap'}), - dict(name='div', attrs={'class': 'idBreadcrumb'}), - dict(name='div', attrs={ - 'class': 'idLay idAdvertising idClStandard '}), - dict(name='span', attrs={'class': 'idHeadLineIntro'}), - dict(name='p', attrs={'class': 'breadcrumb'}), - dict(name='a', attrs={'style': 'cursor:hand'}), - dict(name='p', attrs={'class': 'h5'}), - dict(name='p', attrs={'class': 'idMoreEnd'})] - remove_tags_after = [ - dict(name='div', 
attrs={'class': 'idTxtLay idStaticHtmlIEHelper'})] + def get_cover_url(self): + soup = self.index_to_soup('https://epaper.meinehna.de/') + if a := soup.find('a', attrs={'class':'edition-cover__link'}): + if citem := a.find('img', src=True): + return citem['src'] - feeds = [('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'), - ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel'), - ('hna_KSV', 'http://feeds2.feedburner.com/hna/ksv'), - ('hna_kultur', 'http://feeds2.feedburner.com/hna/kultur')] + keep_only_tags = [ + dict(name='article', attrs={'class':lambda x: x and 'id-Story' in x.split()}) + ] + remove_tags = [ + classes( + 'id-DonaldBreadcrumb id-StoryElement-interactionBar id-Recommendation ' + 'id-Comments id-Comments--targetHelper id-StoryElement-inArticleReco' + ) + ] + + feeds = [ + ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'), + ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel'), + ('hna_KSV', 'http://feeds2.feedburner.com/hna/ksv'), + ('hna_kultur', 'http://feeds2.feedburner.com/hna/kultur') + ] diff --git a/recipes/horizons.recipe b/recipes/horizons.recipe new file mode 100644 index 0000000000..fa95ad7a77 --- /dev/null +++ b/recipes/horizons.recipe @@ -0,0 +1,72 @@ +''' +https://www.cirsd.org/en/horizons +''' + +from calibre.web.feeds.news import BasicNewsRecipe, classes + +class horizons(BasicNewsRecipe): + title = 'Horizons' + __author__ = 'unkn0wn' + description = (' Horizons – Journal of International Relations and Sustainable Development.' 
+ ' Horizons serves as a high-level platform for influential voices from around the world to' + ' provide informed analysis and conduct reasoned exchanges on the full spectrum of issues' + ' that shape international developments.') + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'en' + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://www.cirsd.org/bundles/olpublic/images/horizons-logo.jpg' + ignore_duplicate_articles = {'url'} + extra_css = 'em{color:#404040;}' + + keep_only_tags = [ + dict(name='div', attrs={'class':'article'}) + ] + remove_tags = [ + classes('back-link'), + dict(name='div', attrs={'class':'single-post-footer'}) + ] + + def get_browser(self): + return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False) + + def parse_index(self): + soup = self.index_to_soup('https://www.cirsd.org/en/horizons') + a = soup.findAll('a', href=True, attrs={'class':'horizon-gallery-box'})[0] #use 1 for previous edition + url = a['href'] + if url.startswith('/'): + url = 'https://www.cirsd.org' + url + self.cover_url = a.find('img')['src'] + self.log(self.cover_url) + issue = a.find('div', attrs={'class':'horizon-gallery-title'}) + if issue: + self.title = self.tag_to_string(issue).strip() + self.timefmt = ' [' + self.tag_to_string(issue).strip().replace('Horizons ', '') + ']' + self.log('Downloading Issue: ', self.timefmt, self.title) + soup = self.index_to_soup(url) + + feeds = [] + for section in soup.findAll('h2', attrs={'class':'mt-3'}): + secname = self.tag_to_string(section).strip() + self.log(secname) + articles = [] + div = section.findNext('div', attrs={'class':'mb-3'}) + for li in div.findAll('li', attrs={'class':'mb-2'}): + a = li.find('a', href=True) + url = a['href'] + if url.startswith('/'): + url = 'https://www.cirsd.org' + url + title = self.tag_to_string(a) + span = li.find('span', attrs={'class':'section-author'}) + desc = '' + if span: + desc = 
self.tag_to_string(span).strip() + self.log('\t', title, '\n\t', desc, '\n\t\t', url) + articles.append({ + 'title': title, + 'url': url, + 'description': desc}) + if articles: + feeds.append((secname, articles)) + return feeds diff --git a/recipes/houston_chronicle.recipe b/recipes/houston_chronicle.recipe index e6ab9e50a2..70fdf55ee6 100644 --- a/recipes/houston_chronicle.recipe +++ b/recipes/houston_chronicle.recipe @@ -16,7 +16,7 @@ from collections import OrderedDict from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.utils.cleantext import clean_ascii_chars from calibre.ebooks.BeautifulSoup import NavigableString -from calibre.utils.date import dt_factory, local_tz +from calibre.utils.date import dt_factory, local_tz, utcfromtimestamp regex_date_only = re.compile(r"""(?:January|February|March|April| {8}May|June|July|August|September|October|November| @@ -216,7 +216,7 @@ class HoustonChronicle(BasicNewsRecipe): summary = self.get_article_description_from_doc(soup) article_date = self.get_published_time_from_doc(soup) if article_date is not None: - article_timestamp = float((article_date - datetime.utcfromtimestamp(0)).total_seconds()) + article_timestamp = float((article_date - utcfromtimestamp(0)).total_seconds()) article.date = article_timestamp article.utctime = dt_factory(article_date.timetuple(), assume_utc=True, as_utc=True) article.localtime = article.utctime.astimezone(local_tz) diff --git a/recipes/icons/180.png b/recipes/icons/180.png index 57a5f410da..2e8da5b5a8 100644 Binary files a/recipes/icons/180.png and b/recipes/icons/180.png differ diff --git a/recipes/icons/1843.png b/recipes/icons/1843.png index d41a557c0f..eaad0b2eab 100644 Binary files a/recipes/icons/1843.png and b/recipes/icons/1843.png differ diff --git a/recipes/icons/20_minutos.png b/recipes/icons/20_minutos.png index 1992482711..165cf8e48b 100644 Binary files a/recipes/icons/20_minutos.png and b/recipes/icons/20_minutos.png differ diff --git 
a/recipes/icons/20minutes.png b/recipes/icons/20minutes.png index 81e23871cc..8be97a8997 100644 Binary files a/recipes/icons/20minutes.png and b/recipes/icons/20minutes.png differ diff --git a/recipes/icons/7seri.png b/recipes/icons/7seri.png index a8b9254ae2..a296733d85 100644 Binary files a/recipes/icons/7seri.png and b/recipes/icons/7seri.png differ diff --git a/recipes/icons/7x7.png b/recipes/icons/7x7.png index 9d8bbd02cf..c56e505aa4 100644 Binary files a/recipes/icons/7x7.png and b/recipes/icons/7x7.png differ diff --git a/recipes/icons/DrawAndCook.png b/recipes/icons/DrawAndCook.png index fc3b2a2abc..7fe589ac89 100644 Binary files a/recipes/icons/DrawAndCook.png and b/recipes/icons/DrawAndCook.png differ diff --git a/recipes/icons/TheMITPressReader.png b/recipes/icons/TheMITPressReader.png new file mode 100644 index 0000000000..fe56ca7974 Binary files /dev/null and b/recipes/icons/TheMITPressReader.png differ diff --git a/recipes/icons/aabenraalokalavisen_dk.png b/recipes/icons/aabenraalokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/aabenraalokalavisen_dk.png and b/recipes/icons/aabenraalokalavisen_dk.png differ diff --git a/recipes/icons/aachener_nachrichten.png b/recipes/icons/aachener_nachrichten.png deleted file mode 100644 index 8bc6e36174..0000000000 Binary files a/recipes/icons/aachener_nachrichten.png and /dev/null differ diff --git a/recipes/icons/aarhuslokalavisen_dk.png b/recipes/icons/aarhuslokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/aarhuslokalavisen_dk.png and b/recipes/icons/aarhuslokalavisen_dk.png differ diff --git a/recipes/icons/aarhusmidtlokalavisen_dk.png b/recipes/icons/aarhusmidtlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/aarhusmidtlokalavisen_dk.png and b/recipes/icons/aarhusmidtlokalavisen_dk.png differ diff --git a/recipes/icons/aarhusnordlokalavisen_dk.png b/recipes/icons/aarhusnordlokalavisen_dk.png index 
6329cfd130..70464fce89 100644 Binary files a/recipes/icons/aarhusnordlokalavisen_dk.png and b/recipes/icons/aarhusnordlokalavisen_dk.png differ diff --git a/recipes/icons/aarhussydlokalavisen_dk.png b/recipes/icons/aarhussydlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/aarhussydlokalavisen_dk.png and b/recipes/icons/aarhussydlokalavisen_dk.png differ diff --git a/recipes/icons/aarhusvestlokalavisen_dk.png b/recipes/icons/aarhusvestlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/aarhusvestlokalavisen_dk.png and b/recipes/icons/aarhusvestlokalavisen_dk.png differ diff --git a/recipes/icons/abc.png b/recipes/icons/abc.png index d30c724b34..bcce79c89f 100644 Binary files a/recipes/icons/abc.png and b/recipes/icons/abc.png differ diff --git a/recipes/icons/abc_au.png b/recipes/icons/abc_au.png index e1b14c72c9..77f1fb6764 100644 Binary files a/recipes/icons/abc_au.png and b/recipes/icons/abc_au.png differ diff --git a/recipes/icons/abc_py.png b/recipes/icons/abc_py.png index e6138769cc..d5d9c7f8e1 100644 Binary files a/recipes/icons/abc_py.png and b/recipes/icons/abc_py.png differ diff --git a/recipes/icons/ad.png b/recipes/icons/ad.png index 30ec236871..0c38d6d491 100644 Binary files a/recipes/icons/ad.png and b/recipes/icons/ad.png differ diff --git a/recipes/icons/adnkronos.png b/recipes/icons/adnkronos.png index d783965df5..d7973c7bab 100644 Binary files a/recipes/icons/adnkronos.png and b/recipes/icons/adnkronos.png differ diff --git a/recipes/icons/adventuregamers.png b/recipes/icons/adventuregamers.png index 1d548e6291..37bebf1c7a 100644 Binary files a/recipes/icons/adventuregamers.png and b/recipes/icons/adventuregamers.png differ diff --git a/recipes/icons/afr.png b/recipes/icons/afr.png new file mode 100644 index 0000000000..f3deacc513 Binary files /dev/null and b/recipes/icons/afr.png differ diff --git a/recipes/icons/aftenposten.png b/recipes/icons/aftenposten.png deleted file mode 
100644 index c7f427aafc..0000000000 Binary files a/recipes/icons/aftenposten.png and /dev/null differ diff --git a/recipes/icons/aftonbladet.png b/recipes/icons/aftonbladet.png index 54edcd849d..0964e9183d 100644 Binary files a/recipes/icons/aftonbladet.png and b/recipes/icons/aftonbladet.png differ diff --git a/recipes/icons/agrogerila.png b/recipes/icons/agrogerila.png deleted file mode 100644 index a95cbfc2b6..0000000000 Binary files a/recipes/icons/agrogerila.png and /dev/null differ diff --git a/recipes/icons/ainonline.png b/recipes/icons/ainonline.png index e7e4834100..480d31e156 100644 Binary files a/recipes/icons/ainonline.png and b/recipes/icons/ainonline.png differ diff --git a/recipes/icons/air_force_times.png b/recipes/icons/air_force_times.png deleted file mode 100644 index dcae18de1e..0000000000 Binary files a/recipes/icons/air_force_times.png and /dev/null differ diff --git a/recipes/icons/ajc.png b/recipes/icons/ajc.png index 7cac2ddd0b..ac4fcc76d0 100644 Binary files a/recipes/icons/ajc.png and b/recipes/icons/ajc.png differ diff --git a/recipes/icons/ajiajin.png b/recipes/icons/ajiajin.png index 850549d75c..3a9f3a26e7 100644 Binary files a/recipes/icons/ajiajin.png and b/recipes/icons/ajiajin.png differ diff --git a/recipes/icons/aksiyon_derigisi.png b/recipes/icons/aksiyon_derigisi.png index 6edbd48ad6..3af2487f32 100644 Binary files a/recipes/icons/aksiyon_derigisi.png and b/recipes/icons/aksiyon_derigisi.png differ diff --git a/recipes/icons/aktualne.cz.png b/recipes/icons/aktualne.cz.png index c84ab48ac0..e633413723 100644 Binary files a/recipes/icons/aktualne.cz.png and b/recipes/icons/aktualne.cz.png differ diff --git a/recipes/icons/al_jazeera.png b/recipes/icons/al_jazeera.png index 92aaeaaa23..efea8df449 100644 Binary files a/recipes/icons/al_jazeera.png and b/recipes/icons/al_jazeera.png differ diff --git a/recipes/icons/al_masry_alyoum_arabic.png b/recipes/icons/al_masry_alyoum_arabic.png index 92f21891cf..0bc59e7f23 100644 Binary files 
a/recipes/icons/al_masry_alyoum_arabic.png and b/recipes/icons/al_masry_alyoum_arabic.png differ diff --git a/recipes/icons/al_monitor.png b/recipes/icons/al_monitor.png index aa93606aed..d2ac87f67e 100644 Binary files a/recipes/icons/al_monitor.png and b/recipes/icons/al_monitor.png differ diff --git a/recipes/icons/albert_mohler.png b/recipes/icons/albert_mohler.png index 6df02e4330..d153d6d6f2 100644 Binary files a/recipes/icons/albert_mohler.png and b/recipes/icons/albert_mohler.png differ diff --git a/recipes/icons/alleroedlokalavisen_dk.png b/recipes/icons/alleroedlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/alleroedlokalavisen_dk.png and b/recipes/icons/alleroedlokalavisen_dk.png differ diff --git a/recipes/icons/alt_om_herning.png b/recipes/icons/alt_om_herning.png index e10faded09..f20ec93408 100644 Binary files a/recipes/icons/alt_om_herning.png and b/recipes/icons/alt_om_herning.png differ diff --git a/recipes/icons/alternet.png b/recipes/icons/alternet.png index 280e3abbe4..c414410f92 100644 Binary files a/recipes/icons/alternet.png and b/recipes/icons/alternet.png differ diff --git a/recipes/icons/altomdata_dk.png b/recipes/icons/altomdata_dk.png index 3688b6993f..22815b8eee 100644 Binary files a/recipes/icons/altomdata_dk.png and b/recipes/icons/altomdata_dk.png differ diff --git a/recipes/icons/am730.png b/recipes/icons/am730.png index 2bbba2a517..c96929822b 100644 Binary files a/recipes/icons/am730.png and b/recipes/icons/am730.png differ diff --git a/recipes/icons/ambito.png b/recipes/icons/ambito.png index 76ca2296b6..e1d29899e0 100644 Binary files a/recipes/icons/ambito.png and b/recipes/icons/ambito.png differ diff --git a/recipes/icons/ambito_financiero.png b/recipes/icons/ambito_financiero.png index 76ca2296b6..e1d29899e0 100644 Binary files a/recipes/icons/ambito_financiero.png and b/recipes/icons/ambito_financiero.png differ diff --git a/recipes/icons/amspec.png b/recipes/icons/amspec.png index 
4ae255b4c1..6057ca86e5 100644 Binary files a/recipes/icons/amspec.png and b/recipes/icons/amspec.png differ diff --git a/recipes/icons/anandtech.png b/recipes/icons/anandtech.png index f416e26b3e..b425ef5f71 100644 Binary files a/recipes/icons/anandtech.png and b/recipes/icons/anandtech.png differ diff --git a/recipes/icons/anchorage_daily.png b/recipes/icons/anchorage_daily.png index 761bc1053e..c401d0a337 100644 Binary files a/recipes/icons/anchorage_daily.png and b/recipes/icons/anchorage_daily.png differ diff --git a/recipes/icons/andhrajyothy_ap.png b/recipes/icons/andhrajyothy_ap.png new file mode 100644 index 0000000000..f0a09c537a Binary files /dev/null and b/recipes/icons/andhrajyothy_ap.png differ diff --git a/recipes/icons/andhrajyothy_tel.png b/recipes/icons/andhrajyothy_tel.png new file mode 100644 index 0000000000..f0a09c537a Binary files /dev/null and b/recipes/icons/andhrajyothy_tel.png differ diff --git a/recipes/icons/animal_politico.png b/recipes/icons/animal_politico.png index f84385c28c..5bd813ae05 100644 Binary files a/recipes/icons/animal_politico.png and b/recipes/icons/animal_politico.png differ diff --git a/recipes/icons/anthony_muroni.png b/recipes/icons/anthony_muroni.png index 791faadb79..4a0df4f3a0 100644 Binary files a/recipes/icons/anthony_muroni.png and b/recipes/icons/anthony_muroni.png differ diff --git a/recipes/icons/antyweb.png b/recipes/icons/antyweb.png index 8ca9870f60..f6188b0db9 100644 Binary files a/recipes/icons/antyweb.png and b/recipes/icons/antyweb.png differ diff --git a/recipes/icons/ap.png b/recipes/icons/ap.png index 996abb6621..cc4030d745 100644 Binary files a/recipes/icons/ap.png and b/recipes/icons/ap.png differ diff --git a/recipes/icons/appledaily_tw.png b/recipes/icons/appledaily_tw.png index 20f81bdc01..fa26911e1c 100644 Binary files a/recipes/icons/appledaily_tw.png and b/recipes/icons/appledaily_tw.png differ diff --git a/recipes/icons/aprospect.png b/recipes/icons/aprospect.png index 
921d8e48a5..d0b3914565 100644 Binary files a/recipes/icons/aprospect.png and b/recipes/icons/aprospect.png differ diff --git a/recipes/icons/ara.png b/recipes/icons/ara.png index 05e9e19378..39d21b39b3 100644 Binary files a/recipes/icons/ara.png and b/recipes/icons/ara.png differ diff --git a/recipes/icons/ara_info.png b/recipes/icons/ara_info.png index 821ad0ecdb..32e0edc0f1 100644 Binary files a/recipes/icons/ara_info.png and b/recipes/icons/ara_info.png differ diff --git a/recipes/icons/arabian_business.png b/recipes/icons/arabian_business.png index 9daf114a00..cb7ef139cd 100644 Binary files a/recipes/icons/arabian_business.png and b/recipes/icons/arabian_business.png differ diff --git a/recipes/icons/arbetaren.png b/recipes/icons/arbetaren.png index ca26f7f8cf..eca81fb246 100644 Binary files a/recipes/icons/arbetaren.png and b/recipes/icons/arbetaren.png differ diff --git a/recipes/icons/arcadia.png b/recipes/icons/arcadia.png index 0c4d6f9bce..3ed304becd 100644 Binary files a/recipes/icons/arcadia.png and b/recipes/icons/arcadia.png differ diff --git a/recipes/icons/arcamax.png b/recipes/icons/arcamax.png index c9f7de7415..3782dfe85b 100644 Binary files a/recipes/icons/arcamax.png and b/recipes/icons/arcamax.png differ diff --git a/recipes/icons/arizona_republic.png b/recipes/icons/arizona_republic.png index e5c9eeb4a4..19a813a1ed 100644 Binary files a/recipes/icons/arizona_republic.png and b/recipes/icons/arizona_republic.png differ diff --git a/recipes/icons/arret_sur_images.png b/recipes/icons/arret_sur_images.png index 5be829d87f..712ec3572c 100644 Binary files a/recipes/icons/arret_sur_images.png and b/recipes/icons/arret_sur_images.png differ diff --git a/recipes/icons/ars_technica.png b/recipes/icons/ars_technica.png index a50955d48b..9042426fc7 100644 Binary files a/recipes/icons/ars_technica.png and b/recipes/icons/ars_technica.png differ diff --git a/recipes/icons/asahi_shimbun_en.png b/recipes/icons/asahi_shimbun_en.png index 67bb860bf9..775fed3917 
100644 Binary files a/recipes/icons/asahi_shimbun_en.png and b/recipes/icons/asahi_shimbun_en.png differ diff --git a/recipes/icons/asco_de_vida.png b/recipes/icons/asco_de_vida.png index 3b8fa36bc4..3ec5432c5e 100644 Binary files a/recipes/icons/asco_de_vida.png and b/recipes/icons/asco_de_vida.png differ diff --git a/recipes/icons/asia_one.png b/recipes/icons/asia_one.png index 01b099df4a..391f9a7683 100644 Binary files a/recipes/icons/asia_one.png and b/recipes/icons/asia_one.png differ diff --git a/recipes/icons/astro_news_pl.png b/recipes/icons/astro_news_pl.png index 279ecdb0bc..5a18c684a0 100644 Binary files a/recipes/icons/astro_news_pl.png and b/recipes/icons/astro_news_pl.png differ diff --git a/recipes/icons/atlantic.png b/recipes/icons/atlantic.png index 639d377e76..2c8384562c 100644 Binary files a/recipes/icons/atlantic.png and b/recipes/icons/atlantic.png differ diff --git a/recipes/icons/atlantic_com.png b/recipes/icons/atlantic_com.png index ced037ba72..ab69283704 100644 Binary files a/recipes/icons/atlantic_com.png and b/recipes/icons/atlantic_com.png differ diff --git a/recipes/icons/attac_es.png b/recipes/icons/attac_es.png index cdd887f7d0..27dafebaee 100644 Binary files a/recipes/icons/attac_es.png and b/recipes/icons/attac_es.png differ diff --git a/recipes/icons/auto.png b/recipes/icons/auto.png index 27e0c017d5..10679e87ba 100644 Binary files a/recipes/icons/auto.png and b/recipes/icons/auto.png differ diff --git a/recipes/icons/auto_blog.png b/recipes/icons/auto_blog.png index ecbc2265e1..69ba85d8f9 100644 Binary files a/recipes/icons/auto_blog.png and b/recipes/icons/auto_blog.png differ diff --git a/recipes/icons/auto_prove.png b/recipes/icons/auto_prove.png index 27e0c017d5..10679e87ba 100644 Binary files a/recipes/icons/auto_prove.png and b/recipes/icons/auto_prove.png differ diff --git a/recipes/icons/automatiseringgids.png b/recipes/icons/automatiseringgids.png index 9375f9fbc5..6f2b464441 100644 Binary files 
a/recipes/icons/automatiseringgids.png and b/recipes/icons/automatiseringgids.png differ diff --git a/recipes/icons/autosport.png b/recipes/icons/autosport.png index 1a5ed8895c..f47fb352dc 100644 Binary files a/recipes/icons/autosport.png and b/recipes/icons/autosport.png differ diff --git a/recipes/icons/avantaje.png b/recipes/icons/avantaje.png index 6dc40aa6fe..be3e00908d 100644 Binary files a/recipes/icons/avantaje.png and b/recipes/icons/avantaje.png differ diff --git a/recipes/icons/baikaljournal.png b/recipes/icons/baikaljournal.png index 9dff29b8e4..c4a88efdb8 100644 Binary files a/recipes/icons/baikaljournal.png and b/recipes/icons/baikaljournal.png differ diff --git a/recipes/icons/balkanist.png b/recipes/icons/balkanist.png index cedf699086..72c6d25b9e 100644 Binary files a/recipes/icons/balkanist.png and b/recipes/icons/balkanist.png differ diff --git a/recipes/icons/baltimore_sun.png b/recipes/icons/baltimore_sun.png index f586676562..3d9d9d96b5 100644 Binary files a/recipes/icons/baltimore_sun.png and b/recipes/icons/baltimore_sun.png differ diff --git a/recipes/icons/bangkok_biz.png b/recipes/icons/bangkok_biz.png index 6028e77b37..d531a38ee6 100644 Binary files a/recipes/icons/bangkok_biz.png and b/recipes/icons/bangkok_biz.png differ diff --git a/recipes/icons/bangkokpost.png b/recipes/icons/bangkokpost.png index cb6d99fcb7..81a9218df6 100644 Binary files a/recipes/icons/bangkokpost.png and b/recipes/icons/bangkokpost.png differ diff --git a/recipes/icons/bar_and_bench.png b/recipes/icons/bar_and_bench.png new file mode 100644 index 0000000000..e76980b62d Binary files /dev/null and b/recipes/icons/bar_and_bench.png differ diff --git a/recipes/icons/bay_citizen.png b/recipes/icons/bay_citizen.png index d1a52ca240..997b74038f 100644 Binary files a/recipes/icons/bay_citizen.png and b/recipes/icons/bay_citizen.png differ diff --git a/recipes/icons/bbc_fast.png b/recipes/icons/bbc_fast.png index ffe9b0b22f..352ea242bf 100644 Binary files 
a/recipes/icons/bbc_fast.png and b/recipes/icons/bbc_fast.png differ diff --git a/recipes/icons/bbc_sport.png b/recipes/icons/bbc_sport.png index ffe9b0b22f..352ea242bf 100644 Binary files a/recipes/icons/bbc_sport.png and b/recipes/icons/bbc_sport.png differ diff --git a/recipes/icons/berliner_zeitung.png b/recipes/icons/berliner_zeitung.png index 616df6f865..f8f681ba2a 100644 Binary files a/recipes/icons/berliner_zeitung.png and b/recipes/icons/berliner_zeitung.png differ diff --git a/recipes/icons/berria.png b/recipes/icons/berria.png index 354dcc8057..253dd0cd6b 100644 Binary files a/recipes/icons/berria.png and b/recipes/icons/berria.png differ diff --git a/recipes/icons/biamag.png b/recipes/icons/biamag.png index 895c08cb80..c79834afd5 100644 Binary files a/recipes/icons/biamag.png and b/recipes/icons/biamag.png differ diff --git a/recipes/icons/biamag_en.png b/recipes/icons/biamag_en.png index 895c08cb80..c79834afd5 100644 Binary files a/recipes/icons/biamag_en.png and b/recipes/icons/biamag_en.png differ diff --git a/recipes/icons/bianet.png b/recipes/icons/bianet.png index 895c08cb80..c79834afd5 100644 Binary files a/recipes/icons/bianet.png and b/recipes/icons/bianet.png differ diff --git a/recipes/icons/big_oven.png b/recipes/icons/big_oven.png index 45a01d3b0a..d17e2adccd 100644 Binary files a/recipes/icons/big_oven.png and b/recipes/icons/big_oven.png differ diff --git a/recipes/icons/biggovernment.png b/recipes/icons/biggovernment.png index 29ee425cb9..4af5ec55b2 100644 Binary files a/recipes/icons/biggovernment.png and b/recipes/icons/biggovernment.png differ diff --git a/recipes/icons/bighollywood.png b/recipes/icons/bighollywood.png index 29ee425cb9..4af5ec55b2 100644 Binary files a/recipes/icons/bighollywood.png and b/recipes/icons/bighollywood.png differ diff --git a/recipes/icons/bild_de.png b/recipes/icons/bild_de.png index b5adc0818c..94e7afecb1 100644 Binary files a/recipes/icons/bild_de.png and b/recipes/icons/bild_de.png differ diff --git 
a/recipes/icons/billorielly.png b/recipes/icons/billorielly.png index 39cd909615..7e00851c5f 100644 Binary files a/recipes/icons/billorielly.png and b/recipes/icons/billorielly.png differ diff --git a/recipes/icons/birmingham_evening_mail.png b/recipes/icons/birmingham_evening_mail.png index 7831ce575c..4512affb2c 100644 Binary files a/recipes/icons/birmingham_evening_mail.png and b/recipes/icons/birmingham_evening_mail.png differ diff --git a/recipes/icons/birmingham_post.png b/recipes/icons/birmingham_post.png index 7c766837c3..e899d9e4b5 100644 Binary files a/recipes/icons/birmingham_post.png and b/recipes/icons/birmingham_post.png differ diff --git a/recipes/icons/biz_portal.png b/recipes/icons/biz_portal.png index 928b32596b..170ec43d99 100644 Binary files a/recipes/icons/biz_portal.png and b/recipes/icons/biz_portal.png differ diff --git a/recipes/icons/blesk.png b/recipes/icons/blesk.png index d7ccc57a7a..75c7646bf6 100644 Binary files a/recipes/icons/blesk.png and b/recipes/icons/blesk.png differ diff --git a/recipes/icons/bloomberg-business-week.png b/recipes/icons/bloomberg-business-week.png new file mode 100644 index 0000000000..21314cdc3b Binary files /dev/null and b/recipes/icons/bloomberg-business-week.png differ diff --git a/recipes/icons/bloomberg.png b/recipes/icons/bloomberg.png new file mode 100644 index 0000000000..21314cdc3b Binary files /dev/null and b/recipes/icons/bloomberg.png differ diff --git a/recipes/icons/bookforummagazine.png b/recipes/icons/bookforummagazine.png new file mode 100644 index 0000000000..5e6eac016d Binary files /dev/null and b/recipes/icons/bookforummagazine.png differ diff --git a/recipes/icons/borse_online.png b/recipes/icons/borse_online.png index ead59a3311..6fa8875650 100644 Binary files a/recipes/icons/borse_online.png and b/recipes/icons/borse_online.png differ diff --git a/recipes/icons/borsen_dk.png b/recipes/icons/borsen_dk.png index 08a96dd2be..11ac7bac4b 100644 Binary files a/recipes/icons/borsen_dk.png and 
b/recipes/icons/borsen_dk.png differ diff --git a/recipes/icons/bq_prime.png b/recipes/icons/bq_prime.png index 71fc66b2f5..9997313519 100644 Binary files a/recipes/icons/bq_prime.png and b/recipes/icons/bq_prime.png differ diff --git a/recipes/icons/brasil_de_fato.png b/recipes/icons/brasil_de_fato.png index 7de105e208..f19a37594f 100644 Binary files a/recipes/icons/brasil_de_fato.png and b/recipes/icons/brasil_de_fato.png differ diff --git a/recipes/icons/breakingmad.png b/recipes/icons/breakingmad.png index 2b050bd287..0326ef63a5 100644 Binary files a/recipes/icons/breakingmad.png and b/recipes/icons/breakingmad.png differ diff --git a/recipes/icons/brecha.png b/recipes/icons/brecha.png index 146921dd47..90eb75ba3f 100644 Binary files a/recipes/icons/brecha.png and b/recipes/icons/brecha.png differ diff --git a/recipes/icons/bsi_news.png b/recipes/icons/bsi_news.png index 63d2791028..01f884391b 100644 Binary files a/recipes/icons/bsi_news.png and b/recipes/icons/bsi_news.png differ diff --git a/recipes/icons/business_standard_print.png b/recipes/icons/business_standard_print.png new file mode 100644 index 0000000000..41800ff4f1 Binary files /dev/null and b/recipes/icons/business_standard_print.png differ diff --git a/recipes/icons/business_standard_print_edition.png b/recipes/icons/business_standard_print_edition.png deleted file mode 100644 index 83a1b55c06..0000000000 Binary files a/recipes/icons/business_standard_print_edition.png and /dev/null differ diff --git a/recipes/icons/business_today.png b/recipes/icons/business_today.png index c05caac1a3..59bf3fd168 100644 Binary files a/recipes/icons/business_today.png and b/recipes/icons/business_today.png differ diff --git a/recipes/icons/cacm.png b/recipes/icons/cacm.png index 2238035155..06a971b144 100644 Binary files a/recipes/icons/cacm.png and b/recipes/icons/cacm.png differ diff --git a/recipes/icons/cafcaf_dergisi.png b/recipes/icons/cafcaf_dergisi.png index 9565b0688a..46529cb072 100644 Binary files 
a/recipes/icons/cafcaf_dergisi.png and b/recipes/icons/cafcaf_dergisi.png differ diff --git a/recipes/icons/calcalist.png b/recipes/icons/calcalist.png index 9a30d28478..b5ba62f232 100644 Binary files a/recipes/icons/calcalist.png and b/recipes/icons/calcalist.png differ diff --git a/recipes/icons/calgary_herald.png b/recipes/icons/calgary_herald.png index 6fef06de60..8f41b65b0a 100644 Binary files a/recipes/icons/calgary_herald.png and b/recipes/icons/calgary_herald.png differ diff --git a/recipes/icons/camera_di_commercio_di_bari.png b/recipes/icons/camera_di_commercio_di_bari.png index 2ae6e9bc42..8884148798 100644 Binary files a/recipes/icons/camera_di_commercio_di_bari.png and b/recipes/icons/camera_di_commercio_di_bari.png differ diff --git a/recipes/icons/canardpc.png b/recipes/icons/canardpc.png index 620878a91f..ba632c3fd5 100644 Binary files a/recipes/icons/canardpc.png and b/recipes/icons/canardpc.png differ diff --git a/recipes/icons/capital_de.png b/recipes/icons/capital_de.png index 0e63a787d9..1d7343d2c2 100644 Binary files a/recipes/icons/capital_de.png and b/recipes/icons/capital_de.png differ diff --git a/recipes/icons/capital_gr.png b/recipes/icons/capital_gr.png index d87233f6a3..0159120d27 100644 Binary files a/recipes/icons/capital_gr.png and b/recipes/icons/capital_gr.png differ diff --git a/recipes/icons/caravan_magazine.png b/recipes/icons/caravan_magazine.png index d680d3f631..fdf7bb4bf5 100644 Binary files a/recipes/icons/caravan_magazine.png and b/recipes/icons/caravan_magazine.png differ diff --git a/recipes/icons/caravan_magazine_hindi.png b/recipes/icons/caravan_magazine_hindi.png index 1cb7f5edc3..fdf7bb4bf5 100644 Binary files a/recipes/icons/caravan_magazine_hindi.png and b/recipes/icons/caravan_magazine_hindi.png differ diff --git a/recipes/icons/catholic_daily_readings.png b/recipes/icons/catholic_daily_readings.png index 354b615442..a602ffe118 100644 Binary files a/recipes/icons/catholic_daily_readings.png and 
b/recipes/icons/catholic_daily_readings.png differ diff --git a/recipes/icons/catholic_news_agency.png b/recipes/icons/catholic_news_agency.png index 109e11fa38..bc1ee6ca66 100644 Binary files a/recipes/icons/catholic_news_agency.png and b/recipes/icons/catholic_news_agency.png differ diff --git a/recipes/icons/cbc_canada.png b/recipes/icons/cbc_canada.png index f11b31daec..9cc6c3018d 100644 Binary files a/recipes/icons/cbc_canada.png and b/recipes/icons/cbc_canada.png differ diff --git a/recipes/icons/cbn.png b/recipes/icons/cbn.png index 937113819d..5a957a9e09 100644 Binary files a/recipes/icons/cbn.png and b/recipes/icons/cbn.png differ diff --git a/recipes/icons/cdrinfo_pl.png b/recipes/icons/cdrinfo_pl.png index ec6efce608..fe7dd0dedc 100644 Binary files a/recipes/icons/cdrinfo_pl.png and b/recipes/icons/cdrinfo_pl.png differ diff --git a/recipes/icons/cedar.png b/recipes/icons/cedar.png index 6ab627fe4c..8559a78238 100644 Binary files a/recipes/icons/cedar.png and b/recipes/icons/cedar.png differ diff --git a/recipes/icons/ceske_noviny.png b/recipes/icons/ceske_noviny.png index cda4a3031d..2e89edffd1 100644 Binary files a/recipes/icons/ceske_noviny.png and b/recipes/icons/ceske_noviny.png differ diff --git a/recipes/icons/cesky_rozhlas_6.png b/recipes/icons/cesky_rozhlas_6.png index 316fdac18c..00c62b6454 100644 Binary files a/recipes/icons/cesky_rozhlas_6.png and b/recipes/icons/cesky_rozhlas_6.png differ diff --git a/recipes/icons/champion.png b/recipes/icons/champion.png index 4beead7219..8737d59930 100644 Binary files a/recipes/icons/champion.png and b/recipes/icons/champion.png differ diff --git a/recipes/icons/cherta.png b/recipes/icons/cherta.png index 72044c1019..980d17f037 100644 Binary files a/recipes/icons/cherta.png and b/recipes/icons/cherta.png differ diff --git a/recipes/icons/chetnixploitation.png b/recipes/icons/chetnixploitation.png index dc616b95cc..04c29c68dd 100644 Binary files a/recipes/icons/chetnixploitation.png and 
b/recipes/icons/chetnixploitation.png differ diff --git a/recipes/icons/chicago_tribune.png b/recipes/icons/chicago_tribune.png index 87ceeebca5..3ae6b10dbf 100644 Binary files a/recipes/icons/chicago_tribune.png and b/recipes/icons/chicago_tribune.png differ diff --git a/recipes/icons/china_economic_net.png b/recipes/icons/china_economic_net.png index 72564db7e6..aaa81c8b4b 100644 Binary files a/recipes/icons/china_economic_net.png and b/recipes/icons/china_economic_net.png differ diff --git a/recipes/icons/china_post.png b/recipes/icons/china_post.png index 7fdefee164..2da7a7e506 100644 Binary files a/recipes/icons/china_post.png and b/recipes/icons/china_post.png differ diff --git a/recipes/icons/china_times.png b/recipes/icons/china_times.png index 6f64696f8b..22cfdfff1f 100644 Binary files a/recipes/icons/china_times.png and b/recipes/icons/china_times.png differ diff --git a/recipes/icons/chinadaily.png b/recipes/icons/chinadaily.png index 030944da82..156f0b78d2 100644 Binary files a/recipes/icons/chinadaily.png and b/recipes/icons/chinadaily.png differ diff --git a/recipes/icons/chipro.png b/recipes/icons/chipro.png index 1debdf19c5..77ef6fdbd6 100644 Binary files a/recipes/icons/chipro.png and b/recipes/icons/chipro.png differ diff --git a/recipes/icons/chosun.png b/recipes/icons/chosun.png index 41356b5b39..58a17eea2d 100644 Binary files a/recipes/icons/chosun.png and b/recipes/icons/chosun.png differ diff --git a/recipes/icons/christian_post.png b/recipes/icons/christian_post.png index 44f7ad11b7..48ff2b92f9 100644 Binary files a/recipes/icons/christian_post.png and b/recipes/icons/christian_post.png differ diff --git a/recipes/icons/chronicle_higher_ed.png b/recipes/icons/chronicle_higher_ed.png index 930ce9647b..6c09bc7e62 100644 Binary files a/recipes/icons/chronicle_higher_ed.png and b/recipes/icons/chronicle_higher_ed.png differ diff --git a/recipes/icons/cicero.png b/recipes/icons/cicero.png index 80ad3aafac..fa71d0a779 100644 Binary files 
a/recipes/icons/cicero.png and b/recipes/icons/cicero.png differ diff --git a/recipes/icons/cincinnati_enquirer.png b/recipes/icons/cincinnati_enquirer.png index 7152b8f0d1..c331075776 100644 Binary files a/recipes/icons/cincinnati_enquirer.png and b/recipes/icons/cincinnati_enquirer.png differ diff --git a/recipes/icons/cinebel_be.png b/recipes/icons/cinebel_be.png index e07b102ff3..f4379714a9 100644 Binary files a/recipes/icons/cinebel_be.png and b/recipes/icons/cinebel_be.png differ diff --git a/recipes/icons/cio.png b/recipes/icons/cio.png index 2650f44078..1b34faa85f 100644 Binary files a/recipes/icons/cio.png and b/recipes/icons/cio.png differ diff --git a/recipes/icons/ciperchile.png b/recipes/icons/ciperchile.png index cecc160ace..2a6df76839 100644 Binary files a/recipes/icons/ciperchile.png and b/recipes/icons/ciperchile.png differ diff --git a/recipes/icons/cityavisen_dk.png b/recipes/icons/cityavisen_dk.png index 05f107580a..6fc915ca3d 100644 Binary files a/recipes/icons/cityavisen_dk.png and b/recipes/icons/cityavisen_dk.png differ diff --git a/recipes/icons/cjr.png b/recipes/icons/cjr.png index 73b15015df..d780d82759 100644 Binary files a/recipes/icons/cjr.png and b/recipes/icons/cjr.png differ diff --git a/recipes/icons/clarin.png b/recipes/icons/clarin.png index 98767b97f2..bb3124042c 100644 Binary files a/recipes/icons/clarin.png and b/recipes/icons/clarin.png differ diff --git a/recipes/icons/clarion_ledger.png b/recipes/icons/clarion_ledger.png index 3d123f2fae..0087d74754 100644 Binary files a/recipes/icons/clarion_ledger.png and b/recipes/icons/clarion_ledger.png differ diff --git a/recipes/icons/clic_rbs.png b/recipes/icons/clic_rbs.png index 86bae37d6f..3dae6f3730 100644 Binary files a/recipes/icons/clic_rbs.png and b/recipes/icons/clic_rbs.png differ diff --git a/recipes/icons/cm_journal.png b/recipes/icons/cm_journal.png index 069bcf0bc3..ce76616b82 100644 Binary files a/recipes/icons/cm_journal.png and b/recipes/icons/cm_journal.png differ 
diff --git a/recipes/icons/cnn.png b/recipes/icons/cnn.png index f14b8e7812..47ca3a3d3b 100644 Binary files a/recipes/icons/cnn.png and b/recipes/icons/cnn.png differ diff --git a/recipes/icons/colta.png b/recipes/icons/colta.png index 95f21f5ab8..41ed59b9e4 100644 Binary files a/recipes/icons/colta.png and b/recipes/icons/colta.png differ diff --git a/recipes/icons/columbusdispatch.png b/recipes/icons/columbusdispatch.png index 9b7d8c6fd4..ba4f2250ba 100644 Binary files a/recipes/icons/columbusdispatch.png and b/recipes/icons/columbusdispatch.png differ diff --git a/recipes/icons/common_dreams.png b/recipes/icons/common_dreams.png index 08465cb988..96cd52a335 100644 Binary files a/recipes/icons/common_dreams.png and b/recipes/icons/common_dreams.png differ diff --git a/recipes/icons/computerworld_dk.png b/recipes/icons/computerworld_dk.png index 4b439afcec..a715bc0327 100644 Binary files a/recipes/icons/computerworld_dk.png and b/recipes/icons/computerworld_dk.png differ diff --git a/recipes/icons/consortium_news.png b/recipes/icons/consortium_news.png index 0562a478c6..2ceed69a46 100644 Binary files a/recipes/icons/consortium_news.png and b/recipes/icons/consortium_news.png differ diff --git a/recipes/icons/contemporary_argentine_writers.png b/recipes/icons/contemporary_argentine_writers.png index 9ccd38d2f3..6e1793fb9d 100644 Binary files a/recipes/icons/contemporary_argentine_writers.png and b/recipes/icons/contemporary_argentine_writers.png differ diff --git a/recipes/icons/contropiano.png b/recipes/icons/contropiano.png index fc61cc5355..1224224bbd 100644 Binary files a/recipes/icons/contropiano.png and b/recipes/icons/contropiano.png differ diff --git a/recipes/icons/corriere_della_sera_en.png b/recipes/icons/corriere_della_sera_en.png index eb31bb08b0..90df7f4d16 100644 Binary files a/recipes/icons/corriere_della_sera_en.png and b/recipes/icons/corriere_della_sera_en.png differ diff --git a/recipes/icons/corriere_della_sera_it.png 
b/recipes/icons/corriere_della_sera_it.png index eb31bb08b0..90df7f4d16 100644 Binary files a/recipes/icons/corriere_della_sera_it.png and b/recipes/icons/corriere_della_sera_it.png differ diff --git a/recipes/icons/corriere_dello_sport.png b/recipes/icons/corriere_dello_sport.png index f981be4789..08d1c861b0 100644 Binary files a/recipes/icons/corriere_dello_sport.png and b/recipes/icons/corriere_dello_sport.png differ diff --git a/recipes/icons/cosmopolitan.png b/recipes/icons/cosmopolitan.png index 9bb8a68a2f..7a9a423183 100644 Binary files a/recipes/icons/cosmopolitan.png and b/recipes/icons/cosmopolitan.png differ diff --git a/recipes/icons/cosmopolitan_de.png b/recipes/icons/cosmopolitan_de.png index 57868cc801..d86aa017c8 100644 Binary files a/recipes/icons/cosmopolitan_de.png and b/recipes/icons/cosmopolitan_de.png differ diff --git a/recipes/icons/cosmopolitan_uk.png b/recipes/icons/cosmopolitan_uk.png index 878ef372bf..f73fbf673f 100644 Binary files a/recipes/icons/cosmopolitan_uk.png and b/recipes/icons/cosmopolitan_uk.png differ diff --git a/recipes/icons/cosmos.png b/recipes/icons/cosmos.png index 963729f077..ce3ef9f57a 100644 Binary files a/recipes/icons/cosmos.png and b/recipes/icons/cosmos.png differ diff --git a/recipes/icons/cotidianul.png b/recipes/icons/cotidianul.png index df652d9400..d3cfcf6451 100644 Binary files a/recipes/icons/cotidianul.png and b/recipes/icons/cotidianul.png differ diff --git a/recipes/icons/counterpunch.png b/recipes/icons/counterpunch.png index 6f95b98e53..ba299c016e 100644 Binary files a/recipes/icons/counterpunch.png and b/recipes/icons/counterpunch.png differ diff --git a/recipes/icons/countryfile.png b/recipes/icons/countryfile.png index 179ef0a335..10f1e9be82 100644 Binary files a/recipes/icons/countryfile.png and b/recipes/icons/countryfile.png differ diff --git a/recipes/icons/courrier.png b/recipes/icons/courrier.png index e081fc7224..3bf9224330 100644 Binary files a/recipes/icons/courrier.png and 
b/recipes/icons/courrier.png differ diff --git a/recipes/icons/cracked_com.png b/recipes/icons/cracked_com.png index aa81ff1faf..4f7db8b112 100644 Binary files a/recipes/icons/cracked_com.png and b/recipes/icons/cracked_com.png differ diff --git a/recipes/icons/crikey.png b/recipes/icons/crikey.png index 5a8aad8850..7daaa7f0ac 100644 Binary files a/recipes/icons/crikey.png and b/recipes/icons/crikey.png differ diff --git a/recipes/icons/cronica.png b/recipes/icons/cronica.png index 4bdebe0fa0..e0e9680da8 100644 Binary files a/recipes/icons/cronica.png and b/recipes/icons/cronica.png differ diff --git a/recipes/icons/csid.png b/recipes/icons/csid.png index b16c6bba9e..08bcc75718 100644 Binary files a/recipes/icons/csid.png and b/recipes/icons/csid.png differ diff --git a/recipes/icons/ct24.png b/recipes/icons/ct24.png index 2cc9c79ddb..91c4989af7 100644 Binary files a/recipes/icons/ct24.png and b/recipes/icons/ct24.png differ diff --git a/recipes/icons/cubadebate.png b/recipes/icons/cubadebate.png index 69a8919828..6d196fce80 100644 Binary files a/recipes/icons/cubadebate.png and b/recipes/icons/cubadebate.png differ diff --git a/recipes/icons/cumhuriyet.png b/recipes/icons/cumhuriyet.png index 63c4eac267..1de5b5d305 100644 Binary files a/recipes/icons/cumhuriyet.png and b/recipes/icons/cumhuriyet.png differ diff --git a/recipes/icons/currenttime.png b/recipes/icons/currenttime.png index 0b62dc0c39..8622109a9c 100644 Binary files a/recipes/icons/currenttime.png and b/recipes/icons/currenttime.png differ diff --git a/recipes/icons/cvecezla.png b/recipes/icons/cvecezla.png index 9ccd38d2f3..6e1793fb9d 100644 Binary files a/recipes/icons/cvecezla.png and b/recipes/icons/cvecezla.png differ diff --git a/recipes/icons/cyberpresse.png b/recipes/icons/cyberpresse.png index b7971473e3..f3789fbceb 100644 Binary files a/recipes/icons/cyberpresse.png and b/recipes/icons/cyberpresse.png differ diff --git a/recipes/icons/cyprus_weekly.png b/recipes/icons/cyprus_weekly.png index 
4bc3ecf017..8bbe9f0776 100644 Binary files a/recipes/icons/cyprus_weekly.png and b/recipes/icons/cyprus_weekly.png differ diff --git a/recipes/icons/dachauer_nachrichten.png b/recipes/icons/dachauer_nachrichten.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/dachauer_nachrichten.png and b/recipes/icons/dachauer_nachrichten.png differ diff --git a/recipes/icons/dagens_industri.png b/recipes/icons/dagens_industri.png index dcb1d3676d..89b37881ce 100644 Binary files a/recipes/icons/dagens_industri.png and b/recipes/icons/dagens_industri.png differ diff --git a/recipes/icons/dagensmedicin_dk.png b/recipes/icons/dagensmedicin_dk.png index ce8619b7e8..c1073d1d6c 100644 Binary files a/recipes/icons/dagensmedicin_dk.png and b/recipes/icons/dagensmedicin_dk.png differ diff --git a/recipes/icons/dagenspharma_dk.png b/recipes/icons/dagenspharma_dk.png index 8f7f429289..e2b33fdc77 100644 Binary files a/recipes/icons/dagenspharma_dk.png and b/recipes/icons/dagenspharma_dk.png differ diff --git a/recipes/icons/daily_telegraph.png b/recipes/icons/daily_telegraph.png index 16cdae9b71..c55e9c054e 100644 Binary files a/recipes/icons/daily_telegraph.png and b/recipes/icons/daily_telegraph.png differ diff --git a/recipes/icons/daily_writing_tips.png b/recipes/icons/daily_writing_tips.png index 4d0ab9ec5f..428c8e214b 100644 Binary files a/recipes/icons/daily_writing_tips.png and b/recipes/icons/daily_writing_tips.png differ diff --git a/recipes/icons/dainik_bhaskar.png b/recipes/icons/dainik_bhaskar.png index 40de95e4de..ae3b2b38ce 100644 Binary files a/recipes/icons/dainik_bhaskar.png and b/recipes/icons/dainik_bhaskar.png differ diff --git a/recipes/icons/danas.png b/recipes/icons/danas.png index 85ae185e48..f222399d49 100644 Binary files a/recipes/icons/danas.png and b/recipes/icons/danas.png differ diff --git a/recipes/icons/datasport.png b/recipes/icons/datasport.png index a1fbc460f8..72c44a775d 100644 Binary files a/recipes/icons/datasport.png and 
b/recipes/icons/datasport.png differ diff --git a/recipes/icons/daum_net.png b/recipes/icons/daum_net.png index f2e5feeb95..73d4a92ac9 100644 Binary files a/recipes/icons/daum_net.png and b/recipes/icons/daum_net.png differ diff --git a/recipes/icons/dawn.png b/recipes/icons/dawn.png index d989066115..14722ffe21 100644 Binary files a/recipes/icons/dawn.png and b/recipes/icons/dawn.png differ diff --git a/recipes/icons/dbb.png b/recipes/icons/dbb.png index f3f61cfdc9..0270ce437e 100644 Binary files a/recipes/icons/dbb.png and b/recipes/icons/dbb.png differ diff --git a/recipes/icons/de_redactie_be.png b/recipes/icons/de_redactie_be.png index bb95145615..5309a6b940 100644 Binary files a/recipes/icons/de_redactie_be.png and b/recipes/icons/de_redactie_be.png differ diff --git a/recipes/icons/deccan_herald.png b/recipes/icons/deccan_herald.png index bd49446814..f2d134d9d0 100644 Binary files a/recipes/icons/deccan_herald.png and b/recipes/icons/deccan_herald.png differ diff --git a/recipes/icons/defensenews.png b/recipes/icons/defensenews.png index 9a77714757..1f11a69dba 100644 Binary files a/recipes/icons/defensenews.png and b/recipes/icons/defensenews.png differ diff --git a/recipes/icons/degentenaar.png b/recipes/icons/degentenaar.png index dea820bbd4..96ef144829 100644 Binary files a/recipes/icons/degentenaar.png and b/recipes/icons/degentenaar.png differ diff --git a/recipes/icons/delco_times.png b/recipes/icons/delco_times.png index a245cdc763..9bb94cb23b 100644 Binary files a/recipes/icons/delco_times.png and b/recipes/icons/delco_times.png differ diff --git a/recipes/icons/democracy_journal.png b/recipes/icons/democracy_journal.png index 2568412b94..65c71ad92d 100644 Binary files a/recipes/icons/democracy_journal.png and b/recipes/icons/democracy_journal.png differ diff --git a/recipes/icons/democracy_now.png b/recipes/icons/democracy_now.png index 3c88dce431..f224ec6ea9 100644 Binary files a/recipes/icons/democracy_now.png and b/recipes/icons/democracy_now.png 
differ diff --git a/recipes/icons/demorgen_be.png b/recipes/icons/demorgen_be.png index 17a3cf9cc3..b28ec0509e 100644 Binary files a/recipes/icons/demorgen_be.png and b/recipes/icons/demorgen_be.png differ diff --git a/recipes/icons/denik.cz.png b/recipes/icons/denik.cz.png index 092a51aba2..fefced96ff 100644 Binary files a/recipes/icons/denik.cz.png and b/recipes/icons/denik.cz.png differ diff --git a/recipes/icons/denik_referendum.png b/recipes/icons/denik_referendum.png index 6107f0ef6b..09b213aded 100644 Binary files a/recipes/icons/denik_referendum.png and b/recipes/icons/denik_referendum.png differ diff --git a/recipes/icons/denikn.cz.png b/recipes/icons/denikn.cz.png index 0bb29d4664..223d18dd9f 100644 Binary files a/recipes/icons/denikn.cz.png and b/recipes/icons/denikn.cz.png differ diff --git a/recipes/icons/der_standard.png b/recipes/icons/der_standard.png index 318d558b10..369004eb4d 100644 Binary files a/recipes/icons/der_standard.png and b/recipes/icons/der_standard.png differ diff --git a/recipes/icons/deredactie.png b/recipes/icons/deredactie.png index bd901ac8a0..d62ffc392b 100644 Binary files a/recipes/icons/deredactie.png and b/recipes/icons/deredactie.png differ diff --git a/recipes/icons/descopera.png b/recipes/icons/descopera.png index 8b40c88c21..52b7c9ed35 100644 Binary files a/recipes/icons/descopera.png and b/recipes/icons/descopera.png differ diff --git a/recipes/icons/desiring_god.png b/recipes/icons/desiring_god.png index 22ec3e845f..eb6aa6995e 100644 Binary files a/recipes/icons/desiring_god.png and b/recipes/icons/desiring_god.png differ diff --git a/recipes/icons/detroit_news.png b/recipes/icons/detroit_news.png index 7c0c6ffb20..388cdb0110 100644 Binary files a/recipes/icons/detroit_news.png and b/recipes/icons/detroit_news.png differ diff --git a/recipes/icons/deutsche_welle_bs.png b/recipes/icons/deutsche_welle_bs.png index 034074e83d..b9c9b8374b 100644 Binary files a/recipes/icons/deutsche_welle_bs.png and 
b/recipes/icons/deutsche_welle_bs.png differ diff --git a/recipes/icons/deutsche_welle_de.png b/recipes/icons/deutsche_welle_de.png index 0cea86328c..b9c9b8374b 100644 Binary files a/recipes/icons/deutsche_welle_de.png and b/recipes/icons/deutsche_welle_de.png differ diff --git a/recipes/icons/deutsche_welle_en.png b/recipes/icons/deutsche_welle_en.png index 034074e83d..b9c9b8374b 100644 Binary files a/recipes/icons/deutsche_welle_en.png and b/recipes/icons/deutsche_welle_en.png differ diff --git a/recipes/icons/deutsche_welle_es.png b/recipes/icons/deutsche_welle_es.png index 034074e83d..b9c9b8374b 100644 Binary files a/recipes/icons/deutsche_welle_es.png and b/recipes/icons/deutsche_welle_es.png differ diff --git a/recipes/icons/deutsche_welle_hr.png b/recipes/icons/deutsche_welle_hr.png index 034074e83d..b9c9b8374b 100644 Binary files a/recipes/icons/deutsche_welle_hr.png and b/recipes/icons/deutsche_welle_hr.png differ diff --git a/recipes/icons/deutsche_welle_pt.png b/recipes/icons/deutsche_welle_pt.png index 034074e83d..b9c9b8374b 100644 Binary files a/recipes/icons/deutsche_welle_pt.png and b/recipes/icons/deutsche_welle_pt.png differ diff --git a/recipes/icons/deutsche_welle_ru.png b/recipes/icons/deutsche_welle_ru.png index c0c5b8c0eb..b9c9b8374b 100644 Binary files a/recipes/icons/deutsche_welle_ru.png and b/recipes/icons/deutsche_welle_ru.png differ diff --git a/recipes/icons/deutsche_welle_sr.png b/recipes/icons/deutsche_welle_sr.png index 034074e83d..b9c9b8374b 100644 Binary files a/recipes/icons/deutsche_welle_sr.png and b/recipes/icons/deutsche_welle_sr.png differ diff --git a/recipes/icons/developpez.png b/recipes/icons/developpez.png index f322f4fe10..1e11406da4 100644 Binary files a/recipes/icons/developpez.png and b/recipes/icons/developpez.png differ diff --git a/recipes/icons/dhnet_be.png b/recipes/icons/dhnet_be.png index 3047fe8ee1..651e3fea8d 100644 Binary files a/recipes/icons/dhnet_be.png and b/recipes/icons/dhnet_be.png differ diff --git 
a/recipes/icons/di.png b/recipes/icons/di.png index 4ee466470b..91675982ab 100644 Binary files a/recipes/icons/di.png and b/recipes/icons/di.png differ diff --git a/recipes/icons/diagonal.png b/recipes/icons/diagonal.png index a8fdb0d4a3..91cd062739 100644 Binary files a/recipes/icons/diagonal.png and b/recipes/icons/diagonal.png differ diff --git a/recipes/icons/diario_de_noticias.png b/recipes/icons/diario_de_noticias.png index 42135b7aaf..436a0ccc9a 100644 Binary files a/recipes/icons/diario_de_noticias.png and b/recipes/icons/diario_de_noticias.png differ diff --git a/recipes/icons/diario_extra.png b/recipes/icons/diario_extra.png index 6bf53ba09d..270bfa3433 100644 Binary files a/recipes/icons/diario_extra.png and b/recipes/icons/diario_extra.png differ diff --git a/recipes/icons/diario_la_republica.png b/recipes/icons/diario_la_republica.png index 6f1eb15269..d84d0f97d4 100644 Binary files a/recipes/icons/diario_la_republica.png and b/recipes/icons/diario_la_republica.png differ diff --git a/recipes/icons/diario_sport.png b/recipes/icons/diario_sport.png index 4a686394f5..74ad135559 100644 Binary files a/recipes/icons/diario_sport.png and b/recipes/icons/diario_sport.png differ diff --git a/recipes/icons/diariovasco.png b/recipes/icons/diariovasco.png index 2748d14a94..fec161a132 100644 Binary files a/recipes/icons/diariovasco.png and b/recipes/icons/diariovasco.png differ diff --git a/recipes/icons/digit_magazine.png b/recipes/icons/digit_magazine.png index 0c3cfd1d91..83b2f91b54 100644 Binary files a/recipes/icons/digit_magazine.png and b/recipes/icons/digit_magazine.png differ diff --git a/recipes/icons/digital_arts.png b/recipes/icons/digital_arts.png index 5167b65d43..59b4005649 100644 Binary files a/recipes/icons/digital_arts.png and b/recipes/icons/digital_arts.png differ diff --git a/recipes/icons/digitalspy_uk.png b/recipes/icons/digitalspy_uk.png index b8b612b17c..4a0c1001a4 100644 Binary files a/recipes/icons/digitalspy_uk.png and 
b/recipes/icons/digitalspy_uk.png differ diff --git a/recipes/icons/digizone.png b/recipes/icons/digizone.png index c00ee4b75c..cd3924dd79 100644 Binary files a/recipes/icons/digizone.png and b/recipes/icons/digizone.png differ diff --git a/recipes/icons/disinformatico.png b/recipes/icons/disinformatico.png index c71208762b..99426c2d4e 100644 Binary files a/recipes/icons/disinformatico.png and b/recipes/icons/disinformatico.png differ diff --git a/recipes/icons/distrowatch_weekly.png b/recipes/icons/distrowatch_weekly.png index 7368ff79af..2949581fc6 100644 Binary files a/recipes/icons/distrowatch_weekly.png and b/recipes/icons/distrowatch_weekly.png differ diff --git a/recipes/icons/djurslandsposten_dk.png b/recipes/icons/djurslandsposten_dk.png index 4f28358ac1..34340aea44 100644 Binary files a/recipes/icons/djurslandsposten_dk.png and b/recipes/icons/djurslandsposten_dk.png differ diff --git a/recipes/icons/dn_se.png b/recipes/icons/dn_se.png index 5b13862890..8e79d94316 100644 Binary files a/recipes/icons/dn_se.png and b/recipes/icons/dn_se.png differ diff --git a/recipes/icons/dnevnik_cro.png b/recipes/icons/dnevnik_cro.png index a43cac2d16..166937695b 100644 Binary files a/recipes/icons/dnevnik_cro.png and b/recipes/icons/dnevnik_cro.png differ diff --git a/recipes/icons/dobanevinosti.png b/recipes/icons/dobanevinosti.png index c71208762b..99426c2d4e 100644 Binary files a/recipes/icons/dobanevinosti.png and b/recipes/icons/dobanevinosti.png differ diff --git a/recipes/icons/dobreprogamy.png b/recipes/icons/dobreprogamy.png index aa979ca58b..10bd84e11c 100644 Binary files a/recipes/icons/dobreprogamy.png and b/recipes/icons/dobreprogamy.png differ diff --git a/recipes/icons/doghousediaries.png b/recipes/icons/doghousediaries.png index ec648bb390..63b77c7174 100644 Binary files a/recipes/icons/doghousediaries.png and b/recipes/icons/doghousediaries.png differ diff --git a/recipes/icons/dominion.png b/recipes/icons/dominion.png index 314a57d5fb..163db06469 
100644 Binary files a/recipes/icons/dominion.png and b/recipes/icons/dominion.png differ diff --git a/recipes/icons/donga.png b/recipes/icons/donga.png index fa732048d2..3fd5552d73 100644 Binary files a/recipes/icons/donga.png and b/recipes/icons/donga.png differ diff --git a/recipes/icons/dorfener_anzeiger.png b/recipes/icons/dorfener_anzeiger.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/dorfener_anzeiger.png and b/recipes/icons/dorfener_anzeiger.png differ diff --git a/recipes/icons/dosisdiarias.png b/recipes/icons/dosisdiarias.png index c71208762b..99426c2d4e 100644 Binary files a/recipes/icons/dosisdiarias.png and b/recipes/icons/dosisdiarias.png differ diff --git a/recipes/icons/dot_net.png b/recipes/icons/dot_net.png index da02148279..c33740196d 100644 Binary files a/recipes/icons/dot_net.png and b/recipes/icons/dot_net.png differ diff --git a/recipes/icons/dotpod.png b/recipes/icons/dotpod.png index 0e395bf4d6..ea78b813b3 100644 Binary files a/recipes/icons/dotpod.png and b/recipes/icons/dotpod.png differ diff --git a/recipes/icons/dunya_bizim.png b/recipes/icons/dunya_bizim.png index 48e6f804a1..9e2927a933 100644 Binary files a/recipes/icons/dunya_bizim.png and b/recipes/icons/dunya_bizim.png differ diff --git a/recipes/icons/dunyahalleri.png b/recipes/icons/dunyahalleri.png index f6dd377ca6..70952e3cfa 100644 Binary files a/recipes/icons/dunyahalleri.png and b/recipes/icons/dunyahalleri.png differ diff --git a/recipes/icons/dunyahalleri_haftaninozeti.png b/recipes/icons/dunyahalleri_haftaninozeti.png index f6dd377ca6..70952e3cfa 100644 Binary files a/recipes/icons/dunyahalleri_haftaninozeti.png and b/recipes/icons/dunyahalleri_haftaninozeti.png differ diff --git a/recipes/icons/dw_de.png b/recipes/icons/dw_de.png deleted file mode 100644 index 0cea86328c..0000000000 Binary files a/recipes/icons/dw_de.png and /dev/null differ diff --git a/recipes/icons/dzieje_pl.png b/recipes/icons/dzieje_pl.png index b61f173a49..6060512004 100644 
Binary files a/recipes/icons/dzieje_pl.png and b/recipes/icons/dzieje_pl.png differ diff --git a/recipes/icons/dziennik_polski.png b/recipes/icons/dziennik_polski.png index b5acecb211..ae3cb1b1e9 100644 Binary files a/recipes/icons/dziennik_polski.png and b/recipes/icons/dziennik_polski.png differ diff --git a/recipes/icons/dziennik_zachodni.png b/recipes/icons/dziennik_zachodni.png index c9e2b30d77..45fc21db8c 100644 Binary files a/recipes/icons/dziennik_zachodni.png and b/recipes/icons/dziennik_zachodni.png differ diff --git a/recipes/icons/dziennikzwiazkowy.png b/recipes/icons/dziennikzwiazkowy.png index bc57c74016..df6e617427 100644 Binary files a/recipes/icons/dziennikzwiazkowy.png and b/recipes/icons/dziennikzwiazkowy.png differ diff --git a/recipes/icons/ebeltoftlokalavisen_dk.png b/recipes/icons/ebeltoftlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/ebeltoftlokalavisen_dk.png and b/recipes/icons/ebeltoftlokalavisen_dk.png differ diff --git a/recipes/icons/ebetrsberger_zeitung.png b/recipes/icons/ebetrsberger_zeitung.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/ebetrsberger_zeitung.png and b/recipes/icons/ebetrsberger_zeitung.png differ diff --git a/recipes/icons/echo_moskvy.png b/recipes/icons/echo_moskvy.png index a9fc5287ca..ab8d7bfb59 100644 Binary files a/recipes/icons/echo_moskvy.png and b/recipes/icons/echo_moskvy.png differ diff --git a/recipes/icons/echo_online.png b/recipes/icons/echo_online.png index 50acf24ff3..2f154be9ca 100644 Binary files a/recipes/icons/echo_online.png and b/recipes/icons/echo_online.png differ diff --git a/recipes/icons/economia.png b/recipes/icons/economia.png index cfc055ffc4..54034fefc6 100644 Binary files a/recipes/icons/economia.png and b/recipes/icons/economia.png differ diff --git a/recipes/icons/economist.png b/recipes/icons/economist.png index 8e5a4713e8..7fc33d3842 100644 Binary files a/recipes/icons/economist.png and b/recipes/icons/economist.png differ 
diff --git a/recipes/icons/economist_espresso.png b/recipes/icons/economist_espresso.png new file mode 100644 index 0000000000..c338e06599 Binary files /dev/null and b/recipes/icons/economist_espresso.png differ diff --git a/recipes/icons/economist_world_ahead.png b/recipes/icons/economist_world_ahead.png new file mode 100644 index 0000000000..7fc33d3842 Binary files /dev/null and b/recipes/icons/economist_world_ahead.png differ diff --git a/recipes/icons/editor_and_publisher.png b/recipes/icons/editor_and_publisher.png index c31650f95a..ad303c9ed2 100644 Binary files a/recipes/icons/editor_and_publisher.png and b/recipes/icons/editor_and_publisher.png differ diff --git a/recipes/icons/editoriali.png b/recipes/icons/editoriali.png index 6013646336..18e35f0395 100644 Binary files a/recipes/icons/editoriali.png and b/recipes/icons/editoriali.png differ diff --git a/recipes/icons/egedallokalavisen_dk.png b/recipes/icons/egedallokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/egedallokalavisen_dk.png and b/recipes/icons/egedallokalavisen_dk.png differ diff --git a/recipes/icons/ekantipur.png b/recipes/icons/ekantipur.png index 1c70341e57..bffbf306b5 100644 Binary files a/recipes/icons/ekantipur.png and b/recipes/icons/ekantipur.png differ diff --git a/recipes/icons/ekathemerini.png b/recipes/icons/ekathemerini.png index 9ccbb1b158..9ab7269a6c 100644 Binary files a/recipes/icons/ekathemerini.png and b/recipes/icons/ekathemerini.png differ diff --git a/recipes/icons/ekologia_pl.png b/recipes/icons/ekologia_pl.png index 47cb447d9c..7f14852e44 100644 Binary files a/recipes/icons/ekologia_pl.png and b/recipes/icons/ekologia_pl.png differ diff --git a/recipes/icons/ekot.png b/recipes/icons/ekot.png index c674a89d24..fc42507a4a 100644 Binary files a/recipes/icons/ekot.png and b/recipes/icons/ekot.png differ diff --git a/recipes/icons/el_colombiano.png b/recipes/icons/el_colombiano.png index 871f76f14e..9e3947961f 100644 Binary files 
a/recipes/icons/el_colombiano.png and b/recipes/icons/el_colombiano.png differ diff --git a/recipes/icons/el_correo.png b/recipes/icons/el_correo.png index 9eaffd027f..5adaebfe1d 100644 Binary files a/recipes/icons/el_correo.png and b/recipes/icons/el_correo.png differ diff --git a/recipes/icons/el_diario.png b/recipes/icons/el_diario.png index d9b0b016d4..c0b7f9f41f 100644 Binary files a/recipes/icons/el_diario.png and b/recipes/icons/el_diario.png differ diff --git a/recipes/icons/el_diplo.png b/recipes/icons/el_diplo.png index 0092c83a28..1f331e572e 100644 Binary files a/recipes/icons/el_diplo.png and b/recipes/icons/el_diplo.png differ diff --git a/recipes/icons/el_espectador.png b/recipes/icons/el_espectador.png index a1785e99a4..4a7003c1e4 100644 Binary files a/recipes/icons/el_espectador.png and b/recipes/icons/el_espectador.png differ diff --git a/recipes/icons/el_mostrador.png b/recipes/icons/el_mostrador.png index 858ee530f8..e7272687d0 100644 Binary files a/recipes/icons/el_mostrador.png and b/recipes/icons/el_mostrador.png differ diff --git a/recipes/icons/el_mundo_co.png b/recipes/icons/el_mundo_co.png index 9768dc9fbf..a95ea9a2ae 100644 Binary files a/recipes/icons/el_mundo_co.png and b/recipes/icons/el_mundo_co.png differ diff --git a/recipes/icons/el_nacional.png b/recipes/icons/el_nacional.png index 5e684ca4b1..a66ced0883 100644 Binary files a/recipes/icons/el_nacional.png and b/recipes/icons/el_nacional.png differ diff --git a/recipes/icons/el_pais.png b/recipes/icons/el_pais.png index 9dae01be77..b19a42a142 100644 Binary files a/recipes/icons/el_pais.png and b/recipes/icons/el_pais.png differ diff --git a/recipes/icons/el_pais_uy.png b/recipes/icons/el_pais_uy.png index 54552af057..85388bb687 100644 Binary files a/recipes/icons/el_pais_uy.png and b/recipes/icons/el_pais_uy.png differ diff --git a/recipes/icons/el_periodico.png b/recipes/icons/el_periodico.png index a33df8d73d..076cb598c7 100644 Binary files a/recipes/icons/el_periodico.png and 
b/recipes/icons/el_periodico.png differ diff --git a/recipes/icons/el_publico.png b/recipes/icons/el_publico.png index 5b82306ee3..c47cb56ca0 100644 Binary files a/recipes/icons/el_publico.png and b/recipes/icons/el_publico.png differ diff --git a/recipes/icons/el_tiempo.png b/recipes/icons/el_tiempo.png index fef8a13531..6a7c04ea6a 100644 Binary files a/recipes/icons/el_tiempo.png and b/recipes/icons/el_tiempo.png differ diff --git a/recipes/icons/el_universal.png b/recipes/icons/el_universal.png index 25957571bc..266e50199c 100644 Binary files a/recipes/icons/el_universal.png and b/recipes/icons/el_universal.png differ diff --git a/recipes/icons/elcohetealaluna.png b/recipes/icons/elcohetealaluna.png index f072519564..86f4b6d38a 100644 Binary files a/recipes/icons/elcohetealaluna.png and b/recipes/icons/elcohetealaluna.png differ diff --git a/recipes/icons/elcomercio.png b/recipes/icons/elcomercio.png index 4e0a601b7b..05aa43f329 100644 Binary files a/recipes/icons/elcomercio.png and b/recipes/icons/elcomercio.png differ diff --git a/recipes/icons/elcronista-arg.png b/recipes/icons/elcronista-arg.png index 10df14fb85..ef78913025 100644 Binary files a/recipes/icons/elcronista-arg.png and b/recipes/icons/elcronista-arg.png differ diff --git a/recipes/icons/elektroda_pl.png b/recipes/icons/elektroda_pl.png index 7c7cb8bcb1..4f7e870b62 100644 Binary files a/recipes/icons/elektroda_pl.png and b/recipes/icons/elektroda_pl.png differ diff --git a/recipes/icons/elet_es_irodalom.png b/recipes/icons/elet_es_irodalom.png index 9fdd0d9c4e..b62cb39db2 100644 Binary files a/recipes/icons/elet_es_irodalom.png and b/recipes/icons/elet_es_irodalom.png differ diff --git a/recipes/icons/elmundo.png b/recipes/icons/elmundo.png index eade02bb3c..4fae1dbcbb 100644 Binary files a/recipes/icons/elmundo.png and b/recipes/icons/elmundo.png differ diff --git a/recipes/icons/elperiodico_catalan.png b/recipes/icons/elperiodico_catalan.png index f2c3308105..e99a778ae6 100644 Binary files 
a/recipes/icons/elperiodico_catalan.png and b/recipes/icons/elperiodico_catalan.png differ diff --git a/recipes/icons/elperiodico_spanish.png b/recipes/icons/elperiodico_spanish.png index f2c3308105..e99a778ae6 100644 Binary files a/recipes/icons/elperiodico_spanish.png and b/recipes/icons/elperiodico_spanish.png differ diff --git a/recipes/icons/elsevier.png b/recipes/icons/elsevier.png index f367370e34..0987f66194 100644 Binary files a/recipes/icons/elsevier.png and b/recipes/icons/elsevier.png differ diff --git a/recipes/icons/eluniverso_ec.png b/recipes/icons/eluniverso_ec.png index f98da8c24a..a1b2ae9a8d 100644 Binary files a/recipes/icons/eluniverso_ec.png and b/recipes/icons/eluniverso_ec.png differ diff --git a/recipes/icons/empire_magazine.png b/recipes/icons/empire_magazine.png index 34ef063072..37dea24d46 100644 Binary files a/recipes/icons/empire_magazine.png and b/recipes/icons/empire_magazine.png differ diff --git a/recipes/icons/entrepeneur.png b/recipes/icons/entrepeneur.png index 9e25569e42..e1b9aed5c5 100644 Binary files a/recipes/icons/entrepeneur.png and b/recipes/icons/entrepeneur.png differ diff --git a/recipes/icons/epoch_times.png b/recipes/icons/epoch_times.png index 13c5c5d7da..4fd12ce8ad 100644 Binary files a/recipes/icons/epoch_times.png and b/recipes/icons/epoch_times.png differ diff --git a/recipes/icons/epw.png b/recipes/icons/epw.png index 7d843d6119..dde185b6e9 100644 Binary files a/recipes/icons/epw.png and b/recipes/icons/epw.png differ diff --git a/recipes/icons/epw_magazine.png b/recipes/icons/epw_magazine.png index 7d843d6119..dde185b6e9 100644 Binary files a/recipes/icons/epw_magazine.png and b/recipes/icons/epw_magazine.png differ diff --git a/recipes/icons/equestria_daily.png b/recipes/icons/equestria_daily.png index 9698976d7f..ec1aacce68 100644 Binary files a/recipes/icons/equestria_daily.png and b/recipes/icons/equestria_daily.png differ diff --git a/recipes/icons/erdinger_anzeiger.png 
b/recipes/icons/erdinger_anzeiger.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/erdinger_anzeiger.png and b/recipes/icons/erdinger_anzeiger.png differ diff --git a/recipes/icons/esbjerglokalavisen_dk.png b/recipes/icons/esbjerglokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/esbjerglokalavisen_dk.png and b/recipes/icons/esbjerglokalavisen_dk.png differ diff --git a/recipes/icons/esensja_(rss).png b/recipes/icons/esensja_(rss).png index c6d30bfb6b..b441c2d359 100644 Binary files a/recipes/icons/esensja_(rss).png and b/recipes/icons/esensja_(rss).png differ diff --git a/recipes/icons/espn.png b/recipes/icons/espn.png index 4c15a5956e..194f190a48 100644 Binary files a/recipes/icons/espn.png and b/recipes/icons/espn.png differ diff --git a/recipes/icons/estadao.png b/recipes/icons/estadao.png index dc99c5c77e..b77e576f40 100644 Binary files a/recipes/icons/estadao.png and b/recipes/icons/estadao.png differ diff --git a/recipes/icons/eu_commission.png b/recipes/icons/eu_commission.png index 1d949a2473..47d2c00acd 100644 Binary files a/recipes/icons/eu_commission.png and b/recipes/icons/eu_commission.png differ diff --git a/recipes/icons/europa_press.png b/recipes/icons/europa_press.png index d808026225..fe5b338b9e 100644 Binary files a/recipes/icons/europa_press.png and b/recipes/icons/europa_press.png differ diff --git a/recipes/icons/evangelizo.png b/recipes/icons/evangelizo.png index d414317d85..68a6cab905 100644 Binary files a/recipes/icons/evangelizo.png and b/recipes/icons/evangelizo.png differ diff --git a/recipes/icons/everett_herald.png b/recipes/icons/everett_herald.png index 9e54112076..218c370eef 100644 Binary files a/recipes/icons/everett_herald.png and b/recipes/icons/everett_herald.png differ diff --git a/recipes/icons/evz.ro.png b/recipes/icons/evz.ro.png index 80ec458690..2fc04f5e48 100644 Binary files a/recipes/icons/evz.ro.png and b/recipes/icons/evz.ro.png differ diff --git 
a/recipes/icons/expansion_spanish.png b/recipes/icons/expansion_spanish.png index 3ce4a3de95..ffde87b733 100644 Binary files a/recipes/icons/expansion_spanish.png and b/recipes/icons/expansion_spanish.png differ diff --git a/recipes/icons/explosm.png b/recipes/icons/explosm.png index e82141971e..1d92e62984 100644 Binary files a/recipes/icons/explosm.png and b/recipes/icons/explosm.png differ diff --git a/recipes/icons/express_de.png b/recipes/icons/express_de.png index 57a979ee07..accb699882 100644 Binary files a/recipes/icons/express_de.png and b/recipes/icons/express_de.png differ diff --git a/recipes/icons/f1_ultra.png b/recipes/icons/f1_ultra.png index 5022dd49e9..45565f2788 100644 Binary files a/recipes/icons/f1_ultra.png and b/recipes/icons/f1_ultra.png differ diff --git a/recipes/icons/f_secure.png b/recipes/icons/f_secure.png index 9032fedea8..a36e72ce3d 100644 Binary files a/recipes/icons/f_secure.png and b/recipes/icons/f_secure.png differ diff --git a/recipes/icons/factcheck.png b/recipes/icons/factcheck.png index 6199445464..43544f4fd7 100644 Binary files a/recipes/icons/factcheck.png and b/recipes/icons/factcheck.png differ diff --git a/recipes/icons/fairbanks_daily.png b/recipes/icons/fairbanks_daily.png index ac2835344c..43d4f61e0b 100644 Binary files a/recipes/icons/fairbanks_daily.png and b/recipes/icons/fairbanks_daily.png differ diff --git a/recipes/icons/fan_graphs.png b/recipes/icons/fan_graphs.png index ea6a43fc85..c144a2cc4b 100644 Binary files a/recipes/icons/fan_graphs.png and b/recipes/icons/fan_graphs.png differ diff --git a/recipes/icons/fastcompany.png b/recipes/icons/fastcompany.png index 57e350f7bb..0cbc51b7ad 100644 Binary files a/recipes/icons/fastcompany.png and b/recipes/icons/fastcompany.png differ diff --git a/recipes/icons/favrskovlokalavisen_dk.png b/recipes/icons/favrskovlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/favrskovlokalavisen_dk.png and b/recipes/icons/favrskovlokalavisen_dk.png 
differ diff --git a/recipes/icons/faznet.png b/recipes/icons/faznet.png index e3da074559..8f6e9b2dad 100644 Binary files a/recipes/icons/faznet.png and b/recipes/icons/faznet.png differ diff --git a/recipes/icons/fc_knudde.png b/recipes/icons/fc_knudde.png index e358cefac3..8253e6eb27 100644 Binary files a/recipes/icons/fc_knudde.png and b/recipes/icons/fc_knudde.png differ diff --git a/recipes/icons/fdb_pl.png b/recipes/icons/fdb_pl.png index 0ae3bb87b1..a6bb59e9bb 100644 Binary files a/recipes/icons/fdb_pl.png and b/recipes/icons/fdb_pl.png differ diff --git a/recipes/icons/fhm_uk.png b/recipes/icons/fhm_uk.png index 7efbca60f7..d62dcedf26 100644 Binary files a/recipes/icons/fhm_uk.png and b/recipes/icons/fhm_uk.png differ diff --git a/recipes/icons/fhmro.png b/recipes/icons/fhmro.png index 42a9b63242..d95c1fd06c 100644 Binary files a/recipes/icons/fhmro.png and b/recipes/icons/fhmro.png differ diff --git a/recipes/icons/fifty_two.png b/recipes/icons/fifty_two.png new file mode 100644 index 0000000000..48923e874d Binary files /dev/null and b/recipes/icons/fifty_two.png differ diff --git a/recipes/icons/film_web.png b/recipes/icons/film_web.png index 1791a862ba..b8367c936c 100644 Binary files a/recipes/icons/film_web.png and b/recipes/icons/film_web.png differ diff --git a/recipes/icons/financial_times.png b/recipes/icons/financial_times.png index 553a9d5155..7681f5bcfc 100644 Binary files a/recipes/icons/financial_times.png and b/recipes/icons/financial_times.png differ diff --git a/recipes/icons/financial_times_print_edition.png b/recipes/icons/financial_times_print_edition.png deleted file mode 100644 index 553a9d5155..0000000000 Binary files a/recipes/icons/financial_times_print_edition.png and /dev/null differ diff --git a/recipes/icons/financieele_dagblad.png b/recipes/icons/financieele_dagblad.png index cd902244c4..c676576a18 100644 Binary files a/recipes/icons/financieele_dagblad.png and b/recipes/icons/financieele_dagblad.png differ diff --git 
a/recipes/icons/firstpost.png b/recipes/icons/firstpost.png new file mode 100644 index 0000000000..693d6365d1 Binary files /dev/null and b/recipes/icons/firstpost.png differ diff --git a/recipes/icons/flickr.png b/recipes/icons/flickr.png index a0e6b14bdd..dd397eede3 100644 Binary files a/recipes/icons/flickr.png and b/recipes/icons/flickr.png differ diff --git a/recipes/icons/flickr_es.png b/recipes/icons/flickr_es.png index a0e6b14bdd..dd397eede3 100644 Binary files a/recipes/icons/flickr_es.png and b/recipes/icons/flickr_es.png differ diff --git a/recipes/icons/focus_de.png b/recipes/icons/focus_de.png index 5e9e879346..b8cd4375f1 100644 Binary files a/recipes/icons/focus_de.png and b/recipes/icons/focus_de.png differ diff --git a/recipes/icons/fokkeensukke.png b/recipes/icons/fokkeensukke.png index 66acde67de..c67d58926b 100644 Binary files a/recipes/icons/fokkeensukke.png and b/recipes/icons/fokkeensukke.png differ diff --git a/recipes/icons/folhadesaopaulo.png b/recipes/icons/folhadesaopaulo.png index 6fa1ab6097..2373341a66 100644 Binary files a/recipes/icons/folhadesaopaulo.png and b/recipes/icons/folhadesaopaulo.png differ diff --git a/recipes/icons/folhadesaopaulo_sub.png b/recipes/icons/folhadesaopaulo_sub.png index 7b96e3291b..da53218eb9 100644 Binary files a/recipes/icons/folhadesaopaulo_sub.png and b/recipes/icons/folhadesaopaulo_sub.png differ diff --git a/recipes/icons/folkebladet_dk.png b/recipes/icons/folkebladet_dk.png index a2fcf5f6d9..1bae425939 100644 Binary files a/recipes/icons/folkebladet_dk.png and b/recipes/icons/folkebladet_dk.png differ diff --git a/recipes/icons/folketidende_dk.png b/recipes/icons/folketidende_dk.png index 51aa1b0d8b..e85e348b6a 100644 Binary files a/recipes/icons/folketidende_dk.png and b/recipes/icons/folketidende_dk.png differ diff --git a/recipes/icons/fontanka.png b/recipes/icons/fontanka.png index fa2990d1a4..3278e919df 100644 Binary files a/recipes/icons/fontanka.png and b/recipes/icons/fontanka.png differ diff 
--git a/recipes/icons/fooballua.png b/recipes/icons/fooballua.png new file mode 100644 index 0000000000..c9bdaf260c Binary files /dev/null and b/recipes/icons/fooballua.png differ diff --git a/recipes/icons/forbes.png b/recipes/icons/forbes.png index 8d06cdb090..80ac1805c7 100644 Binary files a/recipes/icons/forbes.png and b/recipes/icons/forbes.png differ diff --git a/recipes/icons/foreign_policy.png b/recipes/icons/foreign_policy.png index bde57efe31..dad44f5a46 100644 Binary files a/recipes/icons/foreign_policy.png and b/recipes/icons/foreign_policy.png differ diff --git a/recipes/icons/foxnews.png b/recipes/icons/foxnews.png index c9934b9151..dbd2c15584 100644 Binary files a/recipes/icons/foxnews.png and b/recipes/icons/foxnews.png differ diff --git a/recipes/icons/fr_online.png b/recipes/icons/fr_online.png index 0a3a94ae93..6752828ad5 100644 Binary files a/recipes/icons/fr_online.png and b/recipes/icons/fr_online.png differ diff --git a/recipes/icons/frandroid.png b/recipes/icons/frandroid.png index fdc932e4b9..ecc40f7a42 100644 Binary files a/recipes/icons/frandroid.png and b/recipes/icons/frandroid.png differ diff --git a/recipes/icons/frankfurter_rundschau.png b/recipes/icons/frankfurter_rundschau.png index 0a3a94ae93..6752828ad5 100644 Binary files a/recipes/icons/frankfurter_rundschau.png and b/recipes/icons/frankfurter_rundschau.png differ diff --git a/recipes/icons/freakonomics.png b/recipes/icons/freakonomics.png index 1d5855bfd8..16f623a725 100644 Binary files a/recipes/icons/freakonomics.png and b/recipes/icons/freakonomics.png differ diff --git a/recipes/icons/fredensborglokalavisen_dk.png b/recipes/icons/fredensborglokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/fredensborglokalavisen_dk.png and b/recipes/icons/fredensborglokalavisen_dk.png differ diff --git a/recipes/icons/fredericialokalavisen_dk.png b/recipes/icons/fredericialokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files 
a/recipes/icons/fredericialokalavisen_dk.png and b/recipes/icons/fredericialokalavisen_dk.png differ diff --git a/recipes/icons/frederiksbergbladet_dk.png b/recipes/icons/frederiksbergbladet_dk.png index 05f107580a..6fc915ca3d 100644 Binary files a/recipes/icons/frederiksbergbladet_dk.png and b/recipes/icons/frederiksbergbladet_dk.png differ diff --git a/recipes/icons/frederikssundlokalavisen_dk.png b/recipes/icons/frederikssundlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/frederikssundlokalavisen_dk.png and b/recipes/icons/frederikssundlokalavisen_dk.png differ diff --git a/recipes/icons/free_inquiry.png b/recipes/icons/free_inquiry.png index cca09d8d07..7c22d29008 100644 Binary files a/recipes/icons/free_inquiry.png and b/recipes/icons/free_inquiry.png differ diff --git a/recipes/icons/freenature.png b/recipes/icons/freenature.png index 1910f9451c..17ca46d236 100644 Binary files a/recipes/icons/freenature.png and b/recipes/icons/freenature.png differ diff --git a/recipes/icons/freisinger_tagblatt.png b/recipes/icons/freisinger_tagblatt.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/freisinger_tagblatt.png and b/recipes/icons/freisinger_tagblatt.png differ diff --git a/recipes/icons/frontline.png b/recipes/icons/frontline.png index ef8cee3012..04b9ace9cf 100644 Binary files a/recipes/icons/frontline.png and b/recipes/icons/frontline.png differ diff --git a/recipes/icons/fstream.png b/recipes/icons/fstream.png index 556d6cde47..4e7fa48815 100644 Binary files a/recipes/icons/fstream.png and b/recipes/icons/fstream.png differ diff --git a/recipes/icons/fudzilla.png b/recipes/icons/fudzilla.png index b61d5e7320..f78223e842 100644 Binary files a/recipes/icons/fudzilla.png and b/recipes/icons/fudzilla.png differ diff --git a/recipes/icons/furesoelokalavisen_dk.png b/recipes/icons/furesoelokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/furesoelokalavisen_dk.png and 
b/recipes/icons/furesoelokalavisen_dk.png differ diff --git a/recipes/icons/furstenfeldbrucker_tagblatt.png b/recipes/icons/furstenfeldbrucker_tagblatt.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/furstenfeldbrucker_tagblatt.png and b/recipes/icons/furstenfeldbrucker_tagblatt.png differ diff --git a/recipes/icons/galicia_confidential.png b/recipes/icons/galicia_confidential.png index f45be5f33c..2f191b97f4 100644 Binary files a/recipes/icons/galicia_confidential.png and b/recipes/icons/galicia_confidential.png differ diff --git a/recipes/icons/gamekult.png b/recipes/icons/gamekult.png index 99502d0ccb..84bb5bb32f 100644 Binary files a/recipes/icons/gamekult.png and b/recipes/icons/gamekult.png differ diff --git a/recipes/icons/gamespot.png b/recipes/icons/gamespot.png index 2f2be9b45e..37eb696399 100644 Binary files a/recipes/icons/gamespot.png and b/recipes/icons/gamespot.png differ diff --git a/recipes/icons/gandul.png b/recipes/icons/gandul.png index ba50b7b4bc..b4cb8ba2c5 100644 Binary files a/recipes/icons/gandul.png and b/recipes/icons/gandul.png differ diff --git a/recipes/icons/garmischer_tagblatt.png b/recipes/icons/garmischer_tagblatt.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/garmischer_tagblatt.png and b/recipes/icons/garmischer_tagblatt.png differ diff --git a/recipes/icons/gazeta_krakowska.png b/recipes/icons/gazeta_krakowska.png index fc88052b42..15676ce35e 100644 Binary files a/recipes/icons/gazeta_krakowska.png and b/recipes/icons/gazeta_krakowska.png differ diff --git a/recipes/icons/gazeta_lubuska.png b/recipes/icons/gazeta_lubuska.png index 2706cfffdd..50c46dc980 100644 Binary files a/recipes/icons/gazeta_lubuska.png and b/recipes/icons/gazeta_lubuska.png differ diff --git a/recipes/icons/gazeta_wspolczesna.png b/recipes/icons/gazeta_wspolczesna.png index f2f1eb82e4..31a38235f7 100644 Binary files a/recipes/icons/gazeta_wspolczesna.png and b/recipes/icons/gazeta_wspolczesna.png differ diff --git 
a/recipes/icons/gazetaua_ru.png b/recipes/icons/gazetaua_ru.png index bc8402a93b..05de7e0c20 100644 Binary files a/recipes/icons/gazetaua_ru.png and b/recipes/icons/gazetaua_ru.png differ diff --git a/recipes/icons/gazetaua_ua.png b/recipes/icons/gazetaua_ua.png index bc8402a93b..05de7e0c20 100644 Binary files a/recipes/icons/gazetaua_ua.png and b/recipes/icons/gazetaua_ua.png differ diff --git a/recipes/icons/gcn.png b/recipes/icons/gcn.png index 0951dd588b..e5bdfe87e8 100644 Binary files a/recipes/icons/gcn.png and b/recipes/icons/gcn.png differ diff --git a/recipes/icons/geek_poke.png b/recipes/icons/geek_poke.png index 1be49bc8a9..8d4b1dca9a 100644 Binary files a/recipes/icons/geek_poke.png and b/recipes/icons/geek_poke.png differ diff --git a/recipes/icons/gentoftelokalavisen_dk.png b/recipes/icons/gentoftelokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/gentoftelokalavisen_dk.png and b/recipes/icons/gentoftelokalavisen_dk.png differ diff --git a/recipes/icons/geretsrieder_merkur.png b/recipes/icons/geretsrieder_merkur.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/geretsrieder_merkur.png and b/recipes/icons/geretsrieder_merkur.png differ diff --git a/recipes/icons/german_gov.png b/recipes/icons/german_gov.png index 0eedb87311..0b2245ca4a 100644 Binary files a/recipes/icons/german_gov.png and b/recipes/icons/german_gov.png differ diff --git a/recipes/icons/gezgin_dergi.png b/recipes/icons/gezgin_dergi.png index 36b77142fc..4688fc237f 100644 Binary files a/recipes/icons/gezgin_dergi.png and b/recipes/icons/gezgin_dergi.png differ diff --git a/recipes/icons/gizmodo.png b/recipes/icons/gizmodo.png index a0678cf317..02ba202d47 100644 Binary files a/recipes/icons/gizmodo.png and b/recipes/icons/gizmodo.png differ diff --git a/recipes/icons/glamour.png b/recipes/icons/glamour.png index d64144f56b..130d477afd 100644 Binary files a/recipes/icons/glamour.png and b/recipes/icons/glamour.png differ diff --git 
a/recipes/icons/glenn_greenwald.png b/recipes/icons/glenn_greenwald.png index f629c5475e..9546e15b09 100644 Binary files a/recipes/icons/glenn_greenwald.png and b/recipes/icons/glenn_greenwald.png differ diff --git a/recipes/icons/glennbeck.png b/recipes/icons/glennbeck.png index cfd131f95e..75cafd494e 100644 Binary files a/recipes/icons/glennbeck.png and b/recipes/icons/glennbeck.png differ diff --git a/recipes/icons/globaltimes.png b/recipes/icons/globaltimes.png index 5c44613c0f..198d4a39d1 100644 Binary files a/recipes/icons/globaltimes.png and b/recipes/icons/globaltimes.png differ diff --git a/recipes/icons/globe_and_mail.png b/recipes/icons/globe_and_mail.png index 4cfdaa28b7..3cc6024913 100644 Binary files a/recipes/icons/globe_and_mail.png and b/recipes/icons/globe_and_mail.png differ diff --git a/recipes/icons/go_comics.png b/recipes/icons/go_comics.png index f3c9cbdbd4..ad8b4ecb15 100644 Binary files a/recipes/icons/go_comics.png and b/recipes/icons/go_comics.png differ diff --git a/recipes/icons/goal.png b/recipes/icons/goal.png index 5c293b9a8e..3d828d0838 100644 Binary files a/recipes/icons/goal.png and b/recipes/icons/goal.png differ diff --git a/recipes/icons/gofin_pl.png b/recipes/icons/gofin_pl.png index 7231bbd720..6efbb2a7af 100644 Binary files a/recipes/icons/gofin_pl.png and b/recipes/icons/gofin_pl.png differ diff --git a/recipes/icons/golem_de.png b/recipes/icons/golem_de.png index a40c31353b..0af79ba099 100644 Binary files a/recipes/icons/golem_de.png and b/recipes/icons/golem_de.png differ diff --git a/recipes/icons/good_to_know.png b/recipes/icons/good_to_know.png index 390fa627b0..256f3647d9 100644 Binary files a/recipes/icons/good_to_know.png and b/recipes/icons/good_to_know.png differ diff --git a/recipes/icons/google_news.png b/recipes/icons/google_news.png index 879ed953aa..8b405e9021 100644 Binary files a/recipes/icons/google_news.png and b/recipes/icons/google_news.png differ diff --git a/recipes/icons/googlemobileblog.png 
b/recipes/icons/googlemobileblog.png index cd50e6f04b..af6a32434e 100644 Binary files a/recipes/icons/googlemobileblog.png and b/recipes/icons/googlemobileblog.png differ diff --git a/recipes/icons/gorky.png b/recipes/icons/gorky.png index d1ce5a7f51..d901dd936a 100644 Binary files a/recipes/icons/gorky.png and b/recipes/icons/gorky.png differ diff --git a/recipes/icons/grani.png b/recipes/icons/grani.png index 36f8146a19..dc5dcd9704 100644 Binary files a/recipes/icons/grani.png and b/recipes/icons/grani.png differ diff --git a/recipes/icons/granma.png b/recipes/icons/granma.png index be09e8024b..31a52f618a 100644 Binary files a/recipes/icons/granma.png and b/recipes/icons/granma.png differ diff --git a/recipes/icons/granta.png b/recipes/icons/granta.png index 64261e6f97..65328de7f2 100644 Binary files a/recipes/icons/granta.png and b/recipes/icons/granta.png differ diff --git a/recipes/icons/grantland.png b/recipes/icons/grantland.png index 7b1b22f8f8..dbc430e264 100644 Binary files a/recipes/icons/grantland.png and b/recipes/icons/grantland.png differ diff --git a/recipes/icons/greensboro_news_and_record.png b/recipes/icons/greensboro_news_and_record.png index 0d2b8c0182..f9afe6a5a8 100644 Binary files a/recipes/icons/greensboro_news_and_record.png and b/recipes/icons/greensboro_news_and_record.png differ diff --git a/recipes/icons/grenaalokalavisen_dk.png b/recipes/icons/grenaalokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/grenaalokalavisen_dk.png and b/recipes/icons/grenaalokalavisen_dk.png differ diff --git a/recipes/icons/gribskovlokalavisen_dk.png b/recipes/icons/gribskovlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/gribskovlokalavisen_dk.png and b/recipes/icons/gribskovlokalavisen_dk.png differ diff --git a/recipes/icons/grrm.png b/recipes/icons/grrm.png index e66a3be8c7..819b87e8c2 100644 Binary files a/recipes/icons/grrm.png and b/recipes/icons/grrm.png differ diff --git 
a/recipes/icons/gs24_pl.png b/recipes/icons/gs24_pl.png index 67f4097738..0be176e730 100644 Binary files a/recipes/icons/gs24_pl.png and b/recipes/icons/gs24_pl.png differ diff --git a/recipes/icons/guardian.png b/recipes/icons/guardian.png index b183b3efa4..a683f41a1b 100644 Binary files a/recipes/icons/guardian.png and b/recipes/icons/guardian.png differ diff --git a/recipes/icons/gulfnews.png b/recipes/icons/gulfnews.png index 208bd55f2f..8e4bbc3c4f 100644 Binary files a/recipes/icons/gulfnews.png and b/recipes/icons/gulfnews.png differ diff --git a/recipes/icons/gwup.png b/recipes/icons/gwup.png index e6c8258cde..46529cb072 100644 Binary files a/recipes/icons/gwup.png and b/recipes/icons/gwup.png differ diff --git a/recipes/icons/habr.png b/recipes/icons/habr.png index ad1f85a4e8..869425d86f 100644 Binary files a/recipes/icons/habr.png and b/recipes/icons/habr.png differ diff --git a/recipes/icons/habr_ru.png b/recipes/icons/habr_ru.png index ad1f85a4e8..869425d86f 100644 Binary files a/recipes/icons/habr_ru.png and b/recipes/icons/habr_ru.png differ diff --git a/recipes/icons/haderslevlokalavisen_dk.png b/recipes/icons/haderslevlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/haderslevlokalavisen_dk.png and b/recipes/icons/haderslevlokalavisen_dk.png differ diff --git a/recipes/icons/haksoz.png b/recipes/icons/haksoz.png index 8fed62e5d4..2ca8e0a46a 100644 Binary files a/recipes/icons/haksoz.png and b/recipes/icons/haksoz.png differ diff --git a/recipes/icons/halsnaeslokalavisen_dk.png b/recipes/icons/halsnaeslokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/halsnaeslokalavisen_dk.png and b/recipes/icons/halsnaeslokalavisen_dk.png differ diff --git a/recipes/icons/hamilton_spectator.png b/recipes/icons/hamilton_spectator.png index 6ca3e98089..8c6db18d9d 100644 Binary files a/recipes/icons/hamilton_spectator.png and b/recipes/icons/hamilton_spectator.png differ diff --git 
a/recipes/icons/handelsblatt.png b/recipes/icons/handelsblatt.png index 2dbe699f1e..60ef6d78e0 100644 Binary files a/recipes/icons/handelsblatt.png and b/recipes/icons/handelsblatt.png differ diff --git a/recipes/icons/hankyoreh.png b/recipes/icons/hankyoreh.png index 5610d7a7ff..5f8721276b 100644 Binary files a/recipes/icons/hankyoreh.png and b/recipes/icons/hankyoreh.png differ diff --git a/recipes/icons/hankyoreh21.png b/recipes/icons/hankyoreh21.png index 5084f3309a..b2bb4b7b01 100644 Binary files a/recipes/icons/hankyoreh21.png and b/recipes/icons/hankyoreh21.png differ diff --git a/recipes/icons/hannoversche_zeitung.png b/recipes/icons/hannoversche_zeitung.png index 36e85c7ae2..34b24775f0 100644 Binary files a/recipes/icons/hannoversche_zeitung.png and b/recipes/icons/hannoversche_zeitung.png differ diff --git a/recipes/icons/harpers.png b/recipes/icons/harpers.png index e0dff06203..1e37988d7c 100644 Binary files a/recipes/icons/harpers.png and b/recipes/icons/harpers.png differ diff --git a/recipes/icons/harpers_full.png b/recipes/icons/harpers_full.png index e0dff06203..1e37988d7c 100644 Binary files a/recipes/icons/harpers_full.png and b/recipes/icons/harpers_full.png differ diff --git a/recipes/icons/heavy_metal_it.png b/recipes/icons/heavy_metal_it.png index e6c8258cde..46529cb072 100644 Binary files a/recipes/icons/heavy_metal_it.png and b/recipes/icons/heavy_metal_it.png differ diff --git a/recipes/icons/heise_ct.png b/recipes/icons/heise_ct.png index f59db714ee..27418ca722 100644 Binary files a/recipes/icons/heise_ct.png and b/recipes/icons/heise_ct.png differ diff --git a/recipes/icons/heise_ix.png b/recipes/icons/heise_ix.png index f59db714ee..27418ca722 100644 Binary files a/recipes/icons/heise_ix.png and b/recipes/icons/heise_ix.png differ diff --git a/recipes/icons/heise_open.png b/recipes/icons/heise_open.png index f59db714ee..27418ca722 100644 Binary files a/recipes/icons/heise_open.png and b/recipes/icons/heise_open.png differ diff --git 
a/recipes/icons/helsingin_sanomat.png b/recipes/icons/helsingin_sanomat.png index 25042f9cb5..97ad6a8755 100644 Binary files a/recipes/icons/helsingin_sanomat.png and b/recipes/icons/helsingin_sanomat.png differ diff --git a/recipes/icons/high_country_news.png b/recipes/icons/high_country_news.png index b53f46f5ab..e87bb8fa55 100644 Binary files a/recipes/icons/high_country_news.png and b/recipes/icons/high_country_news.png differ diff --git a/recipes/icons/himal_southasian.png b/recipes/icons/himal_southasian.png new file mode 100644 index 0000000000..ba9d16bf1d Binary files /dev/null and b/recipes/icons/himal_southasian.png differ diff --git a/recipes/icons/hindu.png b/recipes/icons/hindu.png index c51a4fa5ed..07c4828a31 100644 Binary files a/recipes/icons/hindu.png and b/recipes/icons/hindu.png differ diff --git a/recipes/icons/hindu_business_line.png b/recipes/icons/hindu_business_line.png index 7c75da753b..94791ebf0d 100644 Binary files a/recipes/icons/hindu_business_line.png and b/recipes/icons/hindu_business_line.png differ diff --git a/recipes/icons/hindu_business_line_print_edition.png b/recipes/icons/hindu_business_line_print_edition.png index 7c75da753b..94791ebf0d 100644 Binary files a/recipes/icons/hindu_business_line_print_edition.png and b/recipes/icons/hindu_business_line_print_edition.png differ diff --git a/recipes/icons/hindu_post.png b/recipes/icons/hindu_post.png index 093adbb6f1..39e4c60027 100644 Binary files a/recipes/icons/hindu_post.png and b/recipes/icons/hindu_post.png differ diff --git a/recipes/icons/hindufeeds.png b/recipes/icons/hindufeeds.png new file mode 100644 index 0000000000..07c4828a31 Binary files /dev/null and b/recipes/icons/hindufeeds.png differ diff --git a/recipes/icons/hinduism_today.png b/recipes/icons/hinduism_today.png index 7f78929ebd..ca3886b34a 100644 Binary files a/recipes/icons/hinduism_today.png and b/recipes/icons/hinduism_today.png differ diff --git a/recipes/icons/hindustan_times.png 
b/recipes/icons/hindustan_times.png index 0ae964147c..01ab8981a5 100644 Binary files a/recipes/icons/hindustan_times.png and b/recipes/icons/hindustan_times.png differ diff --git a/recipes/icons/hindustan_times_print.png b/recipes/icons/hindustan_times_print.png new file mode 100644 index 0000000000..01ab8981a5 Binary files /dev/null and b/recipes/icons/hindustan_times_print.png differ diff --git a/recipes/icons/hindutamil.png b/recipes/icons/hindutamil.png new file mode 100644 index 0000000000..029c72b471 Binary files /dev/null and b/recipes/icons/hindutamil.png differ diff --git a/recipes/icons/history_today.png b/recipes/icons/history_today.png index 1de9424c2a..cb9ff5bf45 100644 Binary files a/recipes/icons/history_today.png and b/recipes/icons/history_today.png differ diff --git a/recipes/icons/hnonline.png b/recipes/icons/hnonline.png index 2b51e42dda..b0df23d5f1 100644 Binary files a/recipes/icons/hnonline.png and b/recipes/icons/hnonline.png differ diff --git a/recipes/icons/hoersholmlokalavisen_dk.png b/recipes/icons/hoersholmlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/hoersholmlokalavisen_dk.png and b/recipes/icons/hoersholmlokalavisen_dk.png differ diff --git a/recipes/icons/holzkirchener_merkur.png b/recipes/icons/holzkirchener_merkur.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/holzkirchener_merkur.png and b/recipes/icons/holzkirchener_merkur.png differ diff --git a/recipes/icons/horizons.png b/recipes/icons/horizons.png new file mode 100644 index 0000000000..28f98997f0 Binary files /dev/null and b/recipes/icons/horizons.png differ diff --git a/recipes/icons/hornsherredlokalavisen_dk.png b/recipes/icons/hornsherredlokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/hornsherredlokalavisen_dk.png and b/recipes/icons/hornsherredlokalavisen_dk.png differ diff --git a/recipes/icons/hotnews.png b/recipes/icons/hotnews.png index 8eebaba294..9e4deeb436 100644 Binary 
files a/recipes/icons/hotnews.png and b/recipes/icons/hotnews.png differ diff --git a/recipes/icons/howtogeek.png b/recipes/icons/howtogeek.png index 0e7fbbfa53..03d412ffa0 100644 Binary files a/recipes/icons/howtogeek.png and b/recipes/icons/howtogeek.png differ diff --git a/recipes/icons/hoy.png b/recipes/icons/hoy.png index 1ed6a26ac4..64e33ef0d4 100644 Binary files a/recipes/icons/hoy.png and b/recipes/icons/hoy.png differ diff --git a/recipes/icons/huffingtonpost.png b/recipes/icons/huffingtonpost.png index 5b96f55dfd..0bd11bd96f 100644 Binary files a/recipes/icons/huffingtonpost.png and b/recipes/icons/huffingtonpost.png differ diff --git a/recipes/icons/huffingtonpost_uk.png b/recipes/icons/huffingtonpost_uk.png index 46a3fbc680..392cf8b710 100644 Binary files a/recipes/icons/huffingtonpost_uk.png and b/recipes/icons/huffingtonpost_uk.png differ diff --git a/recipes/icons/hvg.png b/recipes/icons/hvg.png index a9e30e735b..cbc6ebd2d1 100644 Binary files a/recipes/icons/hvg.png and b/recipes/icons/hvg.png differ diff --git a/recipes/icons/hvidovrelokalavisen_dk.png b/recipes/icons/hvidovrelokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/hvidovrelokalavisen_dk.png and b/recipes/icons/hvidovrelokalavisen_dk.png differ diff --git a/recipes/icons/id_pixel.png b/recipes/icons/id_pixel.png index e2de47e666..5ccbacee05 100644 Binary files a/recipes/icons/id_pixel.png and b/recipes/icons/id_pixel.png differ diff --git a/recipes/icons/ideal_almeria.png b/recipes/icons/ideal_almeria.png index 90b5e85b4a..022305f78b 100644 Binary files a/recipes/icons/ideal_almeria.png and b/recipes/icons/ideal_almeria.png differ diff --git a/recipes/icons/ideal_granada.png b/recipes/icons/ideal_granada.png index 90b5e85b4a..022305f78b 100644 Binary files a/recipes/icons/ideal_granada.png and b/recipes/icons/ideal_granada.png differ diff --git a/recipes/icons/ideal_jaen.png b/recipes/icons/ideal_jaen.png index 90b5e85b4a..022305f78b 100644 Binary files 
a/recipes/icons/ideal_jaen.png and b/recipes/icons/ideal_jaen.png differ diff --git a/recipes/icons/idg_se.png b/recipes/icons/idg_se.png index 8dbfa687ed..f914292957 100644 Binary files a/recipes/icons/idg_se.png and b/recipes/icons/idg_se.png differ diff --git a/recipes/icons/idnes.png b/recipes/icons/idnes.png index 11b517979c..1ad49c7459 100644 Binary files a/recipes/icons/idnes.png and b/recipes/icons/idnes.png differ diff --git a/recipes/icons/ifzm.png b/recipes/icons/ifzm.png new file mode 100644 index 0000000000..1e660b7409 Binary files /dev/null and b/recipes/icons/ifzm.png differ diff --git a/recipes/icons/iht.png b/recipes/icons/iht.png index 8cbaf6d618..a9d598da97 100644 Binary files a/recipes/icons/iht.png and b/recipes/icons/iht.png differ diff --git a/recipes/icons/iktibas.png b/recipes/icons/iktibas.png index 9380a44775..c448285f5f 100644 Binary files a/recipes/icons/iktibas.png and b/recipes/icons/iktibas.png differ diff --git a/recipes/icons/il_cambiamento.png b/recipes/icons/il_cambiamento.png index 85c8f19970..dff897c024 100644 Binary files a/recipes/icons/il_cambiamento.png and b/recipes/icons/il_cambiamento.png differ diff --git a/recipes/icons/il_giornale.png b/recipes/icons/il_giornale.png index c1b16163fa..c99857a233 100644 Binary files a/recipes/icons/il_giornale.png and b/recipes/icons/il_giornale.png differ diff --git a/recipes/icons/il_messaggero.png b/recipes/icons/il_messaggero.png index 392b9019ae..5232cc63f3 100644 Binary files a/recipes/icons/il_messaggero.png and b/recipes/icons/il_messaggero.png differ diff --git a/recipes/icons/il_post.png b/recipes/icons/il_post.png index 7e8dcb8ed7..2dce3f1e3a 100644 Binary files a/recipes/icons/il_post.png and b/recipes/icons/il_post.png differ diff --git a/recipes/icons/iliteratura_cz.png b/recipes/icons/iliteratura_cz.png index 5a762fb704..9b7784fe67 100644 Binary files a/recipes/icons/iliteratura_cz.png and b/recipes/icons/iliteratura_cz.png differ diff --git 
a/recipes/icons/ilsole24ore.png b/recipes/icons/ilsole24ore.png index 9e1aec5cab..4fe5d25a36 100644 Binary files a/recipes/icons/ilsole24ore.png and b/recipes/icons/ilsole24ore.png differ diff --git a/recipes/icons/impulse_de.png b/recipes/icons/impulse_de.png index 56c89a206b..f76dcb7542 100644 Binary files a/recipes/icons/impulse_de.png and b/recipes/icons/impulse_de.png differ diff --git a/recipes/icons/in_gr.png b/recipes/icons/in_gr.png index 40b9b231ac..f8a357efdf 100644 Binary files a/recipes/icons/in_gr.png and b/recipes/icons/in_gr.png differ diff --git a/recipes/icons/inc42.png b/recipes/icons/inc42.png new file mode 100644 index 0000000000..9edc13d383 Binary files /dev/null and b/recipes/icons/inc42.png differ diff --git a/recipes/icons/independent_australia.png b/recipes/icons/independent_australia.png index 5091d43d99..a7765401b3 100644 Binary files a/recipes/icons/independent_australia.png and b/recipes/icons/independent_australia.png differ diff --git a/recipes/icons/india_legal_magazine.png b/recipes/icons/india_legal_magazine.png index 1aca792d8d..4396210e8c 100644 Binary files a/recipes/icons/india_legal_magazine.png and b/recipes/icons/india_legal_magazine.png differ diff --git a/recipes/icons/india_speaks_reddit.png b/recipes/icons/india_speaks_reddit.png index 32d870af3b..397879fe93 100644 Binary files a/recipes/icons/india_speaks_reddit.png and b/recipes/icons/india_speaks_reddit.png differ diff --git a/recipes/icons/indian_express.png b/recipes/icons/indian_express.png index 5b6ff374c8..e389cae4f9 100644 Binary files a/recipes/icons/indian_express.png and b/recipes/icons/indian_express.png differ diff --git a/recipes/icons/indic_today.png b/recipes/icons/indic_today.png index dc71eb826a..a269916eeb 100644 Binary files a/recipes/icons/indic_today.png and b/recipes/icons/indic_today.png differ diff --git a/recipes/icons/indy_star.png b/recipes/icons/indy_star.png index 43c79bd08c..59769c64c6 100644 Binary files a/recipes/icons/indy_star.png and 
b/recipes/icons/indy_star.png differ diff --git a/recipes/icons/infomotori.png b/recipes/icons/infomotori.png index 2edbb08f29..2146ac4e6b 100644 Binary files a/recipes/icons/infomotori.png and b/recipes/icons/infomotori.png differ diff --git a/recipes/icons/information_dk.png b/recipes/icons/information_dk.png index f7e72d5c80..5224c20341 100644 Binary files a/recipes/icons/information_dk.png and b/recipes/icons/information_dk.png differ diff --git a/recipes/icons/ing_dk.png b/recipes/icons/ing_dk.png index 0ce51091d2..82c5d35b98 100644 Binary files a/recipes/icons/ing_dk.png and b/recipes/icons/ing_dk.png differ diff --git a/recipes/icons/insan_okur.png b/recipes/icons/insan_okur.png index afb4b5f1ae..9ef1c79a72 100644 Binary files a/recipes/icons/insan_okur.png and b/recipes/icons/insan_okur.png differ diff --git a/recipes/icons/insider.png b/recipes/icons/insider.png index 02da9d64a3..b791c89e18 100644 Binary files a/recipes/icons/insider.png and b/recipes/icons/insider.png differ diff --git a/recipes/icons/instapaper.png b/recipes/icons/instapaper.png index ea12926375..9819bb25e2 100644 Binary files a/recipes/icons/instapaper.png and b/recipes/icons/instapaper.png differ diff --git a/recipes/icons/intelligencer.png b/recipes/icons/intelligencer.png index 1e316305d9..e75c280ca1 100644 Binary files a/recipes/icons/intelligencer.png and b/recipes/icons/intelligencer.png differ diff --git a/recipes/icons/interfax.png b/recipes/icons/interfax.png index c4d39186fd..43a460fa39 100644 Binary files a/recipes/icons/interfax.png and b/recipes/icons/interfax.png differ diff --git a/recipes/icons/interfax_ua.png b/recipes/icons/interfax_ua.png index 1bc28f5f77..2dcfbe8f86 100644 Binary files a/recipes/icons/interfax_ua.png and b/recipes/icons/interfax_ua.png differ diff --git a/recipes/icons/interfax_uk.png b/recipes/icons/interfax_uk.png index c4d39186fd..43a460fa39 100644 Binary files a/recipes/icons/interfax_uk.png and b/recipes/icons/interfax_uk.png differ diff --git 
a/recipes/icons/io9.png b/recipes/icons/io9.png index ccca65cbce..19208caf56 100644 Binary files a/recipes/icons/io9.png and b/recipes/icons/io9.png differ diff --git a/recipes/icons/irish_independent.png b/recipes/icons/irish_independent.png index 54cf59161c..58e00abd74 100644 Binary files a/recipes/icons/irish_independent.png and b/recipes/icons/irish_independent.png differ diff --git a/recipes/icons/irish_times.png b/recipes/icons/irish_times.png index 3f9153bf74..72eaba90ff 100644 Binary files a/recipes/icons/irish_times.png and b/recipes/icons/irish_times.png differ diff --git a/recipes/icons/isar-loisachbote.png b/recipes/icons/isar-loisachbote.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/isar-loisachbote.png and b/recipes/icons/isar-loisachbote.png differ diff --git a/recipes/icons/istorias.png b/recipes/icons/istorias.png index 82ebd1d6e5..0bf35ead40 100644 Binary files a/recipes/icons/istorias.png and b/recipes/icons/istorias.png differ diff --git a/recipes/icons/istories.png b/recipes/icons/istories.png index 8d9e65e6c8..cd8b923b70 100644 Binary files a/recipes/icons/istories.png and b/recipes/icons/istories.png differ diff --git a/recipes/icons/ivanamilakovic.png b/recipes/icons/ivanamilakovic.png index 4a3a02de9f..293ea03423 100644 Binary files a/recipes/icons/ivanamilakovic.png and b/recipes/icons/ivanamilakovic.png differ diff --git a/recipes/icons/ixbt.png b/recipes/icons/ixbt.png index 73e09a6185..a80e06510e 100644 Binary files a/recipes/icons/ixbt.png and b/recipes/icons/ixbt.png differ diff --git a/recipes/icons/jacobinmag.png b/recipes/icons/jacobinmag.png index 1ec92ecb4b..0a301bbde2 100644 Binary files a/recipes/icons/jacobinmag.png and b/recipes/icons/jacobinmag.png differ diff --git a/recipes/icons/jagran_josh.png b/recipes/icons/jagran_josh.png index 3e6ba0e542..bf07ef70c3 100644 Binary files a/recipes/icons/jagran_josh.png and b/recipes/icons/jagran_josh.png differ diff --git a/recipes/icons/japaa.png 
b/recipes/icons/japaa.png index bc618a2c4c..cb52280935 100644 Binary files a/recipes/icons/japaa.png and b/recipes/icons/japaa.png differ diff --git a/recipes/icons/japan_news.png b/recipes/icons/japan_news.png index 45bd84128c..b12e697be0 100644 Binary files a/recipes/icons/japan_news.png and b/recipes/icons/japan_news.png differ diff --git a/recipes/icons/javalobby.png b/recipes/icons/javalobby.png index bb07a0de28..952d026fa7 100644 Binary files a/recipes/icons/javalobby.png and b/recipes/icons/javalobby.png differ diff --git a/recipes/icons/jbpress.png b/recipes/icons/jbpress.png index 56eecc17e7..77e35a3fc3 100644 Binary files a/recipes/icons/jbpress.png and b/recipes/icons/jbpress.png differ diff --git a/recipes/icons/jeuxvideo.png b/recipes/icons/jeuxvideo.png index 6aaca998ba..8e16d1509c 100644 Binary files a/recipes/icons/jeuxvideo.png and b/recipes/icons/jeuxvideo.png differ diff --git a/recipes/icons/johm.png b/recipes/icons/johm.png index e4f399116b..930025b91a 100644 Binary files a/recipes/icons/johm.png and b/recipes/icons/johm.png differ diff --git a/recipes/icons/joongang.png b/recipes/icons/joongang.png index ad8596b686..ddfbbc94a2 100644 Binary files a/recipes/icons/joongang.png and b/recipes/icons/joongang.png differ diff --git a/recipes/icons/journalofaccountancy.png b/recipes/icons/journalofaccountancy.png index ddc65c8c5e..27c245eba2 100644 Binary files a/recipes/icons/journalofaccountancy.png and b/recipes/icons/journalofaccountancy.png differ diff --git a/recipes/icons/jp_dk.png b/recipes/icons/jp_dk.png index b07346f4d8..65b87c65fb 100644 Binary files a/recipes/icons/jp_dk.png and b/recipes/icons/jp_dk.png differ diff --git a/recipes/icons/juve_la_stampa.png b/recipes/icons/juve_la_stampa.png index e4fcb8eae8..bea808f77a 100644 Binary files a/recipes/icons/juve_la_stampa.png and b/recipes/icons/juve_la_stampa.png differ diff --git a/recipes/icons/juventudrebelde.png b/recipes/icons/juventudrebelde.png index 0b2de7f97a..2e08cd9f64 100644 
Binary files a/recipes/icons/juventudrebelde.png and b/recipes/icons/juventudrebelde.png differ diff --git a/recipes/icons/jv_dk.png b/recipes/icons/jv_dk.png index 24c532aab8..b9fb2bc9b8 100644 Binary files a/recipes/icons/jv_dk.png and b/recipes/icons/jv_dk.png differ diff --git a/recipes/icons/kahokushinpo.png b/recipes/icons/kahokushinpo.png index 15fb5d7577..60049a41dc 100644 Binary files a/recipes/icons/kahokushinpo.png and b/recipes/icons/kahokushinpo.png differ diff --git a/recipes/icons/kaloeviglokalavisen_dk.png b/recipes/icons/kaloeviglokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/kaloeviglokalavisen_dk.png and b/recipes/icons/kaloeviglokalavisen_dk.png differ diff --git a/recipes/icons/kamikaze.png b/recipes/icons/kamikaze.png index d981c1b46c..d176f48469 100644 Binary files a/recipes/icons/kamikaze.png and b/recipes/icons/kamikaze.png differ diff --git a/recipes/icons/karlsruhe.png b/recipes/icons/karlsruhe.png index 028271ba1a..a82fb05b00 100644 Binary files a/recipes/icons/karlsruhe.png and b/recipes/icons/karlsruhe.png differ diff --git a/recipes/icons/karsi_gazete.png b/recipes/icons/karsi_gazete.png index 87bf7dfa42..bd44458ff7 100644 Binary files a/recipes/icons/karsi_gazete.png and b/recipes/icons/karsi_gazete.png differ diff --git a/recipes/icons/kellog_insight.png b/recipes/icons/kellog_insight.png index 3b162084c9..df0d0e5d0d 100644 Binary files a/recipes/icons/kellog_insight.png and b/recipes/icons/kellog_insight.png differ diff --git a/recipes/icons/kerrang.png b/recipes/icons/kerrang.png index 2fff619e1e..bc9f019b3a 100644 Binary files a/recipes/icons/kerrang.png and b/recipes/icons/kerrang.png differ diff --git a/recipes/icons/kgsenghavebladet_dk.png b/recipes/icons/kgsenghavebladet_dk.png index 05f107580a..6fc915ca3d 100644 Binary files a/recipes/icons/kgsenghavebladet_dk.png and b/recipes/icons/kgsenghavebladet_dk.png differ diff --git a/recipes/icons/kirkusreviews.png 
b/recipes/icons/kirkusreviews.png new file mode 100644 index 0000000000..abca544dbd Binary files /dev/null and b/recipes/icons/kirkusreviews.png differ diff --git a/recipes/icons/kitekinto.png b/recipes/icons/kitekinto.png index 9768dc9fbf..a95ea9a2ae 100644 Binary files a/recipes/icons/kitekinto.png and b/recipes/icons/kitekinto.png differ diff --git a/recipes/icons/kitsapun.png b/recipes/icons/kitsapun.png index d2be8741ea..9423c9e9b0 100644 Binary files a/recipes/icons/kitsapun.png and b/recipes/icons/kitsapun.png differ diff --git a/recipes/icons/kleinezeitung.png b/recipes/icons/kleinezeitung.png index 57c02bd4f7..6bf926744b 100644 Binary files a/recipes/icons/kleinezeitung.png and b/recipes/icons/kleinezeitung.png differ diff --git a/recipes/icons/km_blog.png b/recipes/icons/km_blog.png index dcdfb07f46..27d8499689 100644 Binary files a/recipes/icons/km_blog.png and b/recipes/icons/km_blog.png differ diff --git a/recipes/icons/knack_be.png b/recipes/icons/knack_be.png index d6ffcd43dd..24c13ba039 100644 Binary files a/recipes/icons/knack_be.png and b/recipes/icons/knack_be.png differ diff --git a/recipes/icons/knife_media.png b/recipes/icons/knife_media.png index 8efc7d131a..220754c555 100644 Binary files a/recipes/icons/knife_media.png and b/recipes/icons/knife_media.png differ diff --git a/recipes/icons/koegelokalavisen_dk.png b/recipes/icons/koegelokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/koegelokalavisen_dk.png and b/recipes/icons/koegelokalavisen_dk.png differ diff --git a/recipes/icons/koldinglokalavisen_dk.png b/recipes/icons/koldinglokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/koldinglokalavisen_dk.png and b/recipes/icons/koldinglokalavisen_dk.png differ diff --git a/recipes/icons/komchadluek.png b/recipes/icons/komchadluek.png index 7ff27eaf31..a0dddc7589 100644 Binary files a/recipes/icons/komchadluek.png and b/recipes/icons/komchadluek.png differ diff --git 
a/recipes/icons/kommersant.png b/recipes/icons/kommersant.png index 316954a88f..8b05c203ad 100644 Binary files a/recipes/icons/kommersant.png and b/recipes/icons/kommersant.png differ diff --git a/recipes/icons/kompiutierra.png b/recipes/icons/kompiutierra.png index 51acaca8de..41e4356b34 100644 Binary files a/recipes/icons/kompiutierra.png and b/recipes/icons/kompiutierra.png differ diff --git a/recipes/icons/krebs_on_security.png b/recipes/icons/krebs_on_security.png index 578dc01473..8ce42285b2 100644 Binary files a/recipes/icons/krebs_on_security.png and b/recipes/icons/krebs_on_security.png differ diff --git a/recipes/icons/kresy_pl.png b/recipes/icons/kresy_pl.png index d12192ba7b..0a3f26f811 100644 Binary files a/recipes/icons/kresy_pl.png and b/recipes/icons/kresy_pl.png differ diff --git a/recipes/icons/kristeligt_dagblad_dk.png b/recipes/icons/kristeligt_dagblad_dk.png index 14c16b3291..ef138b9caf 100644 Binary files a/recipes/icons/kristeligt_dagblad_dk.png and b/recipes/icons/kristeligt_dagblad_dk.png differ diff --git a/recipes/icons/krstarica_en.png b/recipes/icons/krstarica_en.png index 2ece457161..93cc7056c4 100644 Binary files a/recipes/icons/krstarica_en.png and b/recipes/icons/krstarica_en.png differ diff --git a/recipes/icons/kudy_z_nudy.png b/recipes/icons/kudy_z_nudy.png index 3a21cfc7e3..e00b761ae2 100644 Binary files a/recipes/icons/kudy_z_nudy.png and b/recipes/icons/kudy_z_nudy.png differ diff --git a/recipes/icons/kukuburi.png b/recipes/icons/kukuburi.png index c73cb09d83..64ded80d47 100644 Binary files a/recipes/icons/kukuburi.png and b/recipes/icons/kukuburi.png differ diff --git a/recipes/icons/kurier_lubelski.png b/recipes/icons/kurier_lubelski.png index c81dc6a733..4b3d76de2f 100644 Binary files a/recipes/icons/kurier_lubelski.png and b/recipes/icons/kurier_lubelski.png differ diff --git a/recipes/icons/kurier_poranny.png b/recipes/icons/kurier_poranny.png index 1bc6b9aaed..0ed02efdd8 100644 Binary files 
a/recipes/icons/kurier_poranny.png and b/recipes/icons/kurier_poranny.png differ diff --git a/recipes/icons/kurier_szczecinski.png b/recipes/icons/kurier_szczecinski.png index eeecefadfe..2c8a3684dd 100644 Binary files a/recipes/icons/kurier_szczecinski.png and b/recipes/icons/kurier_szczecinski.png differ diff --git a/recipes/icons/kyivpost_ar.png b/recipes/icons/kyivpost_ar.png index da4cd27519..67a06f3559 100644 Binary files a/recipes/icons/kyivpost_ar.png and b/recipes/icons/kyivpost_ar.png differ diff --git a/recipes/icons/kyivpost_en.png b/recipes/icons/kyivpost_en.png index da4cd27519..67a06f3559 100644 Binary files a/recipes/icons/kyivpost_en.png and b/recipes/icons/kyivpost_en.png differ diff --git a/recipes/icons/kyivpost_ua.png b/recipes/icons/kyivpost_ua.png index da4cd27519..67a06f3559 100644 Binary files a/recipes/icons/kyivpost_ua.png and b/recipes/icons/kyivpost_ua.png differ diff --git a/recipes/icons/l_espresso.png b/recipes/icons/l_espresso.png new file mode 100644 index 0000000000..af146383b3 Binary files /dev/null and b/recipes/icons/l_espresso.png differ diff --git a/recipes/icons/la_cuarta.png b/recipes/icons/la_cuarta.png index 751b944756..e13b932354 100644 Binary files a/recipes/icons/la_cuarta.png and b/recipes/icons/la_cuarta.png differ diff --git a/recipes/icons/la_gazetta_del_mezzogiorno.png b/recipes/icons/la_gazetta_del_mezzogiorno.png new file mode 100644 index 0000000000..42b17a5a29 Binary files /dev/null and b/recipes/icons/la_gazetta_del_mezzogiorno.png differ diff --git a/recipes/icons/la_jornada.png b/recipes/icons/la_jornada.png index 0e8ede8034..54ca955714 100644 Binary files a/recipes/icons/la_jornada.png and b/recipes/icons/la_jornada.png differ diff --git a/recipes/icons/la_nacion_cr.png b/recipes/icons/la_nacion_cr.png index 181481f5b4..7eff7a30b3 100644 Binary files a/recipes/icons/la_nacion_cr.png and b/recipes/icons/la_nacion_cr.png differ diff --git a/recipes/icons/la_nueva.png b/recipes/icons/la_nueva.png index 
3e7e9cbeb0..20b0c797c2 100644 Binary files a/recipes/icons/la_nueva.png and b/recipes/icons/la_nueva.png differ diff --git a/recipes/icons/la_rioja.png b/recipes/icons/la_rioja.png index ea6d29f607..0322d1c0d1 100644 Binary files a/recipes/icons/la_rioja.png and b/recipes/icons/la_rioja.png differ diff --git a/recipes/icons/la_segunda.png b/recipes/icons/la_segunda.png new file mode 100644 index 0000000000..81a50e8b7f Binary files /dev/null and b/recipes/icons/la_segunda.png differ diff --git a/recipes/icons/la_tercera.png b/recipes/icons/la_tercera.png index fb70af7790..5b42fdc19b 100644 Binary files a/recipes/icons/la_tercera.png and b/recipes/icons/la_tercera.png differ diff --git a/recipes/icons/la_tribuna.png b/recipes/icons/la_tribuna.png new file mode 100644 index 0000000000..ccb85fe1b7 Binary files /dev/null and b/recipes/icons/la_tribuna.png differ diff --git a/recipes/icons/la_voce.png b/recipes/icons/la_voce.png new file mode 100644 index 0000000000..bbbef124ed Binary files /dev/null and b/recipes/icons/la_voce.png differ diff --git a/recipes/icons/lalibre_be.png b/recipes/icons/lalibre_be.png index ec673d6044..2974240184 100644 Binary files a/recipes/icons/lalibre_be.png and b/recipes/icons/lalibre_be.png differ diff --git a/recipes/icons/lamebook.png b/recipes/icons/lamebook.png index 6aefbb6d37..2fc0befa2f 100644 Binary files a/recipes/icons/lamebook.png and b/recipes/icons/lamebook.png differ diff --git a/recipes/icons/lameuse_be.png b/recipes/icons/lameuse_be.png new file mode 100644 index 0000000000..2d59e15429 Binary files /dev/null and b/recipes/icons/lameuse_be.png differ diff --git a/recipes/icons/lanacion.png b/recipes/icons/lanacion.png index 56614c20e8..2747f8c6fd 100644 Binary files a/recipes/icons/lanacion.png and b/recipes/icons/lanacion.png differ diff --git a/recipes/icons/lanacion_chile.png b/recipes/icons/lanacion_chile.png index da40cc7e51..d2dcb63d03 100644 Binary files a/recipes/icons/lanacion_chile.png and 
b/recipes/icons/lanacion_chile.png differ diff --git a/recipes/icons/laprensa.png b/recipes/icons/laprensa.png index 64d8d907ed..3eb569296e 100644 Binary files a/recipes/icons/laprensa.png and b/recipes/icons/laprensa.png differ diff --git a/recipes/icons/las_vegas_review.png b/recipes/icons/las_vegas_review.png index 26204116b2..5c0e47c6b6 100644 Binary files a/recipes/icons/las_vegas_review.png and b/recipes/icons/las_vegas_review.png differ diff --git a/recipes/icons/lavanguardia.png b/recipes/icons/lavanguardia.png new file mode 100644 index 0000000000..d5b81909cb Binary files /dev/null and b/recipes/icons/lavanguardia.png differ diff --git a/recipes/icons/lavanguardia_corresponsales_es.png b/recipes/icons/lavanguardia_corresponsales_es.png new file mode 100644 index 0000000000..d5b81909cb Binary files /dev/null and b/recipes/icons/lavanguardia_corresponsales_es.png differ diff --git a/recipes/icons/lavenir_be.png b/recipes/icons/lavenir_be.png index 9022f5895b..6c24d5c1ec 100644 Binary files a/recipes/icons/lavenir_be.png and b/recipes/icons/lavenir_be.png differ diff --git a/recipes/icons/le_gorafi.png b/recipes/icons/le_gorafi.png new file mode 100644 index 0000000000..25f1fe1ca3 Binary files /dev/null and b/recipes/icons/le_gorafi.png differ diff --git a/recipes/icons/le_journal.png b/recipes/icons/le_journal.png new file mode 100644 index 0000000000..18021722f7 Binary files /dev/null and b/recipes/icons/le_journal.png differ diff --git a/recipes/icons/le_monde_diplomatique_fr.png b/recipes/icons/le_monde_diplomatique_fr.png index c63a5b51a3..27447ee2e8 100644 Binary files a/recipes/icons/le_monde_diplomatique_fr.png and b/recipes/icons/le_monde_diplomatique_fr.png differ diff --git a/recipes/icons/le_monde_en.png b/recipes/icons/le_monde_en.png index 79cc40e63d..b2f396d60d 100644 Binary files a/recipes/icons/le_monde_en.png and b/recipes/icons/le_monde_en.png differ diff --git a/recipes/icons/le_monde_sub.png b/recipes/icons/le_monde_sub.png index 
097386a2ac..8459790916 100644 Binary files a/recipes/icons/le_monde_sub.png and b/recipes/icons/le_monde_sub.png differ diff --git a/recipes/icons/le_peuple_breton.png b/recipes/icons/le_peuple_breton.png new file mode 100644 index 0000000000..c755462316 Binary files /dev/null and b/recipes/icons/le_peuple_breton.png differ diff --git a/recipes/icons/le_temps.png b/recipes/icons/le_temps.png index 523130c91a..061e9ed748 100644 Binary files a/recipes/icons/le_temps.png and b/recipes/icons/le_temps.png differ diff --git a/recipes/icons/leduc.png b/recipes/icons/leduc.png new file mode 100644 index 0000000000..5a40c3a78e Binary files /dev/null and b/recipes/icons/leduc.png differ diff --git a/recipes/icons/lega_nerd.png b/recipes/icons/lega_nerd.png index a64cc5fc6f..6c9775c8a2 100644 Binary files a/recipes/icons/lega_nerd.png and b/recipes/icons/lega_nerd.png differ diff --git a/recipes/icons/legeartis.png b/recipes/icons/legeartis.png index 8813057265..be0f565a05 100644 Binary files a/recipes/icons/legeartis.png and b/recipes/icons/legeartis.png differ diff --git a/recipes/icons/legitymizm.png b/recipes/icons/legitymizm.png index d101b5d9d4..988febc408 100644 Binary files a/recipes/icons/legitymizm.png and b/recipes/icons/legitymizm.png differ diff --git a/recipes/icons/leipzer_volkszeitung.png b/recipes/icons/leipzer_volkszeitung.png new file mode 100644 index 0000000000..560330f937 Binary files /dev/null and b/recipes/icons/leipzer_volkszeitung.png differ diff --git a/recipes/icons/lemonde_dip.png b/recipes/icons/lemonde_dip.png index 393f5f9360..4065891d82 100644 Binary files a/recipes/icons/lemonde_dip.png and b/recipes/icons/lemonde_dip.png differ diff --git a/recipes/icons/lenta_ru.png b/recipes/icons/lenta_ru.png new file mode 100644 index 0000000000..2e25b9ac13 Binary files /dev/null and b/recipes/icons/lenta_ru.png differ diff --git a/recipes/icons/lepoint.png b/recipes/icons/lepoint.png index a33e3c35ca..f9c1ca3450 100644 Binary files 
a/recipes/icons/lepoint.png and b/recipes/icons/lepoint.png differ diff --git a/recipes/icons/les_echos.png b/recipes/icons/les_echos.png new file mode 100644 index 0000000000..0671dd12e4 Binary files /dev/null and b/recipes/icons/les_echos.png differ diff --git a/recipes/icons/lescienze.png b/recipes/icons/lescienze.png new file mode 100644 index 0000000000..a5cfa6f694 Binary files /dev/null and b/recipes/icons/lescienze.png differ diff --git a/recipes/icons/lesoir_be.png b/recipes/icons/lesoir_be.png new file mode 100644 index 0000000000..9a0aaa786a Binary files /dev/null and b/recipes/icons/lesoir_be.png differ diff --git a/recipes/icons/levante.png b/recipes/icons/levante.png new file mode 100644 index 0000000000..37451323d8 Binary files /dev/null and b/recipes/icons/levante.png differ diff --git a/recipes/icons/lexpress.png b/recipes/icons/lexpress.png index 1591478ca9..cb15f2cd9c 100644 Binary files a/recipes/icons/lexpress.png and b/recipes/icons/lexpress.png differ diff --git a/recipes/icons/liberatorio_politico.png b/recipes/icons/liberatorio_politico.png new file mode 100644 index 0000000000..bb0ca7e650 Binary files /dev/null and b/recipes/icons/liberatorio_politico.png differ diff --git a/recipes/icons/liberty_times.png b/recipes/icons/liberty_times.png index 44fa23a685..73b2379555 100644 Binary files a/recipes/icons/liberty_times.png and b/recipes/icons/liberty_times.png differ diff --git a/recipes/icons/lidovky.png b/recipes/icons/lidovky.png index 856aa67a86..52db59223d 100644 Binary files a/recipes/icons/lidovky.png and b/recipes/icons/lidovky.png differ diff --git a/recipes/icons/liganet_ru.png b/recipes/icons/liganet_ru.png index cd15080f02..14c1be8dc9 100644 Binary files a/recipes/icons/liganet_ru.png and b/recipes/icons/liganet_ru.png differ diff --git a/recipes/icons/liganet_ua.png b/recipes/icons/liganet_ua.png index cd15080f02..14c1be8dc9 100644 Binary files a/recipes/icons/liganet_ua.png and b/recipes/icons/liganet_ua.png differ diff --git 
a/recipes/icons/lightspeed_magazine.png b/recipes/icons/lightspeed_magazine.png new file mode 100644 index 0000000000..9c6b9c9cdc Binary files /dev/null and b/recipes/icons/lightspeed_magazine.png differ diff --git a/recipes/icons/limba_sarda.png b/recipes/icons/limba_sarda.png new file mode 100644 index 0000000000..b079189a2f Binary files /dev/null and b/recipes/icons/limba_sarda.png differ diff --git a/recipes/icons/linux_news_de.png b/recipes/icons/linux_news_de.png new file mode 100644 index 0000000000..f003488195 Binary files /dev/null and b/recipes/icons/linux_news_de.png differ diff --git a/recipes/icons/live_law.png b/recipes/icons/live_law.png new file mode 100644 index 0000000000..9db02af90b Binary files /dev/null and b/recipes/icons/live_law.png differ diff --git a/recipes/icons/livemint.png b/recipes/icons/livemint.png index 68f745e747..01e5e9b664 100644 Binary files a/recipes/icons/livemint.png and b/recipes/icons/livemint.png differ diff --git a/recipes/icons/livescience.png b/recipes/icons/livescience.png new file mode 100644 index 0000000000..6e0d767b69 Binary files /dev/null and b/recipes/icons/livescience.png differ diff --git a/recipes/icons/living_stones.png b/recipes/icons/living_stones.png new file mode 100644 index 0000000000..d9245b0083 Binary files /dev/null and b/recipes/icons/living_stones.png differ diff --git a/recipes/icons/london_free_press.png b/recipes/icons/london_free_press.png index 61f0c60dd4..8924535841 100644 Binary files a/recipes/icons/london_free_press.png and b/recipes/icons/london_free_press.png differ diff --git a/recipes/icons/los_danieles.png b/recipes/icons/los_danieles.png new file mode 100644 index 0000000000..ee65d56e94 Binary files /dev/null and b/recipes/icons/los_danieles.png differ diff --git a/recipes/icons/losservatoreromano_it.png b/recipes/icons/losservatoreromano_it.png new file mode 100644 index 0000000000..088ef6c2d7 Binary files /dev/null and b/recipes/icons/losservatoreromano_it.png differ diff --git 
a/recipes/icons/ludwig_mises.png b/recipes/icons/ludwig_mises.png new file mode 100644 index 0000000000..d342fd390a Binary files /dev/null and b/recipes/icons/ludwig_mises.png differ diff --git a/recipes/icons/luns_a_venres.png b/recipes/icons/luns_a_venres.png new file mode 100644 index 0000000000..6cf87544c0 Binary files /dev/null and b/recipes/icons/luns_a_venres.png differ diff --git a/recipes/icons/lupa.png b/recipes/icons/lupa.png index e63de5e451..d3e6215fec 100644 Binary files a/recipes/icons/lupa.png and b/recipes/icons/lupa.png differ diff --git a/recipes/icons/lwn.png b/recipes/icons/lwn.png index 431f6cde3c..3ecf8febce 100644 Binary files a/recipes/icons/lwn.png and b/recipes/icons/lwn.png differ diff --git a/recipes/icons/lwn_free.png b/recipes/icons/lwn_free.png new file mode 100644 index 0000000000..9e448fa2cb Binary files /dev/null and b/recipes/icons/lwn_free.png differ diff --git a/recipes/icons/lwn_weekly.png b/recipes/icons/lwn_weekly.png index 431f6cde3c..3ecf8febce 100644 Binary files a/recipes/icons/lwn_weekly.png and b/recipes/icons/lwn_weekly.png differ diff --git a/recipes/icons/lyngby-taarbaeklokalavisen_dk.png b/recipes/icons/lyngby-taarbaeklokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/lyngby-taarbaeklokalavisen_dk.png and b/recipes/icons/lyngby-taarbaeklokalavisen_dk.png differ diff --git a/recipes/icons/mac_world.png b/recipes/icons/mac_world.png index 4259b5cf21..f13f14f90f 100644 Binary files a/recipes/icons/mac_world.png and b/recipes/icons/mac_world.png differ diff --git a/recipes/icons/mac_world_uk.png b/recipes/icons/mac_world_uk.png index 6aca234d08..5196c14635 100644 Binary files a/recipes/icons/mac_world_uk.png and b/recipes/icons/mac_world_uk.png differ diff --git a/recipes/icons/macity.png b/recipes/icons/macity.png new file mode 100644 index 0000000000..7bf511e7f2 Binary files /dev/null and b/recipes/icons/macity.png differ diff --git a/recipes/icons/macleans.png 
b/recipes/icons/macleans.png index ff34f86756..8bf66c1b44 100644 Binary files a/recipes/icons/macleans.png and b/recipes/icons/macleans.png differ diff --git a/recipes/icons/maekyung.png b/recipes/icons/maekyung.png index 0f26245b07..67a7abc43e 100644 Binary files a/recipes/icons/maekyung.png and b/recipes/icons/maekyung.png differ diff --git a/recipes/icons/magyar_nemzet.png b/recipes/icons/magyar_nemzet.png new file mode 100644 index 0000000000..cf90ecfa86 Binary files /dev/null and b/recipes/icons/magyar_nemzet.png differ diff --git a/recipes/icons/maharashtra_times.png b/recipes/icons/maharashtra_times.png new file mode 100644 index 0000000000..44e2556941 Binary files /dev/null and b/recipes/icons/maharashtra_times.png differ diff --git a/recipes/icons/mail_and_guardian.png b/recipes/icons/mail_and_guardian.png index 17345514d9..a92e84ec49 100644 Binary files a/recipes/icons/mail_and_guardian.png and b/recipes/icons/mail_and_guardian.png differ diff --git a/recipes/icons/mainichi_en.png b/recipes/icons/mainichi_en.png new file mode 100644 index 0000000000..4d486799c5 Binary files /dev/null and b/recipes/icons/mainichi_en.png differ diff --git a/recipes/icons/malaya_business_insight.png b/recipes/icons/malaya_business_insight.png new file mode 100644 index 0000000000..a0acdcbf91 Binary files /dev/null and b/recipes/icons/malaya_business_insight.png differ diff --git a/recipes/icons/mallorca_zeitung.png b/recipes/icons/mallorca_zeitung.png new file mode 100644 index 0000000000..1baecf8fb7 Binary files /dev/null and b/recipes/icons/mallorca_zeitung.png differ diff --git a/recipes/icons/mandidner.png b/recipes/icons/mandidner.png new file mode 100644 index 0000000000..edfd965347 Binary files /dev/null and b/recipes/icons/mandidner.png differ diff --git a/recipes/icons/marctv.png b/recipes/icons/marctv.png index 6a5589a900..5f8e528d4b 100644 Binary files a/recipes/icons/marctv.png and b/recipes/icons/marctv.png differ diff --git 
a/recipes/icons/marine_corps_times.png b/recipes/icons/marine_corps_times.png new file mode 100644 index 0000000000..922c2a61f3 Binary files /dev/null and b/recipes/icons/marine_corps_times.png differ diff --git a/recipes/icons/marketing_magazine.png b/recipes/icons/marketing_magazine.png new file mode 100644 index 0000000000..ba0cca6047 Binary files /dev/null and b/recipes/icons/marketing_magazine.png differ diff --git a/recipes/icons/marketing_sensoriale.png b/recipes/icons/marketing_sensoriale.png index 7aabb0b358..b338881fb5 100644 Binary files a/recipes/icons/marketing_sensoriale.png and b/recipes/icons/marketing_sensoriale.png differ diff --git a/recipes/icons/mateusz_czytania.png b/recipes/icons/mateusz_czytania.png index cec6793f0d..ca9c8e8889 100644 Binary files a/recipes/icons/mateusz_czytania.png and b/recipes/icons/mateusz_czytania.png differ diff --git a/recipes/icons/matichon.png b/recipes/icons/matichon.png new file mode 100644 index 0000000000..b7bee6ed4f Binary files /dev/null and b/recipes/icons/matichon.png differ diff --git a/recipes/icons/max_planck.png b/recipes/icons/max_planck.png index 21f5244641..a747b0a231 100644 Binary files a/recipes/icons/max_planck.png and b/recipes/icons/max_planck.png differ diff --git a/recipes/icons/mayra.png b/recipes/icons/mayra.png index 56f7e22ee9..055a8a5c7b 100644 Binary files a/recipes/icons/mayra.png and b/recipes/icons/mayra.png differ diff --git a/recipes/icons/mdj.png b/recipes/icons/mdj.png new file mode 100644 index 0000000000..b0add5e63c Binary files /dev/null and b/recipes/icons/mdj.png differ diff --git a/recipes/icons/mediaindonesia.png b/recipes/icons/mediaindonesia.png new file mode 100644 index 0000000000..b89b49f743 Binary files /dev/null and b/recipes/icons/mediaindonesia.png differ diff --git a/recipes/icons/mediterraneo.png b/recipes/icons/mediterraneo.png new file mode 100644 index 0000000000..7ad854d54f Binary files /dev/null and b/recipes/icons/mediterraneo.png differ diff --git 
a/recipes/icons/medscape.png b/recipes/icons/medscape.png index 7a576ede64..be63f573f3 100644 Binary files a/recipes/icons/medscape.png and b/recipes/icons/medscape.png differ diff --git a/recipes/icons/meduza.png b/recipes/icons/meduza.png index 19c9cbbe8e..6acaff4010 100644 Binary files a/recipes/icons/meduza.png and b/recipes/icons/meduza.png differ diff --git a/recipes/icons/meduza_ru.png b/recipes/icons/meduza_ru.png index 19c9cbbe8e..6acaff4010 100644 Binary files a/recipes/icons/meduza_ru.png and b/recipes/icons/meduza_ru.png differ diff --git a/recipes/icons/melbourne_herald_sun.png b/recipes/icons/melbourne_herald_sun.png index c8e51bbc71..00f915f1e4 100644 Binary files a/recipes/icons/melbourne_herald_sun.png and b/recipes/icons/melbourne_herald_sun.png differ diff --git a/recipes/icons/mens_day_out.png b/recipes/icons/mens_day_out.png new file mode 100644 index 0000000000..5189b1cc9b Binary files /dev/null and b/recipes/icons/mens_day_out.png differ diff --git a/recipes/icons/merco_press.png b/recipes/icons/merco_press.png index ba320e482f..24bd1184db 100644 Binary files a/recipes/icons/merco_press.png and b/recipes/icons/merco_press.png differ diff --git a/recipes/icons/metro_montreal.png b/recipes/icons/metro_montreal.png index e12f7e8270..d46cc09dd5 100644 Binary files a/recipes/icons/metro_montreal.png and b/recipes/icons/metro_montreal.png differ diff --git a/recipes/icons/metro_uk.png b/recipes/icons/metro_uk.png index 113a058db7..e20609dc86 100644 Binary files a/recipes/icons/metro_uk.png and b/recipes/icons/metro_uk.png differ diff --git a/recipes/icons/miami_herald.png b/recipes/icons/miami_herald.png index 4f59963470..30d937a909 100644 Binary files a/recipes/icons/miami_herald.png and b/recipes/icons/miami_herald.png differ diff --git a/recipes/icons/michalkiewicz.png b/recipes/icons/michalkiewicz.png index c6a60118b4..04f2a58b65 100644 Binary files a/recipes/icons/michalkiewicz.png and b/recipes/icons/michalkiewicz.png differ diff --git 
a/recipes/icons/miesbacher_merkur.png b/recipes/icons/miesbacher_merkur.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/miesbacher_merkur.png and b/recipes/icons/miesbacher_merkur.png differ diff --git a/recipes/icons/military_times.png b/recipes/icons/military_times.png new file mode 100644 index 0000000000..e1ee9bfc55 Binary files /dev/null and b/recipes/icons/military_times.png differ diff --git a/recipes/icons/mit_technology_review.png b/recipes/icons/mit_technology_review.png new file mode 100644 index 0000000000..9a731e5127 Binary files /dev/null and b/recipes/icons/mit_technology_review.png differ diff --git a/recipes/icons/mmc_rtv.png b/recipes/icons/mmc_rtv.png index e7ec62ab6c..c156e043ad 100644 Binary files a/recipes/icons/mmc_rtv.png and b/recipes/icons/mmc_rtv.png differ diff --git a/recipes/icons/mobilenations.png b/recipes/icons/mobilenations.png new file mode 100644 index 0000000000..be3fc1f1b9 Binary files /dev/null and b/recipes/icons/mobilenations.png differ diff --git a/recipes/icons/modoros.png b/recipes/icons/modoros.png index 87ff569c5a..3b702313c5 100644 Binary files a/recipes/icons/modoros.png and b/recipes/icons/modoros.png differ diff --git a/recipes/icons/moldovaazi.png b/recipes/icons/moldovaazi.png index 629cfc871e..33c8626d84 100644 Binary files a/recipes/icons/moldovaazi.png and b/recipes/icons/moldovaazi.png differ diff --git a/recipes/icons/monbiot.png b/recipes/icons/monbiot.png new file mode 100644 index 0000000000..2d8e1a91a8 Binary files /dev/null and b/recipes/icons/monbiot.png differ diff --git a/recipes/icons/mondedurable.png b/recipes/icons/mondedurable.png new file mode 100644 index 0000000000..a1f907363c Binary files /dev/null and b/recipes/icons/mondedurable.png differ diff --git a/recipes/icons/money_pl.png b/recipes/icons/money_pl.png index 53b204f5fd..4d8b586b4d 100644 Binary files a/recipes/icons/money_pl.png and b/recipes/icons/money_pl.png differ diff --git a/recipes/icons/moneycontrol.png 
b/recipes/icons/moneycontrol.png new file mode 100644 index 0000000000..776c865e84 Binary files /dev/null and b/recipes/icons/moneycontrol.png differ diff --git a/recipes/icons/moneyro.png b/recipes/icons/moneyro.png index 7dacf26df5..ebcbf2cf17 100644 Binary files a/recipes/icons/moneyro.png and b/recipes/icons/moneyro.png differ diff --git a/recipes/icons/montevideo_com.png b/recipes/icons/montevideo_com.png index 6ff122a02b..d5660c38b1 100644 Binary files a/recipes/icons/montevideo_com.png and b/recipes/icons/montevideo_com.png differ diff --git a/recipes/icons/montreal_gazette.png b/recipes/icons/montreal_gazette.png new file mode 100644 index 0000000000..31df177076 Binary files /dev/null and b/recipes/icons/montreal_gazette.png differ diff --git a/recipes/icons/moscow_times.png b/recipes/icons/moscow_times.png index c98140cf9e..a831531119 100644 Binary files a/recipes/icons/moscow_times.png and b/recipes/icons/moscow_times.png differ diff --git a/recipes/icons/moscowtimes_en.png b/recipes/icons/moscowtimes_en.png index 99ba9209ea..f978f1a03b 100644 Binary files a/recipes/icons/moscowtimes_en.png and b/recipes/icons/moscowtimes_en.png differ diff --git a/recipes/icons/moscowtimes_ru.png b/recipes/icons/moscowtimes_ru.png index 99ba9209ea..f978f1a03b 100644 Binary files a/recipes/icons/moscowtimes_ru.png and b/recipes/icons/moscowtimes_ru.png differ diff --git a/recipes/icons/msnbc.png b/recipes/icons/msnbc.png index 9b3af8c575..bb420787bc 100644 Binary files a/recipes/icons/msnbc.png and b/recipes/icons/msnbc.png differ diff --git a/recipes/icons/msnsankei.png b/recipes/icons/msnsankei.png index 67a6983f6f..fecbab4ee9 100644 Binary files a/recipes/icons/msnsankei.png and b/recipes/icons/msnsankei.png differ diff --git a/recipes/icons/mult_kor.png b/recipes/icons/mult_kor.png index bc241dccef..c79a7a1610 100644 Binary files a/recipes/icons/mult_kor.png and b/recipes/icons/mult_kor.png differ diff --git a/recipes/icons/munchner_merkur_nord.png 
b/recipes/icons/munchner_merkur_nord.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/munchner_merkur_nord.png and b/recipes/icons/munchner_merkur_nord.png differ diff --git a/recipes/icons/munchner_merkur_stadt.png b/recipes/icons/munchner_merkur_stadt.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/munchner_merkur_stadt.png and b/recipes/icons/munchner_merkur_stadt.png differ diff --git a/recipes/icons/munchner_merkur_sud.png b/recipes/icons/munchner_merkur_sud.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/munchner_merkur_sud.png and b/recipes/icons/munchner_merkur_sud.png differ diff --git a/recipes/icons/munchner_merkur_wurmtal.png b/recipes/icons/munchner_merkur_wurmtal.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/munchner_merkur_wurmtal.png and b/recipes/icons/munchner_merkur_wurmtal.png differ diff --git a/recipes/icons/murnauer_tagblatt.png b/recipes/icons/murnauer_tagblatt.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/murnauer_tagblatt.png and b/recipes/icons/murnauer_tagblatt.png differ diff --git a/recipes/icons/mwjournal.png b/recipes/icons/mwjournal.png new file mode 100644 index 0000000000..bf4a27dfa0 Binary files /dev/null and b/recipes/icons/mwjournal.png differ diff --git a/recipes/icons/my_dealz_de.png b/recipes/icons/my_dealz_de.png new file mode 100644 index 0000000000..c20476bc6c Binary files /dev/null and b/recipes/icons/my_dealz_de.png differ diff --git a/recipes/icons/myapple_pl.png b/recipes/icons/myapple_pl.png index 1b1c83c917..0e4102fc3d 100644 Binary files a/recipes/icons/myapple_pl.png and b/recipes/icons/myapple_pl.png differ diff --git a/recipes/icons/n_kaliningrad.png b/recipes/icons/n_kaliningrad.png index f903b3c4de..ba7bfc42cb 100644 Binary files a/recipes/icons/n_kaliningrad.png and b/recipes/icons/n_kaliningrad.png differ diff --git a/recipes/icons/n_plus_one.png b/recipes/icons/n_plus_one.png index 
4146f35f57..eccb947d05 100644 Binary files a/recipes/icons/n_plus_one.png and b/recipes/icons/n_plus_one.png differ diff --git a/recipes/icons/nachdenkseiten.png b/recipes/icons/nachdenkseiten.png new file mode 100644 index 0000000000..4aeb9307ce Binary files /dev/null and b/recipes/icons/nachdenkseiten.png differ diff --git a/recipes/icons/nadacni_fond_proti_korupci.png b/recipes/icons/nadacni_fond_proti_korupci.png index fd4fa87837..09bcdbfc11 100644 Binary files a/recipes/icons/nadacni_fond_proti_korupci.png and b/recipes/icons/nadacni_fond_proti_korupci.png differ diff --git a/recipes/icons/nakedcapitalism.png b/recipes/icons/nakedcapitalism.png index 848123f6d6..528d538b49 100644 Binary files a/recipes/icons/nakedcapitalism.png and b/recipes/icons/nakedcapitalism.png differ diff --git a/recipes/icons/nasa.png b/recipes/icons/nasa.png index 665acf2cfc..f32f55b4ce 100644 Binary files a/recipes/icons/nasa.png and b/recipes/icons/nasa.png differ diff --git a/recipes/icons/natgeohis.png b/recipes/icons/natgeohis.png new file mode 100644 index 0000000000..8a7f1b583d Binary files /dev/null and b/recipes/icons/natgeohis.png differ diff --git a/recipes/icons/natgeomag.png b/recipes/icons/natgeomag.png new file mode 100644 index 0000000000..8a7f1b583d Binary files /dev/null and b/recipes/icons/natgeomag.png differ diff --git a/recipes/icons/nation_ke.png b/recipes/icons/nation_ke.png new file mode 100644 index 0000000000..a69f743f93 Binary files /dev/null and b/recipes/icons/nation_ke.png differ diff --git a/recipes/icons/national_geographic_es.png b/recipes/icons/national_geographic_es.png index eb98978abb..da0244a11c 100644 Binary files a/recipes/icons/national_geographic_es.png and b/recipes/icons/national_geographic_es.png differ diff --git a/recipes/icons/national_post.png b/recipes/icons/national_post.png new file mode 100644 index 0000000000..024dfdda16 Binary files /dev/null and b/recipes/icons/national_post.png differ diff --git a/recipes/icons/nature.png 
b/recipes/icons/nature.png new file mode 100644 index 0000000000..0ce0e3d71c Binary files /dev/null and b/recipes/icons/nature.png differ diff --git a/recipes/icons/nautilus.png b/recipes/icons/nautilus.png new file mode 100644 index 0000000000..6cfc351d79 Binary files /dev/null and b/recipes/icons/nautilus.png differ diff --git a/recipes/icons/navy_times.png b/recipes/icons/navy_times.png new file mode 100644 index 0000000000..12f9665785 Binary files /dev/null and b/recipes/icons/navy_times.png differ diff --git a/recipes/icons/nbonline.png b/recipes/icons/nbonline.png new file mode 100644 index 0000000000..2e7c920674 Binary files /dev/null and b/recipes/icons/nbonline.png differ diff --git a/recipes/icons/ncrnext.png b/recipes/icons/ncrnext.png new file mode 100644 index 0000000000..25b156a69a Binary files /dev/null and b/recipes/icons/ncrnext.png differ diff --git a/recipes/icons/nejm.png b/recipes/icons/nejm.png new file mode 100644 index 0000000000..f7f445741b Binary files /dev/null and b/recipes/icons/nejm.png differ diff --git a/recipes/icons/neowin.png b/recipes/icons/neowin.png index c7057ba9e3..b2f7c994f0 100644 Binary files a/recipes/icons/neowin.png and b/recipes/icons/neowin.png differ diff --git a/recipes/icons/nepszabadsag.png b/recipes/icons/nepszabadsag.png new file mode 100644 index 0000000000..6450d5205f Binary files /dev/null and b/recipes/icons/nepszabadsag.png differ diff --git a/recipes/icons/netzpolitik.png b/recipes/icons/netzpolitik.png index db469f309f..0be0ced70e 100644 Binary files a/recipes/icons/netzpolitik.png and b/recipes/icons/netzpolitik.png differ diff --git a/recipes/icons/new_london_day.png b/recipes/icons/new_london_day.png new file mode 100644 index 0000000000..4e1a60a179 Binary files /dev/null and b/recipes/icons/new_london_day.png differ diff --git a/recipes/icons/new_scientist.png b/recipes/icons/new_scientist.png index 65507ebe32..20a4961beb 100644 Binary files a/recipes/icons/new_scientist.png and 
b/recipes/icons/new_scientist.png differ diff --git a/recipes/icons/new_scientist_mag.png b/recipes/icons/new_scientist_mag.png new file mode 100644 index 0000000000..16523e4981 Binary files /dev/null and b/recipes/icons/new_scientist_mag.png differ diff --git a/recipes/icons/new_statesman.png b/recipes/icons/new_statesman.png index ca51900ee5..f186479c35 100644 Binary files a/recipes/icons/new_statesman.png and b/recipes/icons/new_statesman.png differ diff --git a/recipes/icons/new_york_review_of_books.png b/recipes/icons/new_york_review_of_books.png new file mode 100644 index 0000000000..6bf8d9ee63 Binary files /dev/null and b/recipes/icons/new_york_review_of_books.png differ diff --git a/recipes/icons/new_york_review_of_books_no_sub.png b/recipes/icons/new_york_review_of_books_no_sub.png new file mode 100644 index 0000000000..6bf8d9ee63 Binary files /dev/null and b/recipes/icons/new_york_review_of_books_no_sub.png differ diff --git a/recipes/icons/newrepublicmag.png b/recipes/icons/newrepublicmag.png new file mode 100644 index 0000000000..dc756592a1 Binary files /dev/null and b/recipes/icons/newrepublicmag.png differ diff --git a/recipes/icons/news24.png b/recipes/icons/news24.png index 68bce32d2f..45413c372d 100644 Binary files a/recipes/icons/news24.png and b/recipes/icons/news24.png differ diff --git a/recipes/icons/news324.png b/recipes/icons/news324.png new file mode 100644 index 0000000000..68f6e39456 Binary files /dev/null and b/recipes/icons/news324.png differ diff --git a/recipes/icons/news_busters.png b/recipes/icons/news_busters.png new file mode 100644 index 0000000000..98dee2a0d5 Binary files /dev/null and b/recipes/icons/news_busters.png differ diff --git a/recipes/icons/news_times.png b/recipes/icons/news_times.png index 90f498fb0b..65cd219f1e 100644 Binary files a/recipes/icons/news_times.png and b/recipes/icons/news_times.png differ diff --git a/recipes/icons/newsbeast.png b/recipes/icons/newsbeast.png new file mode 100644 index 
0000000000..c1ed81a179 Binary files /dev/null and b/recipes/icons/newsbeast.png differ diff --git a/recipes/icons/newslaundry.png b/recipes/icons/newslaundry.png new file mode 100644 index 0000000000..342e059d0d Binary files /dev/null and b/recipes/icons/newslaundry.png differ diff --git a/recipes/icons/newsminute.png b/recipes/icons/newsminute.png new file mode 100644 index 0000000000..5544ac433e Binary files /dev/null and b/recipes/icons/newsminute.png differ diff --git a/recipes/icons/newsobs.png b/recipes/icons/newsobs.png new file mode 100644 index 0000000000..cdbc0a1f82 Binary files /dev/null and b/recipes/icons/newsobs.png differ diff --git a/recipes/icons/newtab.png b/recipes/icons/newtab.png index 42f269097a..a563ffeb1c 100644 Binary files a/recipes/icons/newtab.png and b/recipes/icons/newtab.png differ diff --git a/recipes/icons/newtimes.png b/recipes/icons/newtimes.png index 3c42a77121..319b48f655 100644 Binary files a/recipes/icons/newtimes.png and b/recipes/icons/newtimes.png differ diff --git a/recipes/icons/newz_dk.png b/recipes/icons/newz_dk.png new file mode 100644 index 0000000000..d38ca16e23 Binary files /dev/null and b/recipes/icons/newz_dk.png differ diff --git a/recipes/icons/nightflier.png b/recipes/icons/nightflier.png new file mode 100644 index 0000000000..04c29c68dd Binary files /dev/null and b/recipes/icons/nightflier.png differ diff --git a/recipes/icons/nikkei_free.png b/recipes/icons/nikkei_free.png index 5d5fcd75dd..c2420cc3af 100644 Binary files a/recipes/icons/nikkei_free.png and b/recipes/icons/nikkei_free.png differ diff --git a/recipes/icons/nikkei_news.png b/recipes/icons/nikkei_news.png new file mode 100644 index 0000000000..d92c878ce7 Binary files /dev/null and b/recipes/icons/nikkei_news.png differ diff --git a/recipes/icons/nikkei_sub.png b/recipes/icons/nikkei_sub.png index 5d5fcd75dd..c2420cc3af 100644 Binary files a/recipes/icons/nikkei_sub.png and b/recipes/icons/nikkei_sub.png differ diff --git 
a/recipes/icons/nikkeiasia.png b/recipes/icons/nikkeiasia.png new file mode 100644 index 0000000000..966d9fd485 Binary files /dev/null and b/recipes/icons/nikkeiasia.png differ diff --git a/recipes/icons/njp.png b/recipes/icons/njp.png index 69164b0dee..4948d9ce09 100644 Binary files a/recipes/icons/njp.png and b/recipes/icons/njp.png differ diff --git a/recipes/icons/noerrebronordvestbladet_dk.png b/recipes/icons/noerrebronordvestbladet_dk.png new file mode 100644 index 0000000000..6fc915ca3d Binary files /dev/null and b/recipes/icons/noerrebronordvestbladet_dk.png differ diff --git a/recipes/icons/nol.png b/recipes/icons/nol.png new file mode 100644 index 0000000000..6450d5205f Binary files /dev/null and b/recipes/icons/nol.png differ diff --git a/recipes/icons/non_leggerlo.png b/recipes/icons/non_leggerlo.png new file mode 100644 index 0000000000..4b541bc232 Binary files /dev/null and b/recipes/icons/non_leggerlo.png differ diff --git a/recipes/icons/norddjurslokalavisen_dk.png b/recipes/icons/norddjurslokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/norddjurslokalavisen_dk.png and b/recipes/icons/norddjurslokalavisen_dk.png differ diff --git a/recipes/icons/nordjyske_dk.png b/recipes/icons/nordjyske_dk.png index ec95fd360f..21298c9bff 100644 Binary files a/recipes/icons/nordjyske_dk.png and b/recipes/icons/nordjyske_dk.png differ diff --git a/recipes/icons/nortecastilla.png b/recipes/icons/nortecastilla.png index 8bfd6067ae..8edcd4c798 100644 Binary files a/recipes/icons/nortecastilla.png and b/recipes/icons/nortecastilla.png differ diff --git a/recipes/icons/nos_nl.png b/recipes/icons/nos_nl.png new file mode 100644 index 0000000000..7adae4084e Binary files /dev/null and b/recipes/icons/nos_nl.png differ diff --git a/recipes/icons/novaya_gazeta.png b/recipes/icons/novaya_gazeta.png index 60dff549f9..7c16d3d33b 100644 Binary files a/recipes/icons/novaya_gazeta.png and b/recipes/icons/novaya_gazeta.png differ diff --git 
a/recipes/icons/novaya_gazeta_europe.png b/recipes/icons/novaya_gazeta_europe.png index 946319432b..f97f168776 100644 Binary files a/recipes/icons/novaya_gazeta_europe.png and b/recipes/icons/novaya_gazeta_europe.png differ diff --git a/recipes/icons/novaya_gazeta_europe_en.png b/recipes/icons/novaya_gazeta_europe_en.png index 946319432b..f97f168776 100644 Binary files a/recipes/icons/novaya_gazeta_europe_en.png and b/recipes/icons/novaya_gazeta_europe_en.png differ diff --git a/recipes/icons/novaya_media.png b/recipes/icons/novaya_media.png index 58512ca42c..f63a2da1bd 100644 Binary files a/recipes/icons/novaya_media.png and b/recipes/icons/novaya_media.png differ diff --git a/recipes/icons/novinite.png b/recipes/icons/novinite.png index 52adece1a8..dffdd5dab3 100644 Binary files a/recipes/icons/novinite.png and b/recipes/icons/novinite.png differ diff --git a/recipes/icons/novinite_bg.png b/recipes/icons/novinite_bg.png index 381255a1af..12771814d0 100644 Binary files a/recipes/icons/novinite_bg.png and b/recipes/icons/novinite_bg.png differ diff --git a/recipes/icons/novinky.cz.png b/recipes/icons/novinky.cz.png new file mode 100644 index 0000000000..a5818109af Binary files /dev/null and b/recipes/icons/novinky.cz.png differ diff --git a/recipes/icons/novinky.png b/recipes/icons/novinky.png new file mode 100644 index 0000000000..a5818109af Binary files /dev/null and b/recipes/icons/novinky.png differ diff --git a/recipes/icons/nowiny_rybnik.png b/recipes/icons/nowiny_rybnik.png index 9ada7f554e..3ad497e484 100644 Binary files a/recipes/icons/nowiny_rybnik.png and b/recipes/icons/nowiny_rybnik.png differ diff --git a/recipes/icons/nowy_obywatel.png b/recipes/icons/nowy_obywatel.png index aff4f3668a..edc5b0f83b 100644 Binary files a/recipes/icons/nowy_obywatel.png and b/recipes/icons/nowy_obywatel.png differ diff --git a/recipes/icons/npr.png b/recipes/icons/npr.png index d7e11016fc..7ac08c0786 100644 Binary files a/recipes/icons/npr.png and 
b/recipes/icons/npr.png differ diff --git a/recipes/icons/npr_music_blogs.png b/recipes/icons/npr_music_blogs.png index d7e11016fc..7ac08c0786 100644 Binary files a/recipes/icons/npr_music_blogs.png and b/recipes/icons/npr_music_blogs.png differ diff --git a/recipes/icons/nrc-nl-epub.png b/recipes/icons/nrc-nl-epub.png new file mode 100644 index 0000000000..25b156a69a Binary files /dev/null and b/recipes/icons/nrc-nl-epub.png differ diff --git a/recipes/icons/nrc.nl.png b/recipes/icons/nrc.nl.png index 776ead2a63..118441f763 100644 Binary files a/recipes/icons/nrc.nl.png and b/recipes/icons/nrc.nl.png differ diff --git a/recipes/icons/nrc_handelsblad.png b/recipes/icons/nrc_handelsblad.png new file mode 100644 index 0000000000..25b156a69a Binary files /dev/null and b/recipes/icons/nrc_handelsblad.png differ diff --git a/recipes/icons/nrc_next.png b/recipes/icons/nrc_next.png index d0250e4ec8..6aa047562e 100644 Binary files a/recipes/icons/nrc_next.png and b/recipes/icons/nrc_next.png differ diff --git a/recipes/icons/ntv_spor.png b/recipes/icons/ntv_spor.png new file mode 100644 index 0000000000..43da212e91 Binary files /dev/null and b/recipes/icons/ntv_spor.png differ diff --git a/recipes/icons/ntv_tr.png b/recipes/icons/ntv_tr.png new file mode 100644 index 0000000000..9c80b2f18d Binary files /dev/null and b/recipes/icons/ntv_tr.png differ diff --git a/recipes/icons/nu.png b/recipes/icons/nu.png index 132d0364bc..e4369a8f24 100644 Binary files a/recipes/icons/nu.png and b/recipes/icons/nu.png differ diff --git a/recipes/icons/nv_en.png b/recipes/icons/nv_en.png index 05d70f12ee..3fa473cc23 100644 Binary files a/recipes/icons/nv_en.png and b/recipes/icons/nv_en.png differ diff --git a/recipes/icons/nv_ru.png b/recipes/icons/nv_ru.png index 05d70f12ee..3fa473cc23 100644 Binary files a/recipes/icons/nv_ru.png and b/recipes/icons/nv_ru.png differ diff --git a/recipes/icons/nv_ua.png b/recipes/icons/nv_ua.png index 05d70f12ee..3fa473cc23 100644 Binary files 
a/recipes/icons/nv_ua.png and b/recipes/icons/nv_ua.png differ diff --git a/recipes/icons/nymag.png b/recipes/icons/nymag.png new file mode 100644 index 0000000000..de57e56b62 Binary files /dev/null and b/recipes/icons/nymag.png differ diff --git a/recipes/icons/nypost.png b/recipes/icons/nypost.png index a6626e1525..7180e8bdd2 100644 Binary files a/recipes/icons/nypost.png and b/recipes/icons/nypost.png differ diff --git a/recipes/icons/nytimes_cooking.png b/recipes/icons/nytimes_cooking.png new file mode 100644 index 0000000000..dd4e5e6b6c Binary files /dev/null and b/recipes/icons/nytimes_cooking.png differ diff --git a/recipes/icons/nytimes_sports.png b/recipes/icons/nytimes_sports.png index d47b763ae0..f3083d1858 100644 Binary files a/recipes/icons/nytimes_sports.png and b/recipes/icons/nytimes_sports.png differ diff --git a/recipes/icons/nytimes_tech.png b/recipes/icons/nytimes_tech.png index e71ba13b55..0f36015d2d 100644 Binary files a/recipes/icons/nytimes_tech.png and b/recipes/icons/nytimes_tech.png differ diff --git a/recipes/icons/nytimesbook.png b/recipes/icons/nytimesbook.png new file mode 100644 index 0000000000..635a8d6656 Binary files /dev/null and b/recipes/icons/nytimesbook.png differ diff --git a/recipes/icons/nzherald.png b/recipes/icons/nzherald.png index 2c10e0349e..1c1ff1d30b 100644 Binary files a/recipes/icons/nzherald.png and b/recipes/icons/nzherald.png differ diff --git a/recipes/icons/nzz_folio.png b/recipes/icons/nzz_folio.png new file mode 100644 index 0000000000..7283bd9d2a Binary files /dev/null and b/recipes/icons/nzz_folio.png differ diff --git a/recipes/icons/oba.png b/recipes/icons/oba.png new file mode 100644 index 0000000000..0370ad99b7 Binary files /dev/null and b/recipes/icons/oba.png differ diff --git a/recipes/icons/observa_digital.png b/recipes/icons/observa_digital.png new file mode 100644 index 0000000000..486f52bddd Binary files /dev/null and b/recipes/icons/observa_digital.png differ diff --git 
a/recipes/icons/observer_gb.png b/recipes/icons/observer_gb.png index f629c5475e..9546e15b09 100644 Binary files a/recipes/icons/observer_gb.png and b/recipes/icons/observer_gb.png differ diff --git a/recipes/icons/observer_reach_foundation.png b/recipes/icons/observer_reach_foundation.png new file mode 100644 index 0000000000..c89add0177 Binary files /dev/null and b/recipes/icons/observer_reach_foundation.png differ diff --git a/recipes/icons/oc_register.png b/recipes/icons/oc_register.png index f7adffae36..3046d44530 100644 Binary files a/recipes/icons/oc_register.png and b/recipes/icons/oc_register.png differ diff --git a/recipes/icons/odenselokalavisen_dk.png b/recipes/icons/odenselokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/odenselokalavisen_dk.png and b/recipes/icons/odenselokalavisen_dk.png differ diff --git a/recipes/icons/oesterbroavis_dk.png b/recipes/icons/oesterbroavis_dk.png new file mode 100644 index 0000000000..6fc915ca3d Binary files /dev/null and b/recipes/icons/oesterbroavis_dk.png differ diff --git a/recipes/icons/office_space.png b/recipes/icons/office_space.png index 87ff569c5a..3b702313c5 100644 Binary files a/recipes/icons/office_space.png and b/recipes/icons/office_space.png differ diff --git a/recipes/icons/old_games.png b/recipes/icons/old_games.png index 6fd3fe9e9b..79ad250a44 100644 Binary files a/recipes/icons/old_games.png and b/recipes/icons/old_games.png differ diff --git a/recipes/icons/oldnewthing.png b/recipes/icons/oldnewthing.png index 46e4ea4bc6..0485775a45 100644 Binary files a/recipes/icons/oldnewthing.png and b/recipes/icons/oldnewthing.png differ diff --git a/recipes/icons/omgubuntu.png b/recipes/icons/omgubuntu.png index fd6bd08b31..6ac21ba6d3 100644 Binary files a/recipes/icons/omgubuntu.png and b/recipes/icons/omgubuntu.png differ diff --git a/recipes/icons/onda_rock.png b/recipes/icons/onda_rock.png new file mode 100644 index 0000000000..9be80e9500 Binary files /dev/null and 
b/recipes/icons/onda_rock.png differ diff --git a/recipes/icons/onemagazine.png b/recipes/icons/onemagazine.png index 59bd759a67..a8c032fd76 100644 Binary files a/recipes/icons/onemagazine.png and b/recipes/icons/onemagazine.png differ diff --git a/recipes/icons/onionavclub.png b/recipes/icons/onionavclub.png new file mode 100644 index 0000000000..df21c7dadf Binary files /dev/null and b/recipes/icons/onionavclub.png differ diff --git a/recipes/icons/open_magazine.png b/recipes/icons/open_magazine.png new file mode 100644 index 0000000000..8d0d47b35b Binary files /dev/null and b/recipes/icons/open_magazine.png differ diff --git a/recipes/icons/opennet.png b/recipes/icons/opennet.png index 7d69dcd679..886dd90db5 100644 Binary files a/recipes/icons/opennet.png and b/recipes/icons/opennet.png differ diff --git a/recipes/icons/opindia.png b/recipes/icons/opindia.png new file mode 100644 index 0000000000..344cd39ea9 Binary files /dev/null and b/recipes/icons/opindia.png differ diff --git a/recipes/icons/opinion_bo.png b/recipes/icons/opinion_bo.png new file mode 100644 index 0000000000..5eb29312fa Binary files /dev/null and b/recipes/icons/opinion_bo.png differ diff --git a/recipes/icons/oregonian.png b/recipes/icons/oregonian.png new file mode 100644 index 0000000000..e9b35936c8 Binary files /dev/null and b/recipes/icons/oregonian.png differ diff --git a/recipes/icons/oreilly_premium.png b/recipes/icons/oreilly_premium.png new file mode 100644 index 0000000000..2f0fafa97a Binary files /dev/null and b/recipes/icons/oreilly_premium.png differ diff --git a/recipes/icons/oriental_daily.png b/recipes/icons/oriental_daily.png index 6272e834fc..9deb272eae 100644 Binary files a/recipes/icons/oriental_daily.png and b/recipes/icons/oriental_daily.png differ diff --git a/recipes/icons/origo_hu.png b/recipes/icons/origo_hu.png index 9857bc78fd..61da70e0ab 100644 Binary files a/recipes/icons/origo_hu.png and b/recipes/icons/origo_hu.png differ diff --git a/recipes/icons/osvitaua.png 
b/recipes/icons/osvitaua.png index 3f4da96b63..92d9e71591 100644 Binary files a/recipes/icons/osvitaua.png and b/recipes/icons/osvitaua.png differ diff --git a/recipes/icons/osvitaua_ru.png b/recipes/icons/osvitaua_ru.png index 3f4da96b63..92d9e71591 100644 Binary files a/recipes/icons/osvitaua_ru.png and b/recipes/icons/osvitaua_ru.png differ diff --git a/recipes/icons/osw.png b/recipes/icons/osw.png index 82f78e26d2..615c918e64 100644 Binary files a/recipes/icons/osw.png and b/recipes/icons/osw.png differ diff --git a/recipes/icons/ottawa_citizen.png b/recipes/icons/ottawa_citizen.png new file mode 100644 index 0000000000..ac5d11718a Binary files /dev/null and b/recipes/icons/ottawa_citizen.png differ diff --git a/recipes/icons/outlook_business_magazine.png b/recipes/icons/outlook_business_magazine.png new file mode 100644 index 0000000000..f964205927 Binary files /dev/null and b/recipes/icons/outlook_business_magazine.png differ diff --git a/recipes/icons/outlook_india.png b/recipes/icons/outlook_india.png new file mode 100644 index 0000000000..ae6cc70681 Binary files /dev/null and b/recipes/icons/outlook_india.png differ diff --git a/recipes/icons/padreydecano.png b/recipes/icons/padreydecano.png new file mode 100644 index 0000000000..71b4635a1e Binary files /dev/null and b/recipes/icons/padreydecano.png differ diff --git a/recipes/icons/pagina12.png b/recipes/icons/pagina12.png index 8110b9f59a..da939fb7fd 100644 Binary files a/recipes/icons/pagina12.png and b/recipes/icons/pagina12.png differ diff --git a/recipes/icons/pagina_12_print_ed.png b/recipes/icons/pagina_12_print_ed.png new file mode 100644 index 0000000000..77848a9597 Binary files /dev/null and b/recipes/icons/pagina_12_print_ed.png differ diff --git a/recipes/icons/pajama.png b/recipes/icons/pajama.png index 7f0925100b..3024396f39 100644 Binary files a/recipes/icons/pajama.png and b/recipes/icons/pajama.png differ diff --git a/recipes/icons/panorama.png b/recipes/icons/panorama.png new file mode 
100644 index 0000000000..10d779645c Binary files /dev/null and b/recipes/icons/panorama.png differ diff --git a/recipes/icons/paperli_topic.png b/recipes/icons/paperli_topic.png new file mode 100644 index 0000000000..c4532ba757 Binary files /dev/null and b/recipes/icons/paperli_topic.png differ diff --git a/recipes/icons/parisreview.png b/recipes/icons/parisreview.png new file mode 100644 index 0000000000..689b164d1b Binary files /dev/null and b/recipes/icons/parisreview.png differ diff --git a/recipes/icons/parlamentni_listy.png b/recipes/icons/parlamentni_listy.png index fcdd1d8ca0..162424b726 100644 Binary files a/recipes/icons/parlamentni_listy.png and b/recipes/icons/parlamentni_listy.png differ diff --git a/recipes/icons/patente_de_corso.png b/recipes/icons/patente_de_corso.png index fb133cba7a..96cd4cf742 100644 Binary files a/recipes/icons/patente_de_corso.png and b/recipes/icons/patente_de_corso.png differ diff --git a/recipes/icons/pc_mag.png b/recipes/icons/pc_mag.png index 882e0a4d0f..0193481355 100644 Binary files a/recipes/icons/pc_mag.png and b/recipes/icons/pc_mag.png differ diff --git a/recipes/icons/pcworld_hu.png b/recipes/icons/pcworld_hu.png index 292b82ce4e..78b3300109 100644 Binary files a/recipes/icons/pcworld_hu.png and b/recipes/icons/pcworld_hu.png differ diff --git a/recipes/icons/pcworldro.png b/recipes/icons/pcworldro.png index 393c7f3717..e559fa683e 100644 Binary files a/recipes/icons/pcworldro.png and b/recipes/icons/pcworldro.png differ diff --git a/recipes/icons/penguin_news.png b/recipes/icons/penguin_news.png new file mode 100644 index 0000000000..72d250997f Binary files /dev/null and b/recipes/icons/penguin_news.png differ diff --git a/recipes/icons/penzberger_merkur.png b/recipes/icons/penzberger_merkur.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/penzberger_merkur.png and b/recipes/icons/penzberger_merkur.png differ diff --git a/recipes/icons/people_daily.png b/recipes/icons/people_daily.png index 
0f10f573cb..db6105c3e0 100644 Binary files a/recipes/icons/people_daily.png and b/recipes/icons/people_daily.png differ diff --git a/recipes/icons/periodismo_humano_es.png b/recipes/icons/periodismo_humano_es.png index eb5f241956..8f674bf37a 100644 Binary files a/recipes/icons/periodismo_humano_es.png and b/recipes/icons/periodismo_humano_es.png differ diff --git a/recipes/icons/peterschiff.png b/recipes/icons/peterschiff.png index d9ab67e997..fc196f903d 100644 Binary files a/recipes/icons/peterschiff.png and b/recipes/icons/peterschiff.png differ diff --git a/recipes/icons/phd_comics.png b/recipes/icons/phd_comics.png new file mode 100644 index 0000000000..1459d2e4be Binary files /dev/null and b/recipes/icons/phd_comics.png differ diff --git a/recipes/icons/philippino_star_ngayon.png b/recipes/icons/philippino_star_ngayon.png new file mode 100644 index 0000000000..725cbf17fd Binary files /dev/null and b/recipes/icons/philippino_star_ngayon.png differ diff --git a/recipes/icons/phillosophy_now.png b/recipes/icons/phillosophy_now.png new file mode 100644 index 0000000000..8815712ef0 Binary files /dev/null and b/recipes/icons/phillosophy_now.png differ diff --git a/recipes/icons/phoronix.png b/recipes/icons/phoronix.png index 6653c3be93..f021fc8691 100644 Binary files a/recipes/icons/phoronix.png and b/recipes/icons/phoronix.png differ diff --git a/recipes/icons/phys_org.png b/recipes/icons/phys_org.png index 3cc2b8ca9a..88a71b5fc6 100644 Binary files a/recipes/icons/phys_org.png and b/recipes/icons/phys_org.png differ diff --git a/recipes/icons/physics_today.png b/recipes/icons/physics_today.png index 06a7221255..68c9374129 100644 Binary files a/recipes/icons/physics_today.png and b/recipes/icons/physics_today.png differ diff --git a/recipes/icons/piratska_strana.png b/recipes/icons/piratska_strana.png new file mode 100644 index 0000000000..1a9a8ae83b Binary files /dev/null and b/recipes/icons/piratska_strana.png differ diff --git a/recipes/icons/piratske_noviny.png 
b/recipes/icons/piratske_noviny.png new file mode 100644 index 0000000000..f0d3789b59 Binary files /dev/null and b/recipes/icons/piratske_noviny.png differ diff --git a/recipes/icons/planet_kde.png b/recipes/icons/planet_kde.png index 55fd7742db..c0c1095dd3 100644 Binary files a/recipes/icons/planet_kde.png and b/recipes/icons/planet_kde.png differ diff --git a/recipes/icons/plus_info.png b/recipes/icons/plus_info.png new file mode 100644 index 0000000000..0af1a624a3 Binary files /dev/null and b/recipes/icons/plus_info.png differ diff --git a/recipes/icons/pnn.png b/recipes/icons/pnn.png index 8f905192fb..6c0240cf1a 100644 Binary files a/recipes/icons/pnn.png and b/recipes/icons/pnn.png differ diff --git a/recipes/icons/poche.png b/recipes/icons/poche.png new file mode 100644 index 0000000000..5ca1e1fd5b Binary files /dev/null and b/recipes/icons/poche.png differ diff --git a/recipes/icons/podnikatel.png b/recipes/icons/podnikatel.png index ddaa7bf57a..a19f64b755 100644 Binary files a/recipes/icons/podnikatel.png and b/recipes/icons/podnikatel.png differ diff --git a/recipes/icons/poetrymagazine.png b/recipes/icons/poetrymagazine.png new file mode 100644 index 0000000000..5959ec16ae Binary files /dev/null and b/recipes/icons/poetrymagazine.png differ diff --git a/recipes/icons/politifact.png b/recipes/icons/politifact.png new file mode 100644 index 0000000000..3fb64ab6e0 Binary files /dev/null and b/recipes/icons/politifact.png differ diff --git a/recipes/icons/polizeipress_de.png b/recipes/icons/polizeipress_de.png index aca985c612..ce1b6dabe0 100644 Binary files a/recipes/icons/polizeipress_de.png and b/recipes/icons/polizeipress_de.png differ diff --git a/recipes/icons/popscience.png b/recipes/icons/popscience.png index 4401131d19..c70f4ac08a 100644 Binary files a/recipes/icons/popscience.png and b/recipes/icons/popscience.png differ diff --git a/recipes/icons/portafolio.png b/recipes/icons/portafolio.png new file mode 100644 index 0000000000..644103dde3 Binary 
files /dev/null and b/recipes/icons/portafolio.png differ diff --git a/recipes/icons/portfolio_hu.png b/recipes/icons/portfolio_hu.png new file mode 100644 index 0000000000..519270e8ec Binary files /dev/null and b/recipes/icons/portfolio_hu.png differ diff --git a/recipes/icons/portfolio_hu_hu.png b/recipes/icons/portfolio_hu_hu.png index 08272a92b4..0e471fa1df 100644 Binary files a/recipes/icons/portfolio_hu_hu.png and b/recipes/icons/portfolio_hu_hu.png differ diff --git a/recipes/icons/post_today.png b/recipes/icons/post_today.png index c157e3cbec..881761b8c1 100644 Binary files a/recipes/icons/post_today.png and b/recipes/icons/post_today.png differ diff --git a/recipes/icons/poughkeepsie_journal.png b/recipes/icons/poughkeepsie_journal.png new file mode 100644 index 0000000000..6ae6d01c94 Binary files /dev/null and b/recipes/icons/poughkeepsie_journal.png differ diff --git a/recipes/icons/praguemonitor.png b/recipes/icons/praguemonitor.png index 83601c5f04..045e226284 100644 Binary files a/recipes/icons/praguemonitor.png and b/recipes/icons/praguemonitor.png differ diff --git a/recipes/icons/pragyata.png b/recipes/icons/pragyata.png new file mode 100644 index 0000000000..eaecfb16e5 Binary files /dev/null and b/recipes/icons/pragyata.png differ diff --git a/recipes/icons/pravda.png b/recipes/icons/pravda.png new file mode 100644 index 0000000000..7d48281dbd Binary files /dev/null and b/recipes/icons/pravda.png differ diff --git a/recipes/icons/pravda_ru.png b/recipes/icons/pravda_ru.png index 6cae699c89..0127a8b2f0 100644 Binary files a/recipes/icons/pravda_ru.png and b/recipes/icons/pravda_ru.png differ diff --git a/recipes/icons/pravda_uk.png b/recipes/icons/pravda_uk.png index 6cae699c89..cfb46d85e2 100644 Binary files a/recipes/icons/pravda_uk.png and b/recipes/icons/pravda_uk.png differ diff --git a/recipes/icons/pravda_ukraine.png b/recipes/icons/pravda_ukraine.png index 6cae699c89..cfb46d85e2 100644 Binary files a/recipes/icons/pravda_ukraine.png and 
b/recipes/icons/pravda_ukraine.png differ diff --git a/recipes/icons/pravda_ukraine_ru.png b/recipes/icons/pravda_ukraine_ru.png index 6cae699c89..cfb46d85e2 100644 Binary files a/recipes/icons/pravda_ukraine_ru.png and b/recipes/icons/pravda_ukraine_ru.png differ diff --git a/recipes/icons/pravo.png b/recipes/icons/pravo.png new file mode 100644 index 0000000000..d856b33469 Binary files /dev/null and b/recipes/icons/pravo.png differ diff --git a/recipes/icons/prekshaa.png b/recipes/icons/prekshaa.png new file mode 100644 index 0000000000..116ceabb71 Binary files /dev/null and b/recipes/icons/prekshaa.png differ diff --git a/recipes/icons/press_information_bureau.png b/recipes/icons/press_information_bureau.png new file mode 100644 index 0000000000..c6a6119642 Binary files /dev/null and b/recipes/icons/press_information_bureau.png differ diff --git a/recipes/icons/presse_portal.png b/recipes/icons/presse_portal.png index aca985c612..ce1b6dabe0 100644 Binary files a/recipes/icons/presse_portal.png and b/recipes/icons/presse_portal.png differ diff --git a/recipes/icons/pro_linux_de.png b/recipes/icons/pro_linux_de.png index 56648ebcbd..7a5802c945 100644 Binary files a/recipes/icons/pro_linux_de.png and b/recipes/icons/pro_linux_de.png differ diff --git a/recipes/icons/pro_physik.png b/recipes/icons/pro_physik.png new file mode 100644 index 0000000000..8e47abbed5 Binary files /dev/null and b/recipes/icons/pro_physik.png differ diff --git a/recipes/icons/project.png b/recipes/icons/project.png index a15c53ab8a..7df6d24be8 100644 Binary files a/recipes/icons/project.png and b/recipes/icons/project.png differ diff --git a/recipes/icons/project_en.png b/recipes/icons/project_en.png index a15c53ab8a..7df6d24be8 100644 Binary files a/recipes/icons/project_en.png and b/recipes/icons/project_en.png differ diff --git a/recipes/icons/project_syndicate.png b/recipes/icons/project_syndicate.png new file mode 100644 index 0000000000..7cd36a0cb2 Binary files /dev/null and 
b/recipes/icons/project_syndicate.png differ diff --git a/recipes/icons/projo.png b/recipes/icons/projo.png new file mode 100644 index 0000000000..30b664d629 Binary files /dev/null and b/recipes/icons/projo.png differ diff --git a/recipes/icons/propublica.png b/recipes/icons/propublica.png index 32ff972ee1..4aa9958d2b 100644 Binary files a/recipes/icons/propublica.png and b/recipes/icons/propublica.png differ diff --git a/recipes/icons/prosleduet.png b/recipes/icons/prosleduet.png new file mode 100644 index 0000000000..918203f752 Binary files /dev/null and b/recipes/icons/prosleduet.png differ diff --git a/recipes/icons/prospectmaguk.png b/recipes/icons/prospectmaguk.png new file mode 100644 index 0000000000..c60a710d04 Binary files /dev/null and b/recipes/icons/prospectmaguk.png differ diff --git a/recipes/icons/prospectmaguk_free.png b/recipes/icons/prospectmaguk_free.png new file mode 100644 index 0000000000..7aa14ee58f Binary files /dev/null and b/recipes/icons/prospectmaguk_free.png differ diff --git a/recipes/icons/protagon.png b/recipes/icons/protagon.png new file mode 100644 index 0000000000..c753016986 Binary files /dev/null and b/recipes/icons/protagon.png differ diff --git a/recipes/icons/protvmagazin.png b/recipes/icons/protvmagazin.png index 400fddbd54..4f43f74bf3 100644 Binary files a/recipes/icons/protvmagazin.png and b/recipes/icons/protvmagazin.png differ diff --git a/recipes/icons/psych.png b/recipes/icons/psych.png new file mode 100644 index 0000000000..197191474c Binary files /dev/null and b/recipes/icons/psych.png differ diff --git a/recipes/icons/publicdomainreview_org.png b/recipes/icons/publicdomainreview_org.png new file mode 100644 index 0000000000..16517c957b Binary files /dev/null and b/recipes/icons/publicdomainreview_org.png differ diff --git a/recipes/icons/publico.png b/recipes/icons/publico.png new file mode 100644 index 0000000000..de77c3f0c6 Binary files /dev/null and b/recipes/icons/publico.png differ diff --git 
a/recipes/icons/punto_informatico.png b/recipes/icons/punto_informatico.png index 37455eb1f0..7fe1e4ed8c 100644 Binary files a/recipes/icons/punto_informatico.png and b/recipes/icons/punto_informatico.png differ diff --git a/recipes/icons/pure_pc.png b/recipes/icons/pure_pc.png index 6640aaee26..1cab72b644 100644 Binary files a/recipes/icons/pure_pc.png and b/recipes/icons/pure_pc.png differ diff --git a/recipes/icons/quanta_magazine.png b/recipes/icons/quanta_magazine.png new file mode 100644 index 0000000000..db14c0a998 Binary files /dev/null and b/recipes/icons/quanta_magazine.png differ diff --git a/recipes/icons/queleer.png b/recipes/icons/queleer.png new file mode 100644 index 0000000000..0b89ac4c93 Binary files /dev/null and b/recipes/icons/queleer.png differ diff --git a/recipes/icons/rabble_ca.png b/recipes/icons/rabble_ca.png new file mode 100644 index 0000000000..3d471ac652 Binary files /dev/null and b/recipes/icons/rabble_ca.png differ diff --git a/recipes/icons/radikal_tr.png b/recipes/icons/radikal_tr.png index b89cf4140b..2684c85ed2 100644 Binary files a/recipes/icons/radikal_tr.png and b/recipes/icons/radikal_tr.png differ diff --git a/recipes/icons/radio_prague.png b/recipes/icons/radio_prague.png new file mode 100644 index 0000000000..c9f3e4f4ab Binary files /dev/null and b/recipes/icons/radio_prague.png differ diff --git a/recipes/icons/radio_praha.png b/recipes/icons/radio_praha.png new file mode 100644 index 0000000000..c9f3e4f4ab Binary files /dev/null and b/recipes/icons/radio_praha.png differ diff --git a/recipes/icons/radiosvoboda_ua.png b/recipes/icons/radiosvoboda_ua.png index affd3e19f8..dea26a0b4e 100644 Binary files a/recipes/icons/radiosvoboda_ua.png and b/recipes/icons/radiosvoboda_ua.png differ diff --git a/recipes/icons/randerslokalavisen_dk.png b/recipes/icons/randerslokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/randerslokalavisen_dk.png and b/recipes/icons/randerslokalavisen_dk.png differ 
diff --git a/recipes/icons/rbc_ru.png b/recipes/icons/rbc_ru.png index f60f84c81f..55f6a4aadd 100644 Binary files a/recipes/icons/rbc_ru.png and b/recipes/icons/rbc_ru.png differ diff --git a/recipes/icons/rds.png b/recipes/icons/rds.png index 6804aa7e63..0803f4c47f 100644 Binary files a/recipes/icons/rds.png and b/recipes/icons/rds.png differ diff --git a/recipes/icons/readers_digest.png b/recipes/icons/readers_digest.png new file mode 100644 index 0000000000..d052fae8ec Binary files /dev/null and b/recipes/icons/readers_digest.png differ diff --git a/recipes/icons/readersdigest_thehealthy.png b/recipes/icons/readersdigest_thehealthy.png new file mode 100644 index 0000000000..292be27c01 Binary files /dev/null and b/recipes/icons/readersdigest_thehealthy.png differ diff --git a/recipes/icons/real_clear.png b/recipes/icons/real_clear.png new file mode 100644 index 0000000000..9f1dfe30b0 Binary files /dev/null and b/recipes/icons/real_clear.png differ diff --git a/recipes/icons/real_world_economics_review.png b/recipes/icons/real_world_economics_review.png index 9ccd38d2f3..6e1793fb9d 100644 Binary files a/recipes/icons/real_world_economics_review.png and b/recipes/icons/real_world_economics_review.png differ diff --git a/recipes/icons/reason_magazine.png b/recipes/icons/reason_magazine.png new file mode 100644 index 0000000000..3b31ac13eb Binary files /dev/null and b/recipes/icons/reason_magazine.png differ diff --git a/recipes/icons/red_aragon.png b/recipes/icons/red_aragon.png index cb968d6e71..c9a1d9aa9c 100644 Binary files a/recipes/icons/red_aragon.png and b/recipes/icons/red_aragon.png differ diff --git a/recipes/icons/red_voltaire.png b/recipes/icons/red_voltaire.png new file mode 100644 index 0000000000..9f44a424ce Binary files /dev/null and b/recipes/icons/red_voltaire.png differ diff --git a/recipes/icons/regina_leader_post.png b/recipes/icons/regina_leader_post.png new file mode 100644 index 0000000000..ccca989ba9 Binary files /dev/null and 
b/recipes/icons/regina_leader_post.png differ diff --git a/recipes/icons/republica.png b/recipes/icons/republica.png new file mode 100644 index 0000000000..6135647482 Binary files /dev/null and b/recipes/icons/republica.png differ diff --git a/recipes/icons/respekt_magazine.png b/recipes/icons/respekt_magazine.png new file mode 100644 index 0000000000..4c1acc7672 Binary files /dev/null and b/recipes/icons/respekt_magazine.png differ diff --git a/recipes/icons/reuters.png b/recipes/icons/reuters.png new file mode 100644 index 0000000000..59904c1274 Binary files /dev/null and b/recipes/icons/reuters.png differ diff --git a/recipes/icons/reuters_ja.png b/recipes/icons/reuters_ja.png index 7c7942a922..d9dcfecc8f 100644 Binary files a/recipes/icons/reuters_ja.png and b/recipes/icons/reuters_ja.png differ diff --git a/recipes/icons/revista_cromos.png b/recipes/icons/revista_cromos.png new file mode 100644 index 0000000000..300c09c333 Binary files /dev/null and b/recipes/icons/revista_cromos.png differ diff --git a/recipes/icons/revista_muy.png b/recipes/icons/revista_muy.png new file mode 100644 index 0000000000..1a4ccfaf4a Binary files /dev/null and b/recipes/icons/revista_muy.png differ diff --git a/recipes/icons/revista_piaui.png b/recipes/icons/revista_piaui.png new file mode 100644 index 0000000000..67d63e6cfb Binary files /dev/null and b/recipes/icons/revista_piaui.png differ diff --git a/recipes/icons/revista_semana.png b/recipes/icons/revista_semana.png new file mode 100644 index 0000000000..953260fda6 Binary files /dev/null and b/recipes/icons/revista_semana.png differ diff --git a/recipes/icons/revista_summa.png b/recipes/icons/revista_summa.png new file mode 100644 index 0000000000..0d00979c98 Binary files /dev/null and b/recipes/icons/revista_summa.png differ diff --git a/recipes/icons/revista_veintitres.png b/recipes/icons/revista_veintitres.png index 389ec29471..ed07881eaa 100644 Binary files a/recipes/icons/revista_veintitres.png and 
b/recipes/icons/revista_veintitres.png differ diff --git a/recipes/icons/rga.png b/recipes/icons/rga.png new file mode 100644 index 0000000000..5695c3ef66 Binary files /dev/null and b/recipes/icons/rga.png differ diff --git a/recipes/icons/ria_ru.png b/recipes/icons/ria_ru.png index 04727b6b96..8c47da83de 100644 Binary files a/recipes/icons/ria_ru.png and b/recipes/icons/ria_ru.png differ diff --git a/recipes/icons/rian_eng.png b/recipes/icons/rian_eng.png new file mode 100644 index 0000000000..0e22ab4a9b Binary files /dev/null and b/recipes/icons/rian_eng.png differ diff --git a/recipes/icons/rian_spa.png b/recipes/icons/rian_spa.png new file mode 100644 index 0000000000..0e22ab4a9b Binary files /dev/null and b/recipes/icons/rian_spa.png differ diff --git a/recipes/icons/roger_ebert.png b/recipes/icons/roger_ebert.png new file mode 100644 index 0000000000..fbc8fea941 Binary files /dev/null and b/recipes/icons/roger_ebert.png differ diff --git a/recipes/icons/roger_ebert_blog.png b/recipes/icons/roger_ebert_blog.png new file mode 100644 index 0000000000..fbc8fea941 Binary files /dev/null and b/recipes/icons/roger_ebert_blog.png differ diff --git a/recipes/icons/romanialibera.png b/recipes/icons/romanialibera.png index 7035097790..ae9cef2dc6 100644 Binary files a/recipes/icons/romanialibera.png and b/recipes/icons/romanialibera.png differ diff --git a/recipes/icons/root.png b/recipes/icons/root.png index 87d7889d15..b72b0ee6fa 100644 Binary files a/recipes/icons/root.png and b/recipes/icons/root.png differ diff --git a/recipes/icons/rosbalt.png b/recipes/icons/rosbalt.png index cf7f78e949..8839f1ad26 100644 Binary files a/recipes/icons/rosbalt.png and b/recipes/icons/rosbalt.png differ diff --git a/recipes/icons/roskildelokalavisen_dk.png b/recipes/icons/roskildelokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/roskildelokalavisen_dk.png and b/recipes/icons/roskildelokalavisen_dk.png differ diff --git a/recipes/icons/rt.png 
b/recipes/icons/rt.png new file mode 100644 index 0000000000..e2f191b546 Binary files /dev/null and b/recipes/icons/rt.png differ diff --git a/recipes/icons/rte.png b/recipes/icons/rte.png index d53370773e..dffc9085af 100644 Binary files a/recipes/icons/rte.png and b/recipes/icons/rte.png differ diff --git a/recipes/icons/rts.png b/recipes/icons/rts.png index 57700527de..60989e6ecf 100644 Binary files a/recipes/icons/rts.png and b/recipes/icons/rts.png differ diff --git a/recipes/icons/rubikon_de.png b/recipes/icons/rubikon_de.png new file mode 100644 index 0000000000..872d9b7aaa Binary files /dev/null and b/recipes/icons/rubikon_de.png differ diff --git a/recipes/icons/rudersdallokalavisen_dk.png b/recipes/icons/rudersdallokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/rudersdallokalavisen_dk.png and b/recipes/icons/rudersdallokalavisen_dk.png differ diff --git a/recipes/icons/rue89.png b/recipes/icons/rue89.png index 1b7e7a746a..d5c8812044 100644 Binary files a/recipes/icons/rue89.png and b/recipes/icons/rue89.png differ diff --git a/recipes/icons/russiafeed.png b/recipes/icons/russiafeed.png index a568f141ca..b3823c73fa 100644 Binary files a/recipes/icons/russiafeed.png and b/recipes/icons/russiafeed.png differ diff --git a/recipes/icons/rynek_zdrowia.png b/recipes/icons/rynek_zdrowia.png index 0cc9440ed1..e406f785d6 100644 Binary files a/recipes/icons/rynek_zdrowia.png and b/recipes/icons/rynek_zdrowia.png differ diff --git a/recipes/icons/sabit_fikir.png b/recipes/icons/sabit_fikir.png new file mode 100644 index 0000000000..87438987aa Binary files /dev/null and b/recipes/icons/sabit_fikir.png differ diff --git a/recipes/icons/saechsische.png b/recipes/icons/saechsische.png new file mode 100644 index 0000000000..7f0b08f6e6 Binary files /dev/null and b/recipes/icons/saechsische.png differ diff --git a/recipes/icons/sage_news.png b/recipes/icons/sage_news.png index cea0cd848e..7094c2125b 100644 Binary files 
a/recipes/icons/sage_news.png and b/recipes/icons/sage_news.png differ diff --git a/recipes/icons/sage_news_opinion.png b/recipes/icons/sage_news_opinion.png new file mode 100644 index 0000000000..9f029270ff Binary files /dev/null and b/recipes/icons/sage_news_opinion.png differ diff --git a/recipes/icons/salonica_press_news.png b/recipes/icons/salonica_press_news.png new file mode 100644 index 0000000000..2eef9603d3 Binary files /dev/null and b/recipes/icons/salonica_press_news.png differ diff --git a/recipes/icons/samanyolu_haber.png b/recipes/icons/samanyolu_haber.png index c50e8397aa..f34e9ec67b 100644 Binary files a/recipes/icons/samanyolu_haber.png and b/recipes/icons/samanyolu_haber.png differ diff --git a/recipes/icons/samanyolu_teknoloji.png b/recipes/icons/samanyolu_teknoloji.png new file mode 100644 index 0000000000..d95525c901 Binary files /dev/null and b/recipes/icons/samanyolu_teknoloji.png differ diff --git a/recipes/icons/san_fran_chronicle.png b/recipes/icons/san_fran_chronicle.png index 4e044301b8..f1fd99916a 100644 Binary files a/recipes/icons/san_fran_chronicle.png and b/recipes/icons/san_fran_chronicle.png differ diff --git a/recipes/icons/sarajevo_x.png b/recipes/icons/sarajevo_x.png index e853dc5387..30f8aceacc 100644 Binary files a/recipes/icons/sarajevo_x.png and b/recipes/icons/sarajevo_x.png differ diff --git a/recipes/icons/sardinia_post.png b/recipes/icons/sardinia_post.png new file mode 100644 index 0000000000..92316b99dd Binary files /dev/null and b/recipes/icons/sardinia_post.png differ diff --git a/recipes/icons/saskatoon_star_phoenix.png b/recipes/icons/saskatoon_star_phoenix.png new file mode 100644 index 0000000000..56a9d0d9e6 Binary files /dev/null and b/recipes/icons/saskatoon_star_phoenix.png differ diff --git a/recipes/icons/satkurier.png b/recipes/icons/satkurier.png index 2e5fe73466..6a93c5da59 100644 Binary files a/recipes/icons/satkurier.png and b/recipes/icons/satkurier.png differ diff --git 
a/recipes/icons/satmagazine.png b/recipes/icons/satmagazine.png new file mode 100644 index 0000000000..35dc280e8d Binary files /dev/null and b/recipes/icons/satmagazine.png differ diff --git a/recipes/icons/sb_nation.png b/recipes/icons/sb_nation.png index 82274f8118..7e28808d10 100644 Binary files a/recipes/icons/sb_nation.png and b/recipes/icons/sb_nation.png differ diff --git a/recipes/icons/schattenblick.png b/recipes/icons/schattenblick.png index 79e5ced36c..8228dbc530 100644 Binary files a/recipes/icons/schattenblick.png and b/recipes/icons/schattenblick.png differ diff --git a/recipes/icons/schongauer_nachrichten.png b/recipes/icons/schongauer_nachrichten.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/schongauer_nachrichten.png and b/recipes/icons/schongauer_nachrichten.png differ diff --git a/recipes/icons/schwarzerpfeil.png b/recipes/icons/schwarzerpfeil.png new file mode 100644 index 0000000000..c8a4dafae8 Binary files /dev/null and b/recipes/icons/schwarzerpfeil.png differ diff --git a/recipes/icons/science_advances.png b/recipes/icons/science_advances.png new file mode 100644 index 0000000000..a7cc08250f Binary files /dev/null and b/recipes/icons/science_advances.png differ diff --git a/recipes/icons/science_news.png b/recipes/icons/science_news.png new file mode 100644 index 0000000000..7cfaffee2b Binary files /dev/null and b/recipes/icons/science_news.png differ diff --git a/recipes/icons/scientific_american.png b/recipes/icons/scientific_american.png new file mode 100644 index 0000000000..d2b3ef71c0 Binary files /dev/null and b/recipes/icons/scientific_american.png differ diff --git a/recipes/icons/scinexx.png b/recipes/icons/scinexx.png index e479593353..85b9ce3ebe 100644 Binary files a/recipes/icons/scinexx.png and b/recipes/icons/scinexx.png differ diff --git a/recipes/icons/scmp.png b/recipes/icons/scmp.png index 19c64b4a1e..a8ce467738 100644 Binary files a/recipes/icons/scmp.png and b/recipes/icons/scmp.png differ diff 
--git a/recipes/icons/scprint.png b/recipes/icons/scprint.png new file mode 100644 index 0000000000..84a08426b5 Binary files /dev/null and b/recipes/icons/scprint.png differ diff --git a/recipes/icons/scroll.png b/recipes/icons/scroll.png new file mode 100644 index 0000000000..ceabdfe338 Binary files /dev/null and b/recipes/icons/scroll.png differ diff --git a/recipes/icons/seanhannity.png b/recipes/icons/seanhannity.png new file mode 100644 index 0000000000..ae2f7d3aaa Binary files /dev/null and b/recipes/icons/seanhannity.png differ diff --git a/recipes/icons/seattle_times.png b/recipes/icons/seattle_times.png index 6a7c4ca982..b4ec19c648 100644 Binary files a/recipes/icons/seattle_times.png and b/recipes/icons/seattle_times.png differ diff --git a/recipes/icons/security_watch.png b/recipes/icons/security_watch.png index 441f26e534..0239b98052 100644 Binary files a/recipes/icons/security_watch.png and b/recipes/icons/security_watch.png differ diff --git a/recipes/icons/sekurak_pl.png b/recipes/icons/sekurak_pl.png index 9f9cad7adc..f4f01b2b8c 100644 Binary files a/recipes/icons/sekurak_pl.png and b/recipes/icons/sekurak_pl.png differ diff --git a/recipes/icons/seminar_magazine.png b/recipes/icons/seminar_magazine.png new file mode 100644 index 0000000000..3b83c092b8 Binary files /dev/null and b/recipes/icons/seminar_magazine.png differ diff --git a/recipes/icons/serverside.png b/recipes/icons/serverside.png new file mode 100644 index 0000000000..32c36260b3 Binary files /dev/null and b/recipes/icons/serverside.png differ diff --git a/recipes/icons/sfbg.png b/recipes/icons/sfbg.png new file mode 100644 index 0000000000..f157c30b6d Binary files /dev/null and b/recipes/icons/sfbg.png differ diff --git a/recipes/icons/sfin.png b/recipes/icons/sfin.png index f2cc6d8637..529f7f15ac 100644 Binary files a/recipes/icons/sfin.png and b/recipes/icons/sfin.png differ diff --git a/recipes/icons/sg_hu.png b/recipes/icons/sg_hu.png index a516babdbe..11abff5824 100644 Binary 
files a/recipes/icons/sg_hu.png and b/recipes/icons/sg_hu.png differ diff --git a/recipes/icons/shacknews.png b/recipes/icons/shacknews.png new file mode 100644 index 0000000000..ca9b3b5080 Binary files /dev/null and b/recipes/icons/shacknews.png differ diff --git a/recipes/icons/shortlist.png b/recipes/icons/shortlist.png new file mode 100644 index 0000000000..3008199aed Binary files /dev/null and b/recipes/icons/shortlist.png differ diff --git a/recipes/icons/sigma_live.png b/recipes/icons/sigma_live.png new file mode 100644 index 0000000000..163ead7ceb Binary files /dev/null and b/recipes/icons/sigma_live.png differ diff --git a/recipes/icons/sign_of_the_times.png b/recipes/icons/sign_of_the_times.png index fa935a1a7b..22e6cf6728 100644 Binary files a/recipes/icons/sign_of_the_times.png and b/recipes/icons/sign_of_the_times.png differ diff --git a/recipes/icons/sign_on_sd.png b/recipes/icons/sign_on_sd.png new file mode 100644 index 0000000000..96d2d682cf Binary files /dev/null and b/recipes/icons/sign_on_sd.png differ diff --git a/recipes/icons/silicon_republic.png b/recipes/icons/silicon_republic.png index cf075712e7..24d94eca6f 100644 Binary files a/recipes/icons/silicon_republic.png and b/recipes/icons/silicon_republic.png differ diff --git a/recipes/icons/singtao_daily.png b/recipes/icons/singtao_daily.png new file mode 100644 index 0000000000..38d59d6db6 Binary files /dev/null and b/recipes/icons/singtao_daily.png differ diff --git a/recipes/icons/singtaohk.png b/recipes/icons/singtaohk.png new file mode 100644 index 0000000000..eeb4dc5429 Binary files /dev/null and b/recipes/icons/singtaohk.png differ diff --git a/recipes/icons/siol.png b/recipes/icons/siol.png index b5cb79512c..9fea79a23b 100644 Binary files a/recipes/icons/siol.png and b/recipes/icons/siol.png differ diff --git a/recipes/icons/sisainlive.png b/recipes/icons/sisainlive.png new file mode 100644 index 0000000000..e0e7f2d28d Binary files /dev/null and b/recipes/icons/sisainlive.png differ 
diff --git a/recipes/icons/sizinti_derigisi.png b/recipes/icons/sizinti_derigisi.png new file mode 100644 index 0000000000..74f1fa50a4 Binary files /dev/null and b/recipes/icons/sizinti_derigisi.png differ diff --git a/recipes/icons/skai.png b/recipes/icons/skai.png index 8582645859..a0c20a7ce6 100644 Binary files a/recipes/icons/skai.png and b/recipes/icons/skai.png differ diff --git a/recipes/icons/skanderborglokalavisen_dk.png b/recipes/icons/skanderborglokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/skanderborglokalavisen_dk.png and b/recipes/icons/skanderborglokalavisen_dk.png differ diff --git a/recipes/icons/skeptic.png b/recipes/icons/skeptic.png new file mode 100644 index 0000000000..09d12f4a6d Binary files /dev/null and b/recipes/icons/skeptic.png differ diff --git a/recipes/icons/skeptical_enquirer.png b/recipes/icons/skeptical_enquirer.png new file mode 100644 index 0000000000..073586ae43 Binary files /dev/null and b/recipes/icons/skeptical_enquirer.png differ diff --git a/recipes/icons/skylife.png b/recipes/icons/skylife.png index c0190c602a..c441ee9c57 100644 Binary files a/recipes/icons/skylife.png and b/recipes/icons/skylife.png differ diff --git a/recipes/icons/slashdot.png b/recipes/icons/slashdot.png index a2b853e5c5..10f3c7fcda 100644 Binary files a/recipes/icons/slashdot.png and b/recipes/icons/slashdot.png differ diff --git a/recipes/icons/slate.png b/recipes/icons/slate.png new file mode 100644 index 0000000000..3255efc6df Binary files /dev/null and b/recipes/icons/slate.png differ diff --git a/recipes/icons/slate_star_codex.png b/recipes/icons/slate_star_codex.png new file mode 100644 index 0000000000..3f5837b0bc Binary files /dev/null and b/recipes/icons/slate_star_codex.png differ diff --git a/recipes/icons/slovo.png b/recipes/icons/slovo.png new file mode 100644 index 0000000000..d127973523 Binary files /dev/null and b/recipes/icons/slovo.png differ diff --git a/recipes/icons/smashing.png 
b/recipes/icons/smashing.png index a1cbf65435..9dd2f9f7d8 100644 Binary files a/recipes/icons/smashing.png and b/recipes/icons/smashing.png differ diff --git a/recipes/icons/sme.png b/recipes/icons/sme.png new file mode 100644 index 0000000000..a536b92581 Binary files /dev/null and b/recipes/icons/sme.png differ diff --git a/recipes/icons/smith.png b/recipes/icons/smith.png new file mode 100644 index 0000000000..538bfbc9d1 Binary files /dev/null and b/recipes/icons/smith.png differ diff --git a/recipes/icons/sn_dk.png b/recipes/icons/sn_dk.png new file mode 100644 index 0000000000..6e425b13b2 Binary files /dev/null and b/recipes/icons/sn_dk.png differ diff --git a/recipes/icons/snob.png b/recipes/icons/snob.png index e4d9a7b19d..76f6073690 100644 Binary files a/recipes/icons/snob.png and b/recipes/icons/snob.png differ diff --git a/recipes/icons/snopes.png b/recipes/icons/snopes.png new file mode 100644 index 0000000000..8447227580 Binary files /dev/null and b/recipes/icons/snopes.png differ diff --git a/recipes/icons/sobaka.png b/recipes/icons/sobaka.png index c6776d5c52..07fbd92652 100644 Binary files a/recipes/icons/sobaka.png and b/recipes/icons/sobaka.png differ diff --git a/recipes/icons/sobesednik.png b/recipes/icons/sobesednik.png index 5e8845ff79..0650f10022 100644 Binary files a/recipes/icons/sobesednik.png and b/recipes/icons/sobesednik.png differ diff --git a/recipes/icons/soenderborglokalavisen_dk.png b/recipes/icons/soenderborglokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/soenderborglokalavisen_dk.png and b/recipes/icons/soenderborglokalavisen_dk.png differ diff --git a/recipes/icons/sol_haber.png b/recipes/icons/sol_haber.png new file mode 100644 index 0000000000..0f09e5f3b0 Binary files /dev/null and b/recipes/icons/sol_haber.png differ diff --git a/recipes/icons/soldiers.png b/recipes/icons/soldiers.png index f9f3a228b4..a05c0c210b 100644 Binary files a/recipes/icons/soldiers.png and 
b/recipes/icons/soldiers.png differ diff --git a/recipes/icons/something_awful.png b/recipes/icons/something_awful.png index 3a0c065d60..ff05e0f53c 100644 Binary files a/recipes/icons/something_awful.png and b/recipes/icons/something_awful.png differ diff --git a/recipes/icons/southernstar.png b/recipes/icons/southernstar.png new file mode 100644 index 0000000000..58f2efa494 Binary files /dev/null and b/recipes/icons/southernstar.png differ diff --git a/recipes/icons/sova.png b/recipes/icons/sova.png index 9ae76e064a..4288d390d3 100644 Binary files a/recipes/icons/sova.png and b/recipes/icons/sova.png differ diff --git a/recipes/icons/spectator-au.png b/recipes/icons/spectator-au.png index c5508d9675..43123dbb21 100644 Binary files a/recipes/icons/spectator-au.png and b/recipes/icons/spectator-au.png differ diff --git a/recipes/icons/spectator_magazine.png b/recipes/icons/spectator_magazine.png new file mode 100644 index 0000000000..4be902c85a Binary files /dev/null and b/recipes/icons/spectator_magazine.png differ diff --git a/recipes/icons/spektrum.png b/recipes/icons/spektrum.png index 346eddaef2..beebeae323 100644 Binary files a/recipes/icons/spektrum.png and b/recipes/icons/spektrum.png differ diff --git a/recipes/icons/spiegel_int.png b/recipes/icons/spiegel_int.png index 064c9b6d1e..d2df2be3c8 100644 Binary files a/recipes/icons/spiegel_int.png and b/recipes/icons/spiegel_int.png differ diff --git a/recipes/icons/spiegelde.png b/recipes/icons/spiegelde.png index 064c9b6d1e..d2df2be3c8 100644 Binary files a/recipes/icons/spiegelde.png and b/recipes/icons/spiegelde.png differ diff --git a/recipes/icons/spin_magazine.png b/recipes/icons/spin_magazine.png new file mode 100644 index 0000000000..703456057b Binary files /dev/null and b/recipes/icons/spin_magazine.png differ diff --git a/recipes/icons/sportowefakty.png b/recipes/icons/sportowefakty.png index 5ae7f5a34e..292b14dd79 100644 Binary files a/recipes/icons/sportowefakty.png and 
b/recipes/icons/sportowefakty.png differ diff --git a/recipes/icons/sports_illustrated.png b/recipes/icons/sports_illustrated.png new file mode 100644 index 0000000000..e68e494c78 Binary files /dev/null and b/recipes/icons/sports_illustrated.png differ diff --git a/recipes/icons/sportstar.png b/recipes/icons/sportstar.png new file mode 100644 index 0000000000..46dd068116 Binary files /dev/null and b/recipes/icons/sportstar.png differ diff --git a/recipes/icons/sporza_be.png b/recipes/icons/sporza_be.png new file mode 100644 index 0000000000..4d72dd8c1e Binary files /dev/null and b/recipes/icons/sporza_be.png differ diff --git a/recipes/icons/st_louis_post_dispatch.png b/recipes/icons/st_louis_post_dispatch.png index 71a474be70..744968a0c6 100644 Binary files a/recipes/icons/st_louis_post_dispatch.png and b/recipes/icons/st_louis_post_dispatch.png differ diff --git a/recipes/icons/stackoverflow.png b/recipes/icons/stackoverflow.png index 54400cbe4d..c23ad99d61 100644 Binary files a/recipes/icons/stackoverflow.png and b/recipes/icons/stackoverflow.png differ diff --git a/recipes/icons/stamgasten.png b/recipes/icons/stamgasten.png index f07e26d5ce..c89c651c9e 100644 Binary files a/recipes/icons/stamgasten.png and b/recipes/icons/stamgasten.png differ diff --git a/recipes/icons/standardmedia_ke.png b/recipes/icons/standardmedia_ke.png index d0152f574f..ad7bcf8f06 100644 Binary files a/recipes/icons/standardmedia_ke.png and b/recipes/icons/standardmedia_ke.png differ diff --git a/recipes/icons/standardmoney.png b/recipes/icons/standardmoney.png index e081d27fa4..fb33dc07c1 100644 Binary files a/recipes/icons/standardmoney.png and b/recipes/icons/standardmoney.png differ diff --git a/recipes/icons/star_gazetesi.png b/recipes/icons/star_gazetesi.png new file mode 100644 index 0000000000..a5d92eb058 Binary files /dev/null and b/recipes/icons/star_gazetesi.png differ diff --git a/recipes/icons/staradvertiser.png b/recipes/icons/staradvertiser.png index 
1f1b5e5536..ce7ef10028 100644 Binary files a/recipes/icons/staradvertiser.png and b/recipes/icons/staradvertiser.png differ diff --git a/recipes/icons/starnberger_merkur.png b/recipes/icons/starnberger_merkur.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/starnberger_merkur.png and b/recipes/icons/starnberger_merkur.png differ diff --git a/recipes/icons/stars_and_stripes.png b/recipes/icons/stars_and_stripes.png new file mode 100644 index 0000000000..6b8db24297 Binary files /dev/null and b/recipes/icons/stars_and_stripes.png differ diff --git a/recipes/icons/starwars.png b/recipes/icons/starwars.png index 3d1f6dc6a7..f50730e6a4 100644 Binary files a/recipes/icons/starwars.png and b/recipes/icons/starwars.png differ diff --git a/recipes/icons/stiintasitehnica.png b/recipes/icons/stiintasitehnica.png index 7e1a55419a..d7ccca2ae9 100644 Binary files a/recipes/icons/stiintasitehnica.png and b/recipes/icons/stiintasitehnica.png differ diff --git a/recipes/icons/stnn.png b/recipes/icons/stnn.png new file mode 100644 index 0000000000..0e75b1ca3a Binary files /dev/null and b/recipes/icons/stnn.png differ diff --git a/recipes/icons/stopgame.png b/recipes/icons/stopgame.png index 5a52b84390..f4e971d769 100644 Binary files a/recipes/icons/stopgame.png and b/recipes/icons/stopgame.png differ diff --git a/recipes/icons/strange_horizons.png b/recipes/icons/strange_horizons.png new file mode 100644 index 0000000000..6588dffa38 Binary files /dev/null and b/recipes/icons/strange_horizons.png differ diff --git a/recipes/icons/strategy-business.png b/recipes/icons/strategy-business.png new file mode 100644 index 0000000000..cfef7ee96f Binary files /dev/null and b/recipes/icons/strategy-business.png differ diff --git a/recipes/icons/substack.png b/recipes/icons/substack.png new file mode 100644 index 0000000000..be19ec5161 Binary files /dev/null and b/recipes/icons/substack.png differ diff --git a/recipes/icons/sueddeutsche_mobil.png 
b/recipes/icons/sueddeutsche_mobil.png new file mode 100644 index 0000000000..ceb8b2d301 Binary files /dev/null and b/recipes/icons/sueddeutsche_mobil.png differ diff --git a/recipes/icons/sunday_times_magazine.png b/recipes/icons/sunday_times_magazine.png new file mode 100644 index 0000000000..1ecd570cd9 Binary files /dev/null and b/recipes/icons/sunday_times_magazine.png differ diff --git a/recipes/icons/superesportes.png b/recipes/icons/superesportes.png new file mode 100644 index 0000000000..88f9f1f4bd Binary files /dev/null and b/recipes/icons/superesportes.png differ diff --git a/recipes/icons/svt_nyheter.png b/recipes/icons/svt_nyheter.png new file mode 100644 index 0000000000..f328be5aa1 Binary files /dev/null and b/recipes/icons/svt_nyheter.png differ diff --git a/recipes/icons/swarajya.png b/recipes/icons/swarajya.png new file mode 100644 index 0000000000..04667283c5 Binary files /dev/null and b/recipes/icons/swarajya.png differ diff --git a/recipes/icons/swiat_obrazu.png b/recipes/icons/swiat_obrazu.png index 6a7621ca9a..e7cb89cb43 100644 Binary files a/recipes/icons/swiat_obrazu.png and b/recipes/icons/swiat_obrazu.png differ diff --git a/recipes/icons/syddjurslokalavisen_dk.png b/recipes/icons/syddjurslokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/syddjurslokalavisen_dk.png and b/recipes/icons/syddjurslokalavisen_dk.png differ diff --git a/recipes/icons/t3n_de.png b/recipes/icons/t3n_de.png new file mode 100644 index 0000000000..c48a112e4e Binary files /dev/null and b/recipes/icons/t3n_de.png differ diff --git a/recipes/icons/t_online.png b/recipes/icons/t_online.png new file mode 100644 index 0000000000..6b2bf8583a Binary files /dev/null and b/recipes/icons/t_online.png differ diff --git a/recipes/icons/tagesan.png b/recipes/icons/tagesan.png index 42ad290ee9..c857ae4c27 100644 Binary files a/recipes/icons/tagesan.png and b/recipes/icons/tagesan.png differ diff --git a/recipes/icons/tagespost.png 
b/recipes/icons/tagespost.png index ea4f4e6a12..0c66fa5f69 100644 Binary files a/recipes/icons/tagespost.png and b/recipes/icons/tagespost.png differ diff --git a/recipes/icons/taggeschau_de.png b/recipes/icons/taggeschau_de.png index 03e9fd3f03..1f785d0767 100644 Binary files a/recipes/icons/taggeschau_de.png and b/recipes/icons/taggeschau_de.png differ diff --git a/recipes/icons/taipei.png b/recipes/icons/taipei.png new file mode 100644 index 0000000000..8bdca073c8 Binary files /dev/null and b/recipes/icons/taipei.png differ diff --git a/recipes/icons/takiedela.png b/recipes/icons/takiedela.png index 43cc94cedc..0f799811d8 100644 Binary files a/recipes/icons/takiedela.png and b/recipes/icons/takiedela.png differ diff --git a/recipes/icons/tanea.png b/recipes/icons/tanea.png new file mode 100644 index 0000000000..fda49b5148 Binary files /dev/null and b/recipes/icons/tanea.png differ diff --git a/recipes/icons/tayga.png b/recipes/icons/tayga.png index 697fd26b6e..f7d9bd85b0 100644 Binary files a/recipes/icons/tayga.png and b/recipes/icons/tayga.png differ diff --git a/recipes/icons/taz.png b/recipes/icons/taz.png new file mode 100644 index 0000000000..30e4b25703 Binary files /dev/null and b/recipes/icons/taz.png differ diff --git a/recipes/icons/taz_rss.png b/recipes/icons/taz_rss.png index 86f9f6c04e..f8f496c39a 100644 Binary files a/recipes/icons/taz_rss.png and b/recipes/icons/taz_rss.png differ diff --git a/recipes/icons/tech_economy.png b/recipes/icons/tech_economy.png index 895f307c0a..b92f4cfb4a 100644 Binary files a/recipes/icons/tech_economy.png and b/recipes/icons/tech_economy.png differ diff --git a/recipes/icons/tech_world.png b/recipes/icons/tech_world.png index 0f2d739a08..48c7cb5fed 100644 Binary files a/recipes/icons/tech_world.png and b/recipes/icons/tech_world.png differ diff --git a/recipes/icons/techdirt.png b/recipes/icons/techdirt.png index f34040b1e3..034b0151d4 100644 Binary files a/recipes/icons/techdirt.png and b/recipes/icons/techdirt.png 
differ diff --git a/recipes/icons/technology_review_de.png b/recipes/icons/technology_review_de.png new file mode 100644 index 0000000000..163b186c46 Binary files /dev/null and b/recipes/icons/technology_review_de.png differ diff --git a/recipes/icons/techtarget.png b/recipes/icons/techtarget.png new file mode 100644 index 0000000000..4f00895e88 Binary files /dev/null and b/recipes/icons/techtarget.png differ diff --git a/recipes/icons/tedneward.png b/recipes/icons/tedneward.png new file mode 100644 index 0000000000..6ce465923e Binary files /dev/null and b/recipes/icons/tedneward.png differ diff --git a/recipes/icons/tegernseer_zeitung.png b/recipes/icons/tegernseer_zeitung.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/tegernseer_zeitung.png and b/recipes/icons/tegernseer_zeitung.png differ diff --git a/recipes/icons/telam.png b/recipes/icons/telam.png index befb1ed900..e99a01bbfb 100644 Binary files a/recipes/icons/telam.png and b/recipes/icons/telam.png differ diff --git a/recipes/icons/telegraph_in.png b/recipes/icons/telegraph_in.png index 73f5181ab8..9fc707204e 100644 Binary files a/recipes/icons/telegraph_in.png and b/recipes/icons/telegraph_in.png differ diff --git a/recipes/icons/thai_post_daily.png b/recipes/icons/thai_post_daily.png new file mode 100644 index 0000000000..cc807f67a2 Binary files /dev/null and b/recipes/icons/thai_post_daily.png differ diff --git a/recipes/icons/thairath.png b/recipes/icons/thairath.png new file mode 100644 index 0000000000..baf7a08654 Binary files /dev/null and b/recipes/icons/thairath.png differ diff --git a/recipes/icons/the_athletic.png b/recipes/icons/the_athletic.png new file mode 100644 index 0000000000..e5c22f2f0a Binary files /dev/null and b/recipes/icons/the_athletic.png differ diff --git a/recipes/icons/the_baffler.png b/recipes/icons/the_baffler.png new file mode 100644 index 0000000000..fed3a7534a Binary files /dev/null and b/recipes/icons/the_baffler.png differ diff --git 
a/recipes/icons/the_budget_fashionista.png b/recipes/icons/the_budget_fashionista.png new file mode 100644 index 0000000000..ea2da6e4a9 Binary files /dev/null and b/recipes/icons/the_budget_fashionista.png differ diff --git a/recipes/icons/the_clinic_online.png b/recipes/icons/the_clinic_online.png new file mode 100644 index 0000000000..978a4b63ac Binary files /dev/null and b/recipes/icons/the_clinic_online.png differ diff --git a/recipes/icons/the_conversation.png b/recipes/icons/the_conversation.png new file mode 100644 index 0000000000..59332a05c4 Binary files /dev/null and b/recipes/icons/the_conversation.png differ diff --git a/recipes/icons/the_daily_news_egypt.png b/recipes/icons/the_daily_news_egypt.png new file mode 100644 index 0000000000..b061b2b4c5 Binary files /dev/null and b/recipes/icons/the_daily_news_egypt.png differ diff --git a/recipes/icons/the_diplomat.png b/recipes/icons/the_diplomat.png new file mode 100644 index 0000000000..7a1c8fa8c0 Binary files /dev/null and b/recipes/icons/the_diplomat.png differ diff --git a/recipes/icons/the_feature.png b/recipes/icons/the_feature.png new file mode 100644 index 0000000000..f5c4f717a1 Binary files /dev/null and b/recipes/icons/the_feature.png differ diff --git a/recipes/icons/the_federalist.png b/recipes/icons/the_federalist.png new file mode 100644 index 0000000000..b4ad0541f2 Binary files /dev/null and b/recipes/icons/the_federalist.png differ diff --git a/recipes/icons/the_freeman.png b/recipes/icons/the_freeman.png new file mode 100644 index 0000000000..e30f4c41d4 Binary files /dev/null and b/recipes/icons/the_freeman.png differ diff --git a/recipes/icons/the_friday_times.png b/recipes/icons/the_friday_times.png new file mode 100644 index 0000000000..e2f74600c1 Binary files /dev/null and b/recipes/icons/the_friday_times.png differ diff --git a/recipes/icons/the_insider.png b/recipes/icons/the_insider.png index 78151d8f54..7b2b65ef41 100644 Binary files a/recipes/icons/the_insider.png and 
b/recipes/icons/the_insider.png differ diff --git a/recipes/icons/the_journal.png b/recipes/icons/the_journal.png new file mode 100644 index 0000000000..66c4ed83c2 Binary files /dev/null and b/recipes/icons/the_journal.png differ diff --git a/recipes/icons/the_manila_bulletin.png b/recipes/icons/the_manila_bulletin.png new file mode 100644 index 0000000000..0558362870 Binary files /dev/null and b/recipes/icons/the_manila_bulletin.png differ diff --git a/recipes/icons/the_manila_times.png b/recipes/icons/the_manila_times.png new file mode 100644 index 0000000000..6e52d7a7bc Binary files /dev/null and b/recipes/icons/the_manila_times.png differ diff --git a/recipes/icons/the_marker.png b/recipes/icons/the_marker.png new file mode 100644 index 0000000000..f87dc6395f Binary files /dev/null and b/recipes/icons/the_marker.png differ diff --git a/recipes/icons/the_new_republic.png b/recipes/icons/the_new_republic.png new file mode 100644 index 0000000000..dc756592a1 Binary files /dev/null and b/recipes/icons/the_new_republic.png differ diff --git a/recipes/icons/the_philippine_daily_inquirer.png b/recipes/icons/the_philippine_daily_inquirer.png new file mode 100644 index 0000000000..a9aadf4aeb Binary files /dev/null and b/recipes/icons/the_philippine_daily_inquirer.png differ diff --git a/recipes/icons/the_philippine_star.png b/recipes/icons/the_philippine_star.png new file mode 100644 index 0000000000..e30f4c41d4 Binary files /dev/null and b/recipes/icons/the_philippine_star.png differ diff --git a/recipes/icons/the_register.png b/recipes/icons/the_register.png new file mode 100644 index 0000000000..1a7a085c90 Binary files /dev/null and b/recipes/icons/the_register.png differ diff --git a/recipes/icons/the_saturday_paper.png b/recipes/icons/the_saturday_paper.png new file mode 100644 index 0000000000..2b77fbc603 Binary files /dev/null and b/recipes/icons/the_saturday_paper.png differ diff --git a/recipes/icons/the_scotsman.png b/recipes/icons/the_scotsman.png index 
5eb732c0ac..19efd5ba99 100644 Binary files a/recipes/icons/the_scotsman.png and b/recipes/icons/the_scotsman.png differ diff --git a/recipes/icons/the_sun.png b/recipes/icons/the_sun.png new file mode 100644 index 0000000000..0326adf1a7 Binary files /dev/null and b/recipes/icons/the_sun.png differ diff --git a/recipes/icons/the_verge.png b/recipes/icons/the_verge.png new file mode 100644 index 0000000000..01db90aea7 Binary files /dev/null and b/recipes/icons/the_verge.png differ diff --git a/recipes/icons/the_week.png b/recipes/icons/the_week.png new file mode 100644 index 0000000000..41a07c2218 Binary files /dev/null and b/recipes/icons/the_week.png differ diff --git a/recipes/icons/the_week_magazine_free.png b/recipes/icons/the_week_magazine_free.png index 4fc029a27f..f8d3c9013f 100644 Binary files a/recipes/icons/the_week_magazine_free.png and b/recipes/icons/the_week_magazine_free.png differ diff --git a/recipes/icons/the_week_uk.png b/recipes/icons/the_week_uk.png new file mode 100644 index 0000000000..f8d3c9013f Binary files /dev/null and b/recipes/icons/the_week_uk.png differ diff --git a/recipes/icons/the_wire.png b/recipes/icons/the_wire.png new file mode 100644 index 0000000000..09c49b1462 Binary files /dev/null and b/recipes/icons/the_wire.png differ diff --git a/recipes/icons/thebell.png b/recipes/icons/thebell.png index 60597cb0ce..4b4b5a50cf 100644 Binary files a/recipes/icons/thebell.png and b/recipes/icons/thebell.png differ diff --git a/recipes/icons/thecodelesscode.png b/recipes/icons/thecodelesscode.png new file mode 100644 index 0000000000..6e40d1181f Binary files /dev/null and b/recipes/icons/thecodelesscode.png differ diff --git a/recipes/icons/thecultofghoul.png b/recipes/icons/thecultofghoul.png new file mode 100644 index 0000000000..04c29c68dd Binary files /dev/null and b/recipes/icons/thecultofghoul.png differ diff --git a/recipes/icons/thedailywtf.png b/recipes/icons/thedailywtf.png index 3f1ea4f752..195f0f9ebe 100644 Binary files 
a/recipes/icons/thedailywtf.png and b/recipes/icons/thedailywtf.png differ diff --git a/recipes/icons/thedgesingapore.png b/recipes/icons/thedgesingapore.png new file mode 100644 index 0000000000..cf4d2e7df1 Binary files /dev/null and b/recipes/icons/thedgesingapore.png differ diff --git a/recipes/icons/theecocolapse.png b/recipes/icons/theecocolapse.png index 31c20ee40f..185479eb59 100644 Binary files a/recipes/icons/theecocolapse.png and b/recipes/icons/theecocolapse.png differ diff --git a/recipes/icons/theeconomictimes_india_print_edition.png b/recipes/icons/theeconomictimes_india_print_edition.png new file mode 100644 index 0000000000..e3ee3b1a9a Binary files /dev/null and b/recipes/icons/theeconomictimes_india_print_edition.png differ diff --git a/recipes/icons/theindiaforum.png b/recipes/icons/theindiaforum.png new file mode 100644 index 0000000000..d8471a44cc Binary files /dev/null and b/recipes/icons/theindiaforum.png differ diff --git a/recipes/icons/theluminouslandscape.png b/recipes/icons/theluminouslandscape.png index d3ef6c38f7..a432e96a5d 100644 Binary files a/recipes/icons/theluminouslandscape.png and b/recipes/icons/theluminouslandscape.png differ diff --git a/recipes/icons/themorningpaper.png b/recipes/icons/themorningpaper.png new file mode 100644 index 0000000000..3103c10c9d Binary files /dev/null and b/recipes/icons/themorningpaper.png differ diff --git a/recipes/icons/thenewcriterion.png b/recipes/icons/thenewcriterion.png index 50449e8e9d..f739c6c0e2 100644 Binary files a/recipes/icons/thenewcriterion.png and b/recipes/icons/thenewcriterion.png differ diff --git a/recipes/icons/thenews.png b/recipes/icons/thenews.png new file mode 100644 index 0000000000..8fbd7f2145 Binary files /dev/null and b/recipes/icons/thenews.png differ diff --git a/recipes/icons/theoldfoodie.png b/recipes/icons/theoldfoodie.png index c71208762b..99426c2d4e 100644 Binary files a/recipes/icons/theoldfoodie.png and b/recipes/icons/theoldfoodie.png differ diff --git 
a/recipes/icons/theprint.png b/recipes/icons/theprint.png new file mode 100644 index 0000000000..487e082771 Binary files /dev/null and b/recipes/icons/theprint.png differ diff --git a/recipes/icons/thewest_au.png b/recipes/icons/thewest_au.png index 39448f0a59..e66164eb97 100644 Binary files a/recipes/icons/thewest_au.png and b/recipes/icons/thewest_au.png differ diff --git a/recipes/icons/think_progress.png b/recipes/icons/think_progress.png index 27099d84f3..33f0b5def1 100644 Binary files a/recipes/icons/think_progress.png and b/recipes/icons/think_progress.png differ diff --git a/recipes/icons/thn.png b/recipes/icons/thn.png new file mode 100644 index 0000000000..5fb28fed0d Binary files /dev/null and b/recipes/icons/thn.png differ diff --git a/recipes/icons/tijolaco.png b/recipes/icons/tijolaco.png new file mode 100644 index 0000000000..616114c008 Binary files /dev/null and b/recipes/icons/tijolaco.png differ diff --git a/recipes/icons/tillsonburg.png b/recipes/icons/tillsonburg.png new file mode 100644 index 0000000000..053bb28bdb Binary files /dev/null and b/recipes/icons/tillsonburg.png differ diff --git a/recipes/icons/times_of_malta.png b/recipes/icons/times_of_malta.png index 4fcb617c67..c3c78648c3 100644 Binary files a/recipes/icons/times_of_malta.png and b/recipes/icons/times_of_malta.png differ diff --git a/recipes/icons/tjournal.png b/recipes/icons/tjournal.png index 48c652575f..0e7ab53dea 100644 Binary files a/recipes/icons/tjournal.png and b/recipes/icons/tjournal.png differ diff --git a/recipes/icons/tmz.png b/recipes/icons/tmz.png index 83f7cd11fd..f2de76e9c5 100644 Binary files a/recipes/icons/tmz.png and b/recipes/icons/tmz.png differ diff --git a/recipes/icons/tnxm.png b/recipes/icons/tnxm.png index 5ea3190485..527d9e55df 100644 Binary files a/recipes/icons/tnxm.png and b/recipes/icons/tnxm.png differ diff --git a/recipes/icons/today_online.png b/recipes/icons/today_online.png new file mode 100644 index 0000000000..165241c5d6 Binary files 
/dev/null and b/recipes/icons/today_online.png differ diff --git a/recipes/icons/toi.png b/recipes/icons/toi.png index c7f427aafc..26da33fff6 100644 Binary files a/recipes/icons/toi.png and b/recipes/icons/toi.png differ diff --git a/recipes/icons/toiprint.png b/recipes/icons/toiprint.png new file mode 100644 index 0000000000..26da33fff6 Binary files /dev/null and b/recipes/icons/toiprint.png differ diff --git a/recipes/icons/tolzer_kurier.png b/recipes/icons/tolzer_kurier.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/tolzer_kurier.png and b/recipes/icons/tolzer_kurier.png differ diff --git a/recipes/icons/tomshardware_it.png b/recipes/icons/tomshardware_it.png new file mode 100644 index 0000000000..ad467b86d3 Binary files /dev/null and b/recipes/icons/tomshardware_it.png differ diff --git a/recipes/icons/tovima.png b/recipes/icons/tovima.png index 686a86ed9f..b8c608b78b 100644 Binary files a/recipes/icons/tovima.png and b/recipes/icons/tovima.png differ diff --git a/recipes/icons/toyokeizai.png b/recipes/icons/toyokeizai.png new file mode 100644 index 0000000000..1caad7a3ed Binary files /dev/null and b/recipes/icons/toyokeizai.png differ diff --git a/recipes/icons/tri_city_herald.png b/recipes/icons/tri_city_herald.png new file mode 100644 index 0000000000..711911cc67 Binary files /dev/null and b/recipes/icons/tri_city_herald.png differ diff --git a/recipes/icons/trombon.png b/recipes/icons/trombon.png index 7516bbbfa7..a4531dd419 100644 Binary files a/recipes/icons/trombon.png and b/recipes/icons/trombon.png differ diff --git a/recipes/icons/tsn.png b/recipes/icons/tsn.png index bf7a3cbbeb..579936675f 100644 Binary files a/recipes/icons/tsn.png and b/recipes/icons/tsn.png differ diff --git a/recipes/icons/tst.png b/recipes/icons/tst.png index 42becd7397..48433cd9e8 100644 Binary files a/recipes/icons/tst.png and b/recipes/icons/tst.png differ diff --git a/recipes/icons/tulsaworld.png b/recipes/icons/tulsaworld.png index 
4b85d34972..7ef4c978f0 100644 Binary files a/recipes/icons/tulsaworld.png and b/recipes/icons/tulsaworld.png differ diff --git a/recipes/icons/tuttojove.png b/recipes/icons/tuttojove.png index 229ebd7cb3..fa95114bd2 100644 Binary files a/recipes/icons/tuttojove.png and b/recipes/icons/tuttojove.png differ diff --git a/recipes/icons/tuttosport.png b/recipes/icons/tuttosport.png index 1c84a1d49c..a4e71a3f74 100644 Binary files a/recipes/icons/tuttosport.png and b/recipes/icons/tuttosport.png differ diff --git a/recipes/icons/tv2lorry_dk.png b/recipes/icons/tv2lorry_dk.png index 1d6cc0ecb5..93fa196193 100644 Binary files a/recipes/icons/tv2lorry_dk.png and b/recipes/icons/tv2lorry_dk.png differ diff --git a/recipes/icons/tv2nord_dk.png b/recipes/icons/tv2nord_dk.png index 2637e6cc17..7c57b0df6c 100644 Binary files a/recipes/icons/tv2nord_dk.png and b/recipes/icons/tv2nord_dk.png differ diff --git a/recipes/icons/tveast_dk.png b/recipes/icons/tveast_dk.png index ca09b44e18..ffd2a8dfce 100644 Binary files a/recipes/icons/tveast_dk.png and b/recipes/icons/tveast_dk.png differ diff --git a/recipes/icons/tvmania.png b/recipes/icons/tvmania.png index ef08f5db0b..609704e853 100644 Binary files a/recipes/icons/tvmania.png and b/recipes/icons/tvmania.png differ diff --git a/recipes/icons/tvmidtvest_dk.png b/recipes/icons/tvmidtvest_dk.png index 87aeba5cb0..7796d779ad 100644 Binary files a/recipes/icons/tvmidtvest_dk.png and b/recipes/icons/tvmidtvest_dk.png differ diff --git a/recipes/icons/tvsyd_dk.png b/recipes/icons/tvsyd_dk.png index e79b68bf55..8ce68fd962 100644 Binary files a/recipes/icons/tvsyd_dk.png and b/recipes/icons/tvsyd_dk.png differ diff --git a/recipes/icons/tvxs.png b/recipes/icons/tvxs.png index 1c40af0a17..6512af75b5 100644 Binary files a/recipes/icons/tvxs.png and b/recipes/icons/tvxs.png differ diff --git a/recipes/icons/tweakers.png b/recipes/icons/tweakers.png index 352f05364c..0915f47883 100644 Binary files a/recipes/icons/tweakers.png and 
b/recipes/icons/tweakers.png differ diff --git a/recipes/icons/tweakers_net.png b/recipes/icons/tweakers_net.png index 03a25abb92..3f22ec6f2c 100644 Binary files a/recipes/icons/tweakers_net.png and b/recipes/icons/tweakers_net.png differ diff --git a/recipes/icons/tyzden.png b/recipes/icons/tyzden.png new file mode 100644 index 0000000000..a0f01d560b Binary files /dev/null and b/recipes/icons/tyzden.png differ diff --git a/recipes/icons/ua_fooball.png b/recipes/icons/ua_fooball.png new file mode 100644 index 0000000000..4e8b806c97 Binary files /dev/null and b/recipes/icons/ua_fooball.png differ diff --git a/recipes/icons/ubuntu_pl.png b/recipes/icons/ubuntu_pl.png index 5685eddd1c..9fb99753ef 100644 Binary files a/recipes/icons/ubuntu_pl.png and b/recipes/icons/ubuntu_pl.png differ diff --git a/recipes/icons/ukraiyns_tizhdien.png b/recipes/icons/ukraiyns_tizhdien.png index c3ad8d253f..2c6d8118a6 100644 Binary files a/recipes/icons/ukraiyns_tizhdien.png and b/recipes/icons/ukraiyns_tizhdien.png differ diff --git a/recipes/icons/ukrinform_de.png b/recipes/icons/ukrinform_de.png index d23dac533c..02b86556f5 100644 Binary files a/recipes/icons/ukrinform_de.png and b/recipes/icons/ukrinform_de.png differ diff --git a/recipes/icons/ukrinform_en.png b/recipes/icons/ukrinform_en.png index d23dac533c..02b86556f5 100644 Binary files a/recipes/icons/ukrinform_en.png and b/recipes/icons/ukrinform_en.png differ diff --git a/recipes/icons/ukrinform_es.png b/recipes/icons/ukrinform_es.png index d23dac533c..02b86556f5 100644 Binary files a/recipes/icons/ukrinform_es.png and b/recipes/icons/ukrinform_es.png differ diff --git a/recipes/icons/ukrinform_fr.png b/recipes/icons/ukrinform_fr.png index d23dac533c..02b86556f5 100644 Binary files a/recipes/icons/ukrinform_fr.png and b/recipes/icons/ukrinform_fr.png differ diff --git a/recipes/icons/ukrinform_ja.png b/recipes/icons/ukrinform_ja.png index d23dac533c..02b86556f5 100644 Binary files a/recipes/icons/ukrinform_ja.png and 
b/recipes/icons/ukrinform_ja.png differ diff --git a/recipes/icons/ukrinform_pl.png b/recipes/icons/ukrinform_pl.png index d23dac533c..02b86556f5 100644 Binary files a/recipes/icons/ukrinform_pl.png and b/recipes/icons/ukrinform_pl.png differ diff --git a/recipes/icons/ukrinform_ru.png b/recipes/icons/ukrinform_ru.png index d23dac533c..02b86556f5 100644 Binary files a/recipes/icons/ukrinform_ru.png and b/recipes/icons/ukrinform_ru.png differ diff --git a/recipes/icons/ukrinform_uk.png b/recipes/icons/ukrinform_uk.png index d23dac533c..02b86556f5 100644 Binary files a/recipes/icons/ukrinform_uk.png and b/recipes/icons/ukrinform_uk.png differ diff --git a/recipes/icons/unian_net.png b/recipes/icons/unian_net.png index 8421cf4959..0d550ba7a1 100644 Binary files a/recipes/icons/unian_net.png and b/recipes/icons/unian_net.png differ diff --git a/recipes/icons/unian_net_en.png b/recipes/icons/unian_net_en.png new file mode 100644 index 0000000000..0d550ba7a1 Binary files /dev/null and b/recipes/icons/unian_net_en.png differ diff --git a/recipes/icons/unian_net_ua.png b/recipes/icons/unian_net_ua.png new file mode 100644 index 0000000000..0d550ba7a1 Binary files /dev/null and b/recipes/icons/unian_net_ua.png differ diff --git a/recipes/icons/uninohimitu.png b/recipes/icons/uninohimitu.png index 31fcafd2cb..ac8786774a 100644 Binary files a/recipes/icons/uninohimitu.png and b/recipes/icons/uninohimitu.png differ diff --git a/recipes/icons/united_daily.png b/recipes/icons/united_daily.png index 65d25d7fc4..90598e490d 100644 Binary files a/recipes/icons/united_daily.png and b/recipes/icons/united_daily.png differ diff --git a/recipes/icons/universe_today.png b/recipes/icons/universe_today.png index 0c26d3da5a..12afccaa13 100644 Binary files a/recipes/icons/universe_today.png and b/recipes/icons/universe_today.png differ diff --git a/recipes/icons/unn_ru.png b/recipes/icons/unn_ru.png index f937eafa66..3b7d05b1f9 100644 Binary files a/recipes/icons/unn_ru.png and 
b/recipes/icons/unn_ru.png differ diff --git a/recipes/icons/unn_ua.png b/recipes/icons/unn_ua.png index f937eafa66..3b7d05b1f9 100644 Binary files a/recipes/icons/unn_ua.png and b/recipes/icons/unn_ua.png differ diff --git a/recipes/icons/unperiodico.png b/recipes/icons/unperiodico.png new file mode 100644 index 0000000000..b7dca0d57a Binary files /dev/null and b/recipes/icons/unperiodico.png differ diff --git a/recipes/icons/upi.png b/recipes/icons/upi.png new file mode 100644 index 0000000000..a374e21688 Binary files /dev/null and b/recipes/icons/upi.png differ diff --git a/recipes/icons/usatoday.png b/recipes/icons/usatoday.png new file mode 100644 index 0000000000..19d565aa02 Binary files /dev/null and b/recipes/icons/usatoday.png differ diff --git a/recipes/icons/valbybladet_dk.png b/recipes/icons/valbybladet_dk.png new file mode 100644 index 0000000000..6fc915ca3d Binary files /dev/null and b/recipes/icons/valbybladet_dk.png differ diff --git a/recipes/icons/vancouver_province.png b/recipes/icons/vancouver_province.png new file mode 100644 index 0000000000..a1dfec6c37 Binary files /dev/null and b/recipes/icons/vancouver_province.png differ diff --git a/recipes/icons/vancouver_sun.png b/recipes/icons/vancouver_sun.png new file mode 100644 index 0000000000..ad06b7c682 Binary files /dev/null and b/recipes/icons/vancouver_sun.png differ diff --git a/recipes/icons/vanloesebladet_dk.png b/recipes/icons/vanloesebladet_dk.png new file mode 100644 index 0000000000..6fc915ca3d Binary files /dev/null and b/recipes/icons/vanloesebladet_dk.png differ diff --git a/recipes/icons/vardelokalavisen_dk.png b/recipes/icons/vardelokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/vardelokalavisen_dk.png and b/recipes/icons/vardelokalavisen_dk.png differ diff --git a/recipes/icons/variety.png b/recipes/icons/variety.png index 0475852f39..39a815ce93 100644 Binary files a/recipes/icons/variety.png and b/recipes/icons/variety.png differ diff --git 
a/recipes/icons/vedomosti.png b/recipes/icons/vedomosti.png index 4d044e7653..d960c465c7 100644 Binary files a/recipes/icons/vedomosti.png and b/recipes/icons/vedomosti.png differ diff --git a/recipes/icons/veintitres.png b/recipes/icons/veintitres.png new file mode 100644 index 0000000000..9afc903d94 Binary files /dev/null and b/recipes/icons/veintitres.png differ diff --git a/recipes/icons/vejlelokalavisen_dk.png b/recipes/icons/vejlelokalavisen_dk.png index 6329cfd130..70464fce89 100644 Binary files a/recipes/icons/vejlelokalavisen_dk.png and b/recipes/icons/vejlelokalavisen_dk.png differ diff --git a/recipes/icons/version2.png b/recipes/icons/version2.png index cafd2e1db5..758dec6003 100644 Binary files a/recipes/icons/version2.png and b/recipes/icons/version2.png differ diff --git a/recipes/icons/verstka.png b/recipes/icons/verstka.png index 97a565df50..c206a2002e 100644 Binary files a/recipes/icons/verstka.png and b/recipes/icons/verstka.png differ diff --git a/recipes/icons/vesterbrobladet_dk.png b/recipes/icons/vesterbrobladet_dk.png new file mode 100644 index 0000000000..6fc915ca3d Binary files /dev/null and b/recipes/icons/vesterbrobladet_dk.png differ diff --git a/recipes/icons/vfr_magazine.png b/recipes/icons/vfr_magazine.png index 8ca1529bdc..aa7a668a50 100644 Binary files a/recipes/icons/vfr_magazine.png and b/recipes/icons/vfr_magazine.png differ diff --git a/recipes/icons/vic_times.png b/recipes/icons/vic_times.png new file mode 100644 index 0000000000..b72d9c0173 Binary files /dev/null and b/recipes/icons/vic_times.png differ diff --git a/recipes/icons/vice.png b/recipes/icons/vice.png index 749d79b6c0..06ab0b8a77 100644 Binary files a/recipes/icons/vice.png and b/recipes/icons/vice.png differ diff --git a/recipes/icons/vijesti.png b/recipes/icons/vijesti.png index 938d0419e1..be3da20413 100644 Binary files a/recipes/icons/vijesti.png and b/recipes/icons/vijesti.png differ diff --git a/recipes/icons/vikna_ru.png b/recipes/icons/vikna_ru.png index 
a70191fd77..a0252a0435 100644 Binary files a/recipes/icons/vikna_ru.png and b/recipes/icons/vikna_ru.png differ diff --git a/recipes/icons/vikna_ua.png b/recipes/icons/vikna_ua.png index a70191fd77..a0252a0435 100644 Binary files a/recipes/icons/vikna_ua.png and b/recipes/icons/vikna_ua.png differ diff --git a/recipes/icons/villagevoice.png b/recipes/icons/villagevoice.png new file mode 100644 index 0000000000..e1a5c38d36 Binary files /dev/null and b/recipes/icons/villagevoice.png differ diff --git a/recipes/icons/vio_mundo.png b/recipes/icons/vio_mundo.png index 303fa6302e..dd3e5c9072 100644 Binary files a/recipes/icons/vio_mundo.png and b/recipes/icons/vio_mundo.png differ diff --git a/recipes/icons/virtualshackles.png b/recipes/icons/virtualshackles.png index 0df72fdf28..b8cee80363 100644 Binary files a/recipes/icons/virtualshackles.png and b/recipes/icons/virtualshackles.png differ diff --git a/recipes/icons/vitalia.png b/recipes/icons/vitalia.png index 9cfa1ef63d..37d3bd2ca1 100644 Binary files a/recipes/icons/vitalia.png and b/recipes/icons/vitalia.png differ diff --git a/recipes/icons/vnexpress.png b/recipes/icons/vnexpress.png new file mode 100644 index 0000000000..8fbaf88371 Binary files /dev/null and b/recipes/icons/vnexpress.png differ diff --git a/recipes/icons/voetbal_belgie.png b/recipes/icons/voetbal_belgie.png new file mode 100644 index 0000000000..4715132f25 Binary files /dev/null and b/recipes/icons/voetbal_belgie.png differ diff --git a/recipes/icons/voice_of_america.png b/recipes/icons/voice_of_america.png index 6013646336..18e35f0395 100644 Binary files a/recipes/icons/voice_of_america.png and b/recipes/icons/voice_of_america.png differ diff --git a/recipes/icons/vreme.png b/recipes/icons/vreme.png index 385ceda4e6..756ddf54da 100644 Binary files a/recipes/icons/vreme.png and b/recipes/icons/vreme.png differ diff --git a/recipes/icons/vrijnederland.png b/recipes/icons/vrijnederland.png index 41806323a6..6fe2f9649f 100644 Binary files 
a/recipes/icons/vrijnederland.png and b/recipes/icons/vrijnederland.png differ diff --git a/recipes/icons/walla.png b/recipes/icons/walla.png index 0fd6025a13..2c83e2f2dd 100644 Binary files a/recipes/icons/walla.png and b/recipes/icons/walla.png differ diff --git a/recipes/icons/warentest.png b/recipes/icons/warentest.png index 22f1b791a6..893aad03ad 100644 Binary files a/recipes/icons/warentest.png and b/recipes/icons/warentest.png differ diff --git a/recipes/icons/wash_post_print.png b/recipes/icons/wash_post_print.png new file mode 100644 index 0000000000..a2b47a8249 Binary files /dev/null and b/recipes/icons/wash_post_print.png differ diff --git a/recipes/icons/weilheimer_tagblatt.png b/recipes/icons/weilheimer_tagblatt.png index c81f477f08..986f67a35c 100644 Binary files a/recipes/icons/weilheimer_tagblatt.png and b/recipes/icons/weilheimer_tagblatt.png differ diff --git a/recipes/icons/welt.png b/recipes/icons/welt.png index 8fdef53330..afd2d7233c 100644 Binary files a/recipes/icons/welt.png and b/recipes/icons/welt.png differ diff --git a/recipes/icons/winsupersite.png b/recipes/icons/winsupersite.png index 65b496a163..d4c5442770 100644 Binary files a/recipes/icons/winsupersite.png and b/recipes/icons/winsupersite.png differ diff --git a/recipes/icons/wirtscafts_woche.png b/recipes/icons/wirtscafts_woche.png index be800b7aeb..b8fa0c7878 100644 Binary files a/recipes/icons/wirtscafts_woche.png and b/recipes/icons/wirtscafts_woche.png differ diff --git a/recipes/icons/wonderzine.png b/recipes/icons/wonderzine.png index d520f25f52..4e6bd3409e 100644 Binary files a/recipes/icons/wonderzine.png and b/recipes/icons/wonderzine.png differ diff --git a/recipes/icons/workers_world.png b/recipes/icons/workers_world.png index 48b7d97e1b..25cc936a50 100644 Binary files a/recipes/icons/workers_world.png and b/recipes/icons/workers_world.png differ diff --git a/recipes/icons/wsj_free.png b/recipes/icons/wsj_free.png new file mode 100644 index 0000000000..5fb5496c5a Binary 
files /dev/null and b/recipes/icons/wsj_free.png differ diff --git a/recipes/icons/xkcd.png b/recipes/icons/xkcd.png new file mode 100644 index 0000000000..ca0cfe3d5c Binary files /dev/null and b/recipes/icons/xkcd.png differ diff --git a/recipes/icons/yahoo_news.png b/recipes/icons/yahoo_news.png index aa344b0198..4ac5329585 100644 Binary files a/recipes/icons/yahoo_news.png and b/recipes/icons/yahoo_news.png differ diff --git a/recipes/icons/yakima_herald.png b/recipes/icons/yakima_herald.png new file mode 100644 index 0000000000..7e8de676aa Binary files /dev/null and b/recipes/icons/yakima_herald.png differ diff --git a/recipes/icons/yalansavar.png b/recipes/icons/yalansavar.png index 97b2d47cc5..aa8f88530e 100644 Binary files a/recipes/icons/yalansavar.png and b/recipes/icons/yalansavar.png differ diff --git a/recipes/icons/yementimes.png b/recipes/icons/yementimes.png new file mode 100644 index 0000000000..b74ecc420b Binary files /dev/null and b/recipes/icons/yementimes.png differ diff --git a/recipes/icons/yenisafak_gazetesi.png b/recipes/icons/yenisafak_gazetesi.png index 399f542ae5..258c8d7a31 100644 Binary files a/recipes/icons/yenisafak_gazetesi.png and b/recipes/icons/yenisafak_gazetesi.png differ diff --git a/recipes/icons/ynet.png b/recipes/icons/ynet.png new file mode 100644 index 0000000000..ae1ea1efb0 Binary files /dev/null and b/recipes/icons/ynet.png differ diff --git a/recipes/icons/yomiuri.png b/recipes/icons/yomiuri.png index 0d720916bb..a747d7df33 100644 Binary files a/recipes/icons/yomiuri.png and b/recipes/icons/yomiuri.png differ diff --git a/recipes/icons/yomiuri_world.png b/recipes/icons/yomiuri_world.png new file mode 100644 index 0000000000..2f1a37e3ff Binary files /dev/null and b/recipes/icons/yomiuri_world.png differ diff --git a/recipes/icons/zackzack.png b/recipes/icons/zackzack.png new file mode 100644 index 0000000000..33940edaea Binary files /dev/null and b/recipes/icons/zackzack.png differ diff --git a/recipes/icons/zaman.png 
b/recipes/icons/zaman.png index bebb3e29b0..11a741f2fa 100644 Binary files a/recipes/icons/zaman.png and b/recipes/icons/zaman.png differ diff --git a/recipes/icons/zaobao.png b/recipes/icons/zaobao.png new file mode 100644 index 0000000000..0db21d760c Binary files /dev/null and b/recipes/icons/zaobao.png differ diff --git a/recipes/icons/zaufana_trzecia_strona.png b/recipes/icons/zaufana_trzecia_strona.png index 6c38f65320..dda45170e7 100644 Binary files a/recipes/icons/zaufana_trzecia_strona.png and b/recipes/icons/zaufana_trzecia_strona.png differ diff --git a/recipes/icons/zaxid_net.png b/recipes/icons/zaxid_net.png index 210a418b46..52f499e857 100644 Binary files a/recipes/icons/zaxid_net.png and b/recipes/icons/zaxid_net.png differ diff --git a/recipes/icons/zdnet.fr.png b/recipes/icons/zdnet.fr.png index 2f5623fc10..3746b2e034 100644 Binary files a/recipes/icons/zdnet.fr.png and b/recipes/icons/zdnet.fr.png differ diff --git a/recipes/icons/zdnet.png b/recipes/icons/zdnet.png index f80c5a3801..1a8038fd00 100644 Binary files a/recipes/icons/zdnet.png and b/recipes/icons/zdnet.png differ diff --git a/recipes/icons/zeitde_sub.png b/recipes/icons/zeitde_sub.png new file mode 100644 index 0000000000..a2d858d29b Binary files /dev/null and b/recipes/icons/zeitde_sub.png differ diff --git a/recipes/icons/zerocalcare.png b/recipes/icons/zerocalcare.png new file mode 100644 index 0000000000..983a91c3ae Binary files /dev/null and b/recipes/icons/zerocalcare.png differ diff --git a/recipes/icons/zerohedge.png b/recipes/icons/zerohedge.png index e06ad0632e..4f5852c5c4 100644 Binary files a/recipes/icons/zerohedge.png and b/recipes/icons/zerohedge.png differ diff --git a/recipes/icons/zita_be.png b/recipes/icons/zita_be.png new file mode 100644 index 0000000000..d3a9af6829 Binary files /dev/null and b/recipes/icons/zita_be.png differ diff --git a/recipes/icons/zn_ru.png b/recipes/icons/zn_ru.png index 0f8c8b7485..4cdd3f3a9b 100644 Binary files a/recipes/icons/zn_ru.png 
and b/recipes/icons/zn_ru.png differ diff --git a/recipes/icons/zn_ua.png b/recipes/icons/zn_ua.png index 0f8c8b7485..4cdd3f3a9b 100644 Binary files a/recipes/icons/zn_ua.png and b/recipes/icons/zn_ua.png differ diff --git a/recipes/icons/znadplanszy_pl.png b/recipes/icons/znadplanszy_pl.png new file mode 100644 index 0000000000..693e90250f Binary files /dev/null and b/recipes/icons/znadplanszy_pl.png differ diff --git a/recipes/icons/zougla.png b/recipes/icons/zougla.png new file mode 100644 index 0000000000..95461dd865 Binary files /dev/null and b/recipes/icons/zougla.png differ diff --git a/recipes/ifzm.recipe b/recipes/ifzm.recipe index 1ad2145878..9d46bd6b84 100644 --- a/recipes/ifzm.recipe +++ b/recipes/ifzm.recipe @@ -1,56 +1,60 @@ from calibre.web.feeds.news import BasicNewsRecipe +import json + +def json_to_html(raw, link): + data = json.loads(raw) + data = data['data']['content'] + title = '

    '.format(link) + data['subject'] + '

    ' + auth = '

    ' + data['author'] + '

    ' + sub = '

    ' + data['introtext'] + '

    ' + body = data['fulltext'] + return '
    ' + title + auth + sub + body + '
    ' -class AdvancedUserRecipe1277305250(BasicNewsRecipe): - title = u'infzm - China Southern Weekly' - oldest_article = 14 - max_articles_per_feed = 100 - - feeds = [(u'\u5357\u65b9\u5468\u672b-\u70ed\u70b9\u65b0\u95fb', u'http://www.infzm.com/rss/home/rss2.0.xml'), - (u'\u5357\u65b9\u5468\u672b-\u7ecf\u6d4e\u65b0\u95fb', - u'http://www.infzm.com/rss/economic.xml'), - (u'\u5357\u65b9\u5468\u672b-\u6587\u5316\u65b0\u95fb', - u'http://www.infzm.com/rss/culture.xml'), - (u'\u5357\u65b9\u5468\u672b-\u751f\u6d3b\u65f6\u5c1a', - u'http://www.infzm.com/rss/lifestyle.xml'), - (u'\u5357\u65b9\u5468\u672b-\u89c2\u70b9', - u'http://www.infzm.com/rss/opinion.xml') - ] - __author__ = 'rty' - __version__ = '1.0' +class infzm(BasicNewsRecipe): + title = '南方周末' + __author__ = 'unkn0wn' + description = 'Southern Weekly (infzm.com), founded in 1984, is a Chinese weekly newspaper based in Guangzhou. Download Weekly.' language = 'zh' - publisher = 'http://www.infzm.com' - description = 'Chinese Weekly Tabloid' - category = 'News, China' - remove_javascript = True - use_embedded_content = False + encoding = 'utf-8' no_stylesheets = True - encoding = 'UTF-8' - conversion_options = {'linearize_tables': True} - masthead_url = 'http://i50.tinypic.com/2qmfb7l.jpg' + remove_javascript = True + ignore_duplicate_articles = {'title'} + remove_empty_feeds = True + use_embedded_content = False + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'http://ssimg.kkod.cn/web/02/14227.gif' + + articles_are_obfuscated = True + + remove_tags = [dict(name=['video', 'svg', 'button'])] + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True)['href'].split('?')[0] + res_link = link.replace('https://www.infzm.com', 'https://api.infzm.com/mobile') \ + + 
'?platform=wap&version=1.89.0&machine_id=35458aa29603f2b246636e5492122b50&user_id=&token=&member_type=' + # if article is paywalled, add code to figure out machine_id + raw = br.open(res_link).read() + html = json_to_html(raw, link) + return ({ 'data': html, 'url': link }) extra_css = ''' - @font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n - body { - margin-right: 8pt; - font-family: 'DroidFont', serif;} - .detailContent {font-family: 'DroidFont', serif, sans-serif} - ''' + img {display:block; margin:0 auto;} + .cm_pic_caption, .cm_pic_author { font-size:small; text-align:center; } + ''' - keep_only_tags = [ - dict(name='div', attrs={'id': 'detailContent'}), - ] - remove_tags = [ - dict(name='div', attrs={ - 'id': ['detailTools', 'detailSideL', 'pageNum']}), - ] - remove_tags_after = [ - dict(name='div', attrs={'id': 'pageNum'}), + feeds = [ + ('南方周末', 'https://news.google.com/rss/search?q=when:170h+allinurl:https%3A%2F%2Fwww.infzm.com&hl=zh-HK&gl=HK&ceid=HK:zh') ] - def preprocess_html(self, soup): - for item in soup.findAll(color=True): - del item['font'] - for item in soup.findAll(style=True): - del item['style'] - return soup + def populate_article_metadata(self, article, soup, first): + article.title = article.title.replace(' - 南方周末', '') + article.url = soup.find('h1')['title'] + article.summary = self.tag_to_string(soup.find(attrs={'class':'intro'})) + article.text_summary = self.tag_to_string(soup.find(attrs={'class':'intro'})) diff --git a/recipes/il_post.recipe b/recipes/il_post.recipe index c7b3c063e8..c862ca88d7 100644 --- a/recipes/il_post.recipe +++ b/recipes/il_post.recipe @@ -10,14 +10,16 @@ from __future__ import absolute_import, division, print_function, unicode_literals from calibre.web.feeds.news import BasicNewsRecipe -from calibre.utils.magick import Image +from datetime import date, timedelta + +dates = [ date.today().strftime('%Y/%m/%d'), (date.today() - 
timedelta(1)).strftime('%Y/%m/%d') ] # ----------- CUSTOMIZATION OPTIONS START ----------- # Comment (add # in front) to disable the sections you are not interested in # Commenta (aggiungi # davanti alla riga) per disabilitare le sezioni che non vuoi scaricare sections = [ - ("Prima Pagina", "https://www.ilpost.it/prime-pagine"), + ("Italia", "https://www.ilpost.it/italia/"), ("Mondo", "https://www.ilpost.it/mondo/"), ("Politica", "https://www.ilpost.it/politica/"), ("Tecnologia", "https://www.ilpost.it/tecnologia/"), @@ -33,16 +35,11 @@ sections = [ ("Konrad", "https://www.ilpost.it/europa/"), ] -# Change this to True if you want grey images -convert_to_grayscale = False - # ----------- CUSTOMIZATION OPTIONS OVER ----------- -prefixes = {"Permalink to", "Commenta", "Link all'articolo"} - class IlPost(BasicNewsRecipe): - __author__ = 'Marco Scirea' + __author__ = 'Marco Scirea, unkn0wn' __license__ = 'GPL v3' __copyright__ = '2019, Marco Scirea ' @@ -54,59 +51,42 @@ class IlPost(BasicNewsRecipe): ' la ricetta puo\' essere configurata per tenerle a colori' ) tags = "news" - cover_url = "https://www.ilpost.it/wp-content/themes/ilpost/images/ilpost.svg" + masthead_url = 'https://www.ilpost.it/error/images/ilpost.svg' ignore_duplicate_articles = {"title", "url"} no_stylesheets = True - keep_only_tags = [dict(id=["expanding", "singleBody"])] + extra_css = ' .wp-caption-text { font-size:small; } ' + keep_only_tags = [dict(name='main', attrs={'id':lambda x: x and x.startswith('index_main-content__')})] + remove_tags_before = [dict(name='article')] + remove_tags_after = [dict(name='article')] + remove_tags = [ + dict(attrs={'class':lambda x: x and x.startswith( + ('index_actions__', 'index_il-post-comments___', 'index_art_tag__') + )}), + dict(attrs={'id':'audioPlayerArticle'}) + ] def parse_page(self, name, url): self.log.debug(url) soup = self.index_to_soup(url) entries = [] for article in soup.findAll('article'): - for link in article.findAll('a', href=True, 
title=True): - if not link["href"].startswith("https://www.ilpost.it/20"): + for link in article.findAll('a', href=True): + if not any(x in link['href'] for x in dates): continue - title = link["title"] - for prefix in prefixes: - if title.startswith(prefix): - title = title.lstrip(prefix) - break - title = title.strip() + title = self.tag_to_string(link.h2) + desc = self.tag_to_string(link.p) + if not title: + continue + self.log('\t', title) entries.append({ "url": link["href"], "title": title, + "description": desc }) return (name, entries) - def populate_article_metadata(self, article, soup, first): - description = soup.find(attrs={"name": "description"}) - article.summary = description[ - "content"] if description else "No meta description given" - article.text_summary = description[ - "content"] if description else "No meta description given" - def parse_index(self): feeds = [] for section in sections: feeds.append(self.parse_page(section[0], section[1])) return feeds - - if convert_to_grayscale: - # Image conversion to greyscale by Starson17 - # https://www.mobileread.com/forums/showpost.php?p=1814815&postcount=15 - def postprocess_html(self, soup, first): - # process all the images - for tag in soup.findAll('img', src=True): - iurl = tag['src'] - img = Image() - img.open(iurl) - img.type = "GrayscaleType" - img.save(iurl) - return soup - - def preprocess_html(self, soup): - galleryItems = soup.findAll("figure", {"class": "gallery-item"}) - if galleryItems: - self.abort_article() - return soup diff --git a/recipes/inc42.recipe b/recipes/inc42.recipe new file mode 100644 index 0000000000..5aaa10ddb3 --- /dev/null +++ b/recipes/inc42.recipe @@ -0,0 +1,65 @@ +from calibre.ptempfile import PersistentTemporaryFile +from calibre.web.feeds.news import BasicNewsRecipe, classes + +class inc42(BasicNewsRecipe): + title = 'Inc42' + __author__ = 'unkn0wn' + description = 'Inc42 is India’s largest tech media platform working with the mission to accelerate the GDP of 
India’s tech & startup economy.' + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'en_IN' + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://omcdn.inc42.com/users/d0ffd8ffa0d2/images/4477fc48bee71659696918-color-black-1-e1576150264134.png?width=224' + + keep_only_tags = [ + classes('entry-header entry-content'), + ] + + remove_tags = [ + dict(name='button'), + classes('also-read slick-list slides-three common-card'), + ] + + ignore_duplicate_articles = {'title'} + remove_empty_feeds = True + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + feeds = [] + + sections = [ + 'features', 'buzz', 'startups', 'resources' + ] + + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:inc42.com{}&hl=en-IN&gl=IN&ceid=IN:en' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + feeds.append(('Others', a.format(''))) + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'] + return soup diff --git a/recipes/india_today.recipe b/recipes/india_today.recipe index 33dfa40716..33cd5c2daa 100644 --- a/recipes/india_today.recipe +++ b/recipes/india_today.recipe @@ -38,7 +38,7 @@ class IndiaToday(BasicNewsRecipe): ''' remove_tags = [ - classes('checkout__section sharing align-center-button amp-izooto-sub ads__container 
inline-story-add amp-ad'), + classes('checkout__section sharing align-center-button amp-izooto-sub ads__container inline-story-add amp-ad readmore__box'), dict(name=(('amp-web-push-widget', 'amp-ad'))), dict(attrs={'id':'tab-link-wrapper-plugin'}), dict(name='div', attrs={'amp-access':'NOT granted'}) @@ -63,11 +63,10 @@ class IndiaToday(BasicNewsRecipe): sections = {} date = soup.find(attrs={'class':lambda x: x and x.startswith('MagazineEdition_edition__date')}) - edition = soup.find(attrs={'class':lambda x: x and x.startswith('MagazineEdition_magazineprime')}) - self.timefmt =' (' + self.tag_to_string(edition) + ') [' + self.tag_to_string(date).strip() + ']' - p = edition.findNext('p') - if p: - self.description = self.tag_to_string(p).strip() + edition = soup.find(attrs={'class':'prime__magazine'}) + self.timefmt = '(' + self.tag_to_string(edition).strip() +') [' + self.tag_to_string(date).strip() + ']' + if p := edition.findNext('p'): + self.description = self.tag_to_string(p) self.log('Downloading Issue: ', self.timefmt) for tag in soup.findAll('div', attrs={'class': lambda x: x and 'NoCard_story__grid__' in x}): @@ -125,11 +124,5 @@ class IndiaToday(BasicNewsRecipe): quo.name = 'blockquote' return soup - def populate_article_metadata(self, article, soup, first): - if first and hasattr(self, 'add_toc_thumbnail'): - image = soup.find('img', src=True, attrs={'class':'i-amphtml-fill-content'}) - if image is not None: - self.add_toc_thumbnail(article, image['src']) - def print_version(self, url): return url.replace('.in/','.in/amp/') diff --git a/recipes/indian_express.recipe b/recipes/indian_express.recipe index e9e3620c6b..5a97429daa 100644 --- a/recipes/indian_express.recipe +++ b/recipes/indian_express.recipe @@ -1,4 +1,6 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes +from datetime import datetime, timedelta +from calibre.utils.date import parse_date class IndianExpress(BasicNewsRecipe): @@ -15,12 +17,10 @@ class IndianExpress(BasicNewsRecipe): 
ignore_duplicate_articles = {'url'} extra_css = ''' - #storycenterbyline {font-size:small;} - #img-cap {font-size:small;} - blockquote{color:#404040;} - em{font-style:italic; color:#202020;} - #sub-d{color:#202020; font-style:italic;} - .ie-authorbox{font-size:small;} + #img-cap, .ie-authorbox, .author-block, #storycenterbyline { font-size:small; } + blockquote { color:#404040; } + em, #sub-d { color:#202020; font-style:italic; } + img { display:block; margin:0 auto; } ''' resolve_internal_links = True @@ -33,15 +33,17 @@ class IndianExpress(BasicNewsRecipe): dict(name='img', attrs={'src':lambda x: x and x.endswith('-button-300-ie.jpeg')}), dict(name='a', attrs={'href':lambda x: x and x.endswith('/?utm_source=newbanner')}), classes( - 'share-social appstext ie-int-campign-ad ie-breadcrumb custom_read_button unitimg copyright' - ' storytags pdsc-related-modify news-guard premium-story append_social_share' - ' digital-subscriber-only h-text-widget ie-premium ie-first-publish adboxtop adsizes immigrationimg' - 'next-story-wrap ie-ie-share next-story-box brand-logo quote_section ie-customshare' - ' custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec' + 'share-social appstext ie-int-campign-ad ie-breadcrumb custom_read_button unitimg copyright ' + 'storytags pdsc-related-modify news-guard premium-story append_social_share ie-int-campign-ad ' + 'digital-subscriber-only h-text-widget ie-premium ie-first-publish adboxtop adsizes immigrationimg ' + 'next-story-wrap ie-ie-share next-story-box brand-logo quote_section ie-customshare osv-ad-class ' + 'custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec o-story-list subscriber_hide ' + 'author-social author-follow author-img premium_widget_below_article' ) ] def parse_index(self): + section_list = [ ('Front Page', 'https://indianexpress.com/print/front-page/'), ('India', 'https://indianexpress.com/section/india/'), @@ -49,12 +51,17 @@ class IndianExpress(BasicNewsRecipe): ('Delhi 
Confidential', 'https://indianexpress.com/section/delhi-confidential/'), ('Opinion', 'http://indianexpress.com/section/opinion/'), ('UPSC-CSE Key', 'https://indianexpress.com/section/upsc-current-affairs/'), + ('Explained', 'https://indianexpress.com/section/explained/'), ('Business', 'https://indianexpress.com/section/business/'), - ('Political Pulse', 'https://indianexpress.com/section/political-pulse/'), + # ('Political Pulse', 'https://indianexpress.com/section/political-pulse/'), ('Sunday Eye', 'https://indianexpress.com/section/express-sunday-eye/'), + ('World', 'https://indianexpress.com/section/world/'), # ('Education', 'https://indianexpress.com/section/education/'), # ('Gadgets', 'https://indianexpress.com/section/technology/gadgets/'), - # ('Tech Review', 'https://indianexpress.com/section/technology/tech-reviews/'), + ('Tech Review', 'https://indianexpress.com/section/technology/tech-reviews/'), + # ('Techhook', 'https://indianexpress.com/section/technology/techook/'), + # ('Laptops', 'https://indianexpress.com/section/technology/laptops/'), + # ('Mobiles & Tabs', 'https://indianexpress.com/section/technology/mobile-tabs/'), ('Science', 'https://indianexpress.com/section/technology/science/'), ('Movie Review', 'https://indianexpress.com/section/entertainment/movie-review/'), ] @@ -67,30 +74,40 @@ class IndianExpress(BasicNewsRecipe): section_url = section[1] self.log(section_title, section_url) soup = self.index_to_soup(section_url) - articles = self.articles_from_soup(soup) + if '/world/' in section_url or '/explained/' in section_url: + articles = self.articles_from_page(soup) + else: + articles = self.articles_from_soup(soup) if articles: feeds.append((section_title, articles)) return feeds + def articles_from_page(self, soup): + ans = [] + for div in soup.findAll(attrs={'class':['northeast-topbox', 'explained-section-grid']}): + for a in div.findAll('a', href=True): + if not a.find('img') and '/section/' not in a['href']: + url = a['href'] + title = 
self.tag_to_string(a) + self.log('\t', title, '\n\t\t', url) + ans.append({'title': title, 'url': url, 'description': ''}) + return ans + def articles_from_soup(self, soup): ans = [] div = soup.find('div', attrs={'class':['nation', 'o-opin']}) for art in div.findAll(attrs={'class':['articles', 'o-opin-article']}): for a in art.findAll('a', href=True): - if not a.find('img'): + if not a.find('img') and not ('/profile/' in a['href'] or '/agency/' in a['href']): url = a['href'] title = self.tag_to_string(a) desc = '' if p:= art.find('p'): desc = self.tag_to_string(p) if da := art.find('div', attrs={'class':['date', 'o-opin-date']}): - from datetime import datetime, timedelta - from calibre.utils.date import parse_date - d = parse_date(self.tag_to_string(da)).replace(tzinfo=None) + date = parse_date(self.tag_to_string(da)).replace(tzinfo=None) today = datetime.now() - if (today - d) > timedelta(self.oldest_article): - url = '' - if not url or not title: + if (today - date) > timedelta(self.oldest_article): continue self.log('\t', title, '\n\t', desc, '\n\t\t', url) ans.append({'title': title, 'url': url, 'description': desc}) @@ -104,8 +121,7 @@ class IndianExpress(BasicNewsRecipe): return citem['content'] def preprocess_html(self, soup): - h2 = soup.find('h2') - if h2: + if h2 := soup.find('h2'): h2.name = 'p' h2['id'] = 'sub-d' for span in soup.findAll( @@ -119,4 +135,9 @@ class IndianExpress(BasicNewsRecipe): if lazy is not None: lazy.extract() noscript.name = 'div' + if span := soup.find('span', content=True, attrs={'itemprop':'dateModified'}): + date = parse_date(span['content']).replace(tzinfo=None) + today = datetime.now() + if (today - date) > timedelta(self.oldest_article): + self.abort_article('Skipping old article') return soup diff --git a/recipes/indian_express_print_edition.recipe b/recipes/indian_express_print_edition.recipe deleted file mode 100644 index cc5bbe74a4..0000000000 --- a/recipes/indian_express_print_edition.recipe +++ /dev/null @@ -1,87 +0,0 
@@ -from calibre.web.feeds.news import BasicNewsRecipe, classes -from collections import defaultdict - - -class IndianExpressPrint(BasicNewsRecipe): - title = u'Indian Express | Print Edition' - language = 'en_IN' - __author__ = 'unkn0wn' - masthead_url = 'https://indianexpress.com/wp-content/themes/indianexpress/images/indian-express-logo-n.svg' - no_stylesheets = True - use_embedded_content = False - remove_attributes = ['style', 'height', 'width'] - ignore_duplicate_articles = {'url'} - - extra_css = ''' - #storycenterbyline {font-size:small;} - #img-cap {font-size:small;} - blockquote{color:#404040;} - em{font-style:italic; color:#202020;} - #sub-d{color:#202020; font-style:italic;} - .ie-authorbox{font-size:small;} - ''' - - resolve_internal_links = True - remove_empty_feeds = True - - keep_only_tags = [classes('heading-part full-details')] - remove_tags = [ - dict(name='div', attrs={'id': 'ie_story_comments'}), - dict(name='div', attrs={'class': lambda x: x and 'related-widget' in x}), - dict(name='img', attrs={'src':lambda x: x and x.endswith('-button-300-ie.jpeg')}), - dict(name='a', attrs={'href':lambda x: x and x.endswith('/?utm_source=newbanner')}), - classes( - 'share-social appstext ie-int-campign-ad ie-breadcrumb custom_read_button unitimg copyright' - ' storytags pdsc-related-modify news-guard premium-story append_social_share' - ' digital-subscriber-only h-text-widget ie-premium ie-first-publish adboxtop adsizes immigrationimg' - 'next-story-wrap ie-ie-share next-story-box brand-logo quote_section ie-customshare' - ' custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec' - ) - ] - - def parse_index(self): - soup = self.index_to_soup('https://indianexpress.com/todays-paper/') - feeds_dict = defaultdict(list) - div = soup.find('div', attrs={'class':'today-paper'}) - for a in div.findAll('a', attrs={'href':lambda x: x and x.startswith( - ('https://indianexpress.com/article/', 'https://indianexpress.com/elections/') - )}): - if not 
a.find('img'): - url = a['href'] - title = self.tag_to_string(a) - section = 'Front Page' - if str := a.findParent('strong'): - if span := str.find_previous_sibling('span'): - section = self.tag_to_string(span) - # if 'City' in section: - # url = '' - if not url or not title: - continue - self.log(section, '\n\t', title, '\n\t\t', url) - feeds_dict[section].append({"title": title, "url": url}) - return [(section, articles) for section, articles in feeds_dict.items()] - - def get_cover_url(self): - soup = self.index_to_soup( - 'https://www.magzter.com/IN/The-Indian-Express-Ltd./The-Indian-Express-Mumbai/Newspaper/' - ) - for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): - return citem['content'] - - def preprocess_html(self, soup): - h2 = soup.find('h2') - if h2: - h2.name = 'p' - h2['id'] = 'sub-d' - for span in soup.findAll( - 'span', attrs={'class': ['ie-custom-caption', 'custom-caption']} - ): - span['id'] = 'img-cap' - for img in soup.findAll('img'): - noscript = img.findParent('noscript') - if noscript is not None: - lazy = noscript.findPreviousSibling('img') - if lazy is not None: - lazy.extract() - noscript.name = 'div' - return soup diff --git a/recipes/irish_independent.recipe b/recipes/irish_independent.recipe index 5eb2de46f0..f82aee6f55 100644 --- a/recipes/irish_independent.recipe +++ b/recipes/irish_independent.recipe @@ -12,34 +12,32 @@ class IrishIndependent(BasicNewsRecipe): description = 'Irish and World news from Irelands Bestselling Daily Broadsheet' __author__ = 'Neil Grogan' language = 'en_IE' - oldest_article = 7 + oldest_article = 2 max_articles_per_feed = 100 - remove_tags_before = dict(id='article') - remove_tags_after = [dict(name='div', attrs={'class': 'toolsBottom'})] no_stylesheets = True + ignore_duplicate_articles = {'url'} + + keep_only_tags = [ + dict(name='div', attrs={'class':lambda x: x and '_contentwrapper' in x}) + ] + remove_tags = [ - dict(name='div', attrs={'class': 'toolsBottom'}), - 
dict(name='div', attrs={'class': 'toolsTop'}), - dict(name='div', attrs={'class': 'boxRelated'}), - dict(name='div', attrs={'class': 'section first'}), - dict(name='div', attrs={'class': 'tabIt'}), - dict(name='div', attrs={'class': 'inner'}) + dict(name=['svg', 'button']), + dict(name='div', attrs={'data-testid':['article-share', 'embed-video', 'inline-related-wrapper']}) ] feeds = [ - (u'Frontpage News', u'http://www.independent.ie/rss'), - (u'World News', u'http://www.independent.ie/world-news/rss'), - (u'Technology', u'http://www.independent.ie/business/technology/rss'), - (u'Sport', u'http://www.independent.ie/sport/rss'), - (u'Entertainment', u'http://www.independent.ie/entertainment/rss'), - (u'Independent Woman', u'http://www.independent.ie/lifestyle/independent-woman/rss'), - (u'Education', u'http://www.independent.ie/education/rss'), - (u'Lifestyle', u'http://www.independent.ie/lifestyle/rss'), - (u'Travel', u'http://www.independent.ie/travel/rss'), - (u'Letters', u'http://www.independent.ie/opinion/letters/rss'), - (u'Weather', u'http://www.independent.ie/weather/rss') + ('Frontpage News', 'http://www.independent.ie/rss'), + ('World News', 'http://www.independent.ie/world-news/rss'), + ('Opinion', 'http://www.independent.ie/opinion/rss'), + ('Business', 'http://www.independent.ie/business/rss'), + ('Sport', 'http://www.independent.ie/sport/rss'), + ('Life', 'http://www.independent.ie/life/rss'), + ('Style', 'http://www.independent.ie/style/rss'), + ('Entertainment', 'http://www.independent.ie/business/rss'), ] -# If text only articles are desired -# def print_version(self, url): -# return '%s?service=Print' % url + def preprocess_html(self, soup): + for img in soup.findAll(attrs={'data-src': True}): + img['src'] = img['data-src'] + return soup diff --git a/recipes/irish_times.recipe b/recipes/irish_times.recipe index f4771a9bbc..b868d152da 100644 --- a/recipes/irish_times.recipe +++ b/recipes/irish_times.recipe @@ -32,13 +32,30 @@ class 
IrishTimes(BasicNewsRecipe): no_stylesheets = True temp_files = [] keep_only_tags = [ - dict(name=['h1', 'h2']), - classes('lead-art-wrapper article-body-wrapper'), + classes('custom-headline custom-subheadline lead-art-wrapper article-body-wrapper byline-text'), ] remove_tags = [ - dict(name='button') + dict(name='button'), + classes('sm-promo-headline top-table-list-container single-divider interstitial-link'), ] remove_attributes = ['width', 'height'] + + def get_cover_url(self): + from datetime import date + cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/ie/irish_times.750.jpg' + br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False) + try: + br.open(cover) + except: + index = 'https://en.kiosko.net/ie/np/irish_times.html' + soup = self.index_to_soup(index) + for image in soup.find('img', attrs={'src': lambda x: x and x.endswith('750.jpg')}): + if image['src'].startswith('/'): + return 'https:' + image['src'] + return image['src'] + self.log("\nCover unavailable") + cover = None + return cover def parse_index(self): soup = self.index_to_soup('https://www.irishtimes.com/') diff --git a/recipes/irish_times_free.recipe b/recipes/irish_times_free.recipe new file mode 100644 index 0000000000..cf1d16ae69 --- /dev/null +++ b/recipes/irish_times_free.recipe @@ -0,0 +1,89 @@ +from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.ptempfile import PersistentTemporaryFile + +class IrishTimes(BasicNewsRecipe): + title = 'The Irish Times (free)' + __author__ = 'unkn0wn' + description = 'Daily news from The Irish Times' + language = 'en_IE' + + masthead_url = 'http://www.irishtimes.com/assets/images/generic/website/logo_theirishtimes.png' + + encoding = 'utf-8' + max_articles_per_feed = 50 + remove_empty_feeds = True + no_stylesheets = True + + keep_only_tags = [ + classes('custom-headline custom-subheadline lead-art-wrapper article-body-wrapper byline-text'), + ] + remove_tags = [ + dict(name=['button', 'svg']), + 
classes('sm-promo-headline top-table-list-container single-divider interstitial-link'), + ] + + remove_attributes = ['width', 'height'] + ignore_duplicate_articles = {'title'} + resolve_internal_links = True + articles_are_obfuscated = True + + def get_cover_url(self): + from datetime import date + cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/ie/irish_times.750.jpg' + br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False) + try: + br.open(cover) + except: + index = 'https://en.kiosko.net/ie/np/irish_times.html' + soup = self.index_to_soup(index) + for image in soup.find('img', attrs={'src': lambda x: x and x.endswith('750.jpg')}): + if image['src'].startswith('/'): + return 'https:' + image['src'] + return image['src'] + self.log("\nCover unavailable") + cover = None + return cover + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/', '/podcast' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article', link['href']) + self.abort_article('skipping video links') + + self.log('Found', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + # Reduce image sizes to get file size below amazon's email + # sending threshold + self.web2disk_options.compress_news_images = True + self.web2disk_options.compress_news_images_auto_size = 5 + self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') + + feeds = [] + + sections = [ + 'ireland', 'world', 'opinion', 'politics', 
'crime-law', 'culture', 'business', + 'life-style', 'health', 'sport', 'property', 'food', 'abroad', 'environment', + 'obituaries' + ] + + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:irishtimes.com{}&hl=en-IE&gl=IE&ceid=IE:en' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + feeds.append(('Others', a.format(''))) diff --git a/recipes/jpost.recipe b/recipes/jpost.recipe index 9d434d0e8e..2c94daead2 100644 --- a/recipes/jpost.recipe +++ b/recipes/jpost.recipe @@ -15,7 +15,7 @@ class JerusalemPost(BasicNewsRecipe): use_embedded_content = False language = 'en' keep_only_tags = [ - classes('margin-container-body'), + classes('margin-container-body article-title article-subline article-inner-content-breaking-news'), ] remove_tags = [ classes('share-buttons hide-for-premium'), @@ -27,9 +27,12 @@ class JerusalemPost(BasicNewsRecipe): max_articles_per_feed = 10 no_stylesheets = True + def get_browser(self): + return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') + feeds = [ + ('Top Stories', 'https://www.jpost.com/Rss/RssFeedsHeadlines.aspx'), ('Arab Israeli Conflict', 'https://www.jpost.com/rss/rssfeedsarabisraeliconflict.aspx'), - ('Jerusalem', 'https://www.jpost.com/rss/rssfeedsjerusalem.aspx'), ('US Politics', 'https://www.jpost.com/rss/rssfeedsamerican-politics'), ('Israel News', 'https://www.jpost.com/rss/rssfeedsisraelnews.aspx'), ( @@ -37,4 +40,5 @@ class JerusalemPost(BasicNewsRecipe): 'https://www.jpost.com/rss/rssfeedsmiddleeastnews.aspx' ), ('International News', 'https://www.jpost.com/rss/rssfeedsinternational'), + ('Opinion', 'https://www.jpost.com/Rss/RssFeedsOpinion.aspx'), ] diff --git a/recipes/kirkusreviews.recipe b/recipes/kirkusreviews.recipe new file mode 100644 index 0000000000..99de429ee3 --- /dev/null +++ b/recipes/kirkusreviews.recipe @@ -0,0 +1,131 @@ +from urllib.parse import urljoin + +from calibre.web.feeds.news import BasicNewsRecipe + + +class 
KirkusReviews(BasicNewsRecipe): + title = "Kirkus Reviews" + description = ("Kirkus Reviews is an American book review magazine founded in 1933 by Virginia Kirkus." + " The magazine is headquartered in New York City. Released twice monthly on the 1st/15th.") + language = "en" + __author__ = "ping" + publication_type = "magazine" + masthead_url = ( + "https://d1fd687oe6a92y.cloudfront.net/img/kir_images/logo/kirkus-nav-logo.svg" + ) + encoding = "utf-8" + remove_javascript = True + no_stylesheets = True + auto_cleanup = False + ignore_duplicate_articles = {"url"} + compress_news_images = True + compress_news_images_auto_size = 6 + max_articles_per_feed = 99 + + keep_only_tags = [ + dict( + class_=[ + "article-author", + "article-author-img-start", + "article-author-description-start", + "single-review", + ] + ) + ] + remove_tags = [ + dict( + class_=[ + "sidebar-content", + "article-social-share-desktop-first", + "article-social-share-desktop-pagination", + "article-social-share-mobile", + "share-review-text", + "like-dislike-article", + "rate-this-book-text", + "input-group", + "user-comments", + "show-all-response-text", + "button-row", + "hide-on-mobile", + "related-article", + "breadcrumb-row", + "shop-now-dropdown", + ] + ) + ] + remove_tags_after = [dict(class_="single-review")] + + extra_css = """ + .image-container img { max-width: 100%; height: auto; margin-bottom: 0.2rem; } + .photo-caption { font-size: 0.8rem; margin-bottom: 0.5rem; display: block; } + .book-review-img .image-container { text-align: center; } + .book-rating-module .description-title { font-size: 1.25rem; margin-left: 0; text-align: center; } + """ + + def preprocess_html(self, soup): + h1 = soup.find(class_="article-title") + book_cover = soup.find("ul", class_="book-review-img") + if book_cover: + for li in book_cover.find_all("li"): + li.name = "div" + book_cover.name = "div" + if h1: + book_cover.insert_before(h1.extract()) + return soup + + def parse_index(self): + issue_url = 
"https://www.kirkusreviews.com/magazine/current/" + soup = self.index_to_soup(issue_url) + issue = soup.find(name="article", class_="issue-container") + cover_img = issue.select(".issue-header .cover-image img") + if cover_img: + self.cover_url = cover_img[0]["src"] + + h1 = issue.find("h1") + if h1: + self.timefmt = f" [{self.tag_to_string(h1)}]" # edition + + articles = {} + for book_ele in soup.find_all(name="div", class_="issue-featured-book"): + link = book_ele.find("a") + if not link: + continue + section = self.tag_to_string(book_ele.find("h3")).upper() + articles.setdefault(section, []).append( + {"url": urljoin(issue_url, link["href"]), "title": link["title"]} + ) + for post_ele in issue.select("div.issue-more-posts ul li div.lead-text"): + link = post_ele.find("a") + if not link: + continue + section = self.tag_to_string(post_ele.find(class_="lead-text-type")).upper() + articles.setdefault(section, []).append( + { + "url": urljoin(issue_url, link["href"]), + "title": self.tag_to_string(link), + } + ) + for section_ele in issue.select("section.reviews-section"): + section_articles = [] + for review in section_ele.select("ul li.starred"): + link = review.select("h4 a") + if not link: + continue + description = review.find("p") + section_articles.append( + { + "url": urljoin(issue_url, link[0]["href"]), + "title": self.tag_to_string(link[0]), + "description": "" + if not description + else self.tag_to_string(description), + } + ) + if not section_articles: + continue + section = self.tag_to_string(section_ele.find("h3")).upper() + if section not in articles: + articles[section] = [] + articles.setdefault(section, []).extend(section_articles) + + return articles.items() diff --git a/recipes/kyivpost_ar.recipe b/recipes/kyivpost_ar.recipe index 88ee98268c..39e0fc949f 100644 --- a/recipes/kyivpost_ar.recipe +++ b/recipes/kyivpost_ar.recipe @@ -11,19 +11,24 @@ class KyivPost(BasicNewsRecipe): publisher = 'BIZNESGRUPP TOV' category = 'newspaper' cover_url = 
u'https://www.kyivpost.com/wp-content/themes/kyivpost/assets/img/svg/logo-foot.svg' - language = 'ar' + language = 'ar_uk' no_stylesheets = False remove_javascript = True auto_cleanup = False oldest_article = 7 - max_articles_per_feed = 10 + max_articles_per_feed = 20 - remove_tags_before = dict(name='article', attrs={'class': 'article'}) + remove_tags_before = dict(attrs={'class': 'single-article'}) - remove_tags_after = dict(name='article', attrs={'class': 'article'}) + remove_tags_after = dict(attrs={'class': 'post-author-block'}) remove_tags = [ - dict(name='div', attrs={'class': 'entry-footer hide_post_header'}) + dict(attrs={'class': 'post-label-and-topic'}), + dict(attrs={'class': 'sidebar-start'}), + dict(attrs={'class': 'correction'}), + dict(attrs={'id': 'correction'}), + dict(attrs={'class': 'ad_between_paragraphs'}), + dict(attrs={'id': 'insert-post-by-topic'}) ] feeds = [( diff --git a/recipes/kyivpost_en.recipe b/recipes/kyivpost_en.recipe index acdb6a92ca..344ac169a0 100644 --- a/recipes/kyivpost_en.recipe +++ b/recipes/kyivpost_en.recipe @@ -16,14 +16,19 @@ class KyivPost(BasicNewsRecipe): remove_javascript = True auto_cleanup = False oldest_article = 7 - max_articles_per_feed = 10 + max_articles_per_feed = 20 - remove_tags_before = dict(name='article', attrs={'class': 'article'}) + remove_tags_before = dict(attrs={'class': 'single-article'}) - remove_tags_after = dict(name='article', attrs={'class': 'article'}) + remove_tags_after = dict(attrs={'class': 'post-author-block'}) remove_tags = [ - dict(name='div', attrs={'class': 'entry-footer hide_post_header'}) + dict(attrs={'class': 'post-label-and-topic'}), + dict(attrs={'class': 'sidebar-start'}), + dict(attrs={'class': 'correction'}), + dict(attrs={'id': 'correction'}), + dict(attrs={'class': 'ad_between_paragraphs'}), + dict(attrs={'id': 'insert-post-by-topic'}) ] feeds = [('News', 'https://www.kyivpost.com/feed')] diff --git a/recipes/kyivpost_ua.recipe b/recipes/kyivpost_ua.recipe index 
a0040f3c93..e4a05c6ecb 100644 --- a/recipes/kyivpost_ua.recipe +++ b/recipes/kyivpost_ua.recipe @@ -16,14 +16,19 @@ class KyivPost(BasicNewsRecipe): remove_javascript = True auto_cleanup = False oldest_article = 7 - max_articles_per_feed = 10 + max_articles_per_feed = 20 - remove_tags_before = dict(name='article', attrs={'class': 'article'}) + remove_tags_before = dict(attrs={'class': 'single-article'}) - remove_tags_after = dict(name='article', attrs={'class': 'article'}) + remove_tags_after = dict(attrs={'class': 'post-author-block'}) remove_tags = [ - dict(name='div', attrs={'class': 'entry-footer hide_post_header'}) + dict(attrs={'class': 'post-label-and-topic'}), + dict(attrs={'class': 'sidebar-start'}), + dict(attrs={'class': 'correction'}), + dict(attrs={'id': 'correction'}), + dict(attrs={'class': 'ad_between_paragraphs'}), + dict(attrs={'id': 'insert-post-by-topic'}) ] feeds = [ diff --git a/recipes/latimes.recipe b/recipes/latimes.recipe index 2e0ef1edbc..ff9c94c09b 100644 --- a/recipes/latimes.recipe +++ b/recipes/latimes.recipe @@ -63,6 +63,7 @@ class LATimes(BasicNewsRecipe): keep_only_tags = [ classes('headline page-lead-media authors published-date page-article-container'), + dict(attrs={'data-element':'story-body'}), ] remove_tags= [ diff --git a/recipes/le_monde.recipe b/recipes/le_monde.recipe index e47a106125..e255de463e 100644 --- a/recipes/le_monde.recipe +++ b/recipes/le_monde.recipe @@ -16,18 +16,20 @@ import re class LeMonde(BasicNewsRecipe): title = 'Le Monde' - __author__ = 'veezh' + __author__ = 'veezh, Martin Villard' description = 'Les flux RSS du Monde.fr' publisher = 'Société Editrice du Monde' publication_type = 'newspaper' needs_subscription = 'optional' language = 'fr' + encoding = 'utf-8' - oldest_article = 2 - max_articles_per_feed = 15 + oldest_article = 1 no_stylesheets = True remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} + reverse_article_order = True + remove_empty_feeds = True conversion_options = { 
'publisher': publisher @@ -36,30 +38,87 @@ class LeMonde(BasicNewsRecipe): masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/54/Le_monde_logo.svg/800px-Le_monde_logo.svg.png' feeds = [ - ('A la une', 'https://www.lemonde.fr/rss/une.xml'), - ('International', 'https://www.lemonde.fr/international/rss_full.xml'), - ('Politique', 'https://www.lemonde.fr/politique/rss_full.xml'), - ('Société', 'https://www.lemonde.fr/societe/rss_full.xml'), - ('Economie', 'https://www.lemonde.fr/economie/rss_full.xml'), - ('Planète', 'https://www.lemonde.fr/planete/rss_full.xml'), - ('Sciences', 'https://www.lemonde.fr/sciences/rss_full.xml'), - ('Pixels', 'https://www.lemonde.fr/pixels/rss_full.xml'), - ('Culture', 'https://www.lemonde.fr/culture/rss_full.xml'), - ('Idées', 'https://www.lemonde.fr/idees/rss_full.xml') + ('International : Europe ', 'https://www.lemonde.fr/europe/rss_full.xml'), + ('International : Amériques ', 'https://www.lemonde.fr/ameriques/rss_full.xml'), + ('International : Afrique ', 'https://www.lemonde.fr/afrique/rss_full.xml'), + ('International : Asie Pacifique', 'https://www.lemonde.fr/asie-pacifique/rss_full.xml'), + ('International : Proche-Orient', 'https://www.lemonde.fr/proche-orient/rss_full.xml'), + ('International : Royaume-Uni', 'https://www.lemonde.fr/royaume-uni/rss_full.xml'), + ('International : Etats-Unis', 'https://www.lemonde.fr/etats-unis/rss_full.xml'), + ('International : La une', 'https://www.lemonde.fr/international/rss_full.xml'), + ('France : Politique ', 'https://www.lemonde.fr/politique/rss_full.xml'), + ('France : Société ', 'https://www.lemonde.fr/societe/rss_full.xml'), + ('France : Les décodeurs', 'https://www.lemonde.fr/les-decodeurs/rss_full.xml'), + ('France : Justice ', 'https://www.lemonde.fr/justice/rss_full.xml'), + ('France : Police ', 'https://www.lemonde.fr/police/rss_full.xml'), + ('France : Campus ', 'https://www.lemonde.fr/campus/rss_full.xml'), + ('France : Education', 
'https://www.lemonde.fr/education/rss_full.xml'), + ('Economie : Entreprises ', 'https://www.lemonde.fr/entreprises/rss_full.xml'), + ('Economie : Argent ', 'https://www.lemonde.fr/argent/rss_full.xml'), + ('Economie : Économie française', 'https://www.lemonde.fr/economie-francaise/rss_full.xml'), + ('Economie : Industrie', 'https://www.lemonde.fr/industrie/rss_full.xml'), + ('Economie : Emploi ', 'https://www.lemonde.fr/emploi/rss_full.xml'), + ('Economie : Immobilier ', 'https://www.lemonde.fr/immobilier/rss_full.xml'), + ('Economie : Médias', 'https://www.lemonde.fr/medias/rss_full.xml'), + ('Economie : La une', 'https://www.lemonde.fr/economie/rss_full.xml'), + ('Planète: Climat ', 'https://www.lemonde.fr/climat/rss_full.xml'), + ('Planète: Agriculture ', 'https://www.lemonde.fr/agriculture/rss_full.xml'), + ('Planète: Environnement', 'https://www.lemonde.fr/environnement/rss_full.xml'), + ('Planète: La une', 'https://www.lemonde.fr/planete/rss_full.xml'), + ('Sciences : Espace ', 'https://www.lemonde.fr/espace/rss_full.xml'), + ('Sciences : Biologie ', 'https://www.lemonde.fr/biologie/rss_full.xml'), + ('Sciences : Médecine ', 'https://www.lemonde.fr/medecine/rss_full.xml'), + ('Sciences : Physique ', 'https://www.lemonde.fr/physique/rss_full.xml'), + ('Sciences : Santé', 'https://www.lemonde.fr/sante/rss_full.xml'), + ('Sciences : La une', 'https://www.lemonde.fr/sciences/rss_full.xml'), + ('Culture : Cinéma ', 'https://www.lemonde.fr/cinema/rss_full.xml'), + ('Culture : Musiques ', 'https://www.lemonde.fr/musiques/rss_full.xml'), + ('Culture : Télévision et radio', 'https://www.lemonde.fr/televisions-radio/rss_full.xml'), + ('Culture : Le Monde des livres', 'https://www.lemonde.fr/livres/rss_full.xml'), + ('Culture : Arts ', 'https://www.lemonde.fr/arts/rss_full.xml'), + ('Culture : Scènes', 'https://www.lemonde.fr/scenes/rss_full.xml'), + ('Culture : La une', 'https://www.lemonde.fr/culture/rss_full.xml'), + ('Opinions : La une', 
'https://www.lemonde.fr/idees/rss_full.xml'), + ('Opinions : éditoriaux', 'https://www.lemonde.fr/editoriaux/rss_full.xml'), + ('Opinions : chroniques ', 'https://www.lemonde.fr/chroniques/rss_full.xml'), + ('Opinions : tribunes', 'https://www.lemonde.fr/tribunes/rss_full.xml'), + ('Pixels : Jeux vidéo', 'https://www.lemonde.fr/jeux-video/rss_full.xml'), + ('Pixels : Culture web', 'https://www.lemonde.fr/cultures-web/rss_full.xml'), + ('Pixels : La une', 'https://www.lemonde.fr/pixels/rss_full.xml'), + ('Sport : Football ', 'https://www.lemonde.fr/football/rss_full.xml'), + ('Sport : Rugby ', 'https://www.lemonde.fr/rugby/rss_full.xml'), + ('Sport : Tennis ', 'https://www.lemonde.fr/tennis/rss_full.xml'), + ('Sport : Cyclisme ', 'https://www.lemonde.fr/cyclisme/rss_full.xml'), + ('Sport : Basket', 'https://www.lemonde.fr/basket/rss_full.xml'), + ('Sport : La une', 'https://www.lemonde.fr/sport/rss_full.xml'), + ('M le mag : L’époque ', 'https://www.lemonde.fr/m-perso/rss_full.xml'), + ('M le mag : Styles ', 'https://www.lemonde.fr/m-styles/rss_full.xml'), + ('M le mag : Gastronomie ', 'https://www.lemonde.fr/gastronomie/rss_full.xml'), + ('M le mag : Recettes du Monde', 'https://www.lemonde.fr/les-recettes-du-monde/rss_full.xml'), + ('M le mag : Sexo', 'https://www.lemonde.fr/sexo/rss_full.xml'), + ('M le mag : La une', 'https://www.lemonde.fr/m-le-mag/rss_full.xml'), + ('Actualités : A la une', 'https://www.lemonde.fr/rss/une.xml'), + ('Actualités : En continu', 'https://www.lemonde.fr/rss/en_continu.xml'), + ('Actualités : Vidéos ', 'https://www.lemonde.fr/videos/rss_full.xml'), + ('Actualités : Portfolios', 'https://www.lemonde.fr/photo/rss_full.xml'), ] keep_only_tags = [ classes('article__header'), - dict(name='section', attrs={'class': ['article__content', 'article__heading', + dict(name='section', attrs={'class': ['article__cover', 'article__content', 'article__heading', 'article__wrapper']}) ] remove_tags = [ - classes('article__status meta__date 
meta__reading-time meta__social multimedia-embed'), - dict(name=['footer', 'link']), + classes('article__status meta__reading-time meta__social multimedia-embed'), + dict(name=['footer', 'link', 'meta', 'svg', 'button']), dict(name='img', attrs={'class': ['article__author-picture']}), - dict(name='section', attrs={'class': ['article__reactions', 'author', 'catcher', - 'portfolio', 'services-inread']}) + dict(name='section', attrs={'class': + [ + 'inread js-services-inread', 'catcher catcher--inline', 'inread inread--NL js-services-inread', + 'article__reactions', 'author', 'catcher', 'portfolio', 'services-inread' + ] + }) ] remove_attributes = [ @@ -101,13 +160,13 @@ class LeMonde(BasicNewsRecipe): def get_cover_url(self): # today's date is a reasonable guess for the ID of the cover cover_id = date.today().strftime('%Y%m%d') - soup = self.index_to_soup('https://www.lemonde.fr/') - a = soup.find('a', {'id': 'jelec_link', 'style': True}) - if a and a['style']: - url = a['style'].split('/') - if len(url) > 5 and url[3].isdigit(): + # soup = self.index_to_soup('https://www.lemonde.fr/') + # a = soup.find('a', {'id': 'jelec_link', 'style': True}) + # if a and a['style']: + # url = a['style'].split('/') + # if len(url) > 5 and url[3].isdigit(): # overwrite guess if actual cover ID was found - cover_id = url[3] + # cover_id = url[3] return 'https://www.lemonde.fr/thumbnail/journal/' + cover_id + '/1000/1490' def get_article_url(self, article): @@ -124,7 +183,7 @@ class LeMonde(BasicNewsRecipe): for img in soup.find_all('img', {'data-srcset': True}): data_srcset = img['data-srcset'].split() if len(data_srcset) > 1: - img['src'] = data_srcset[-2] + img['src'] = data_srcset[0] del img['data-srcset'] return soup diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe index a112968816..b59bfb6b12 100644 --- a/recipes/livemint.recipe +++ b/recipes/livemint.recipe @@ -1,16 +1,12 @@ -#!/usr/bin/env python - import json import re from datetime import date - from 
calibre.web.feeds.news import BasicNewsRecipe, classes is_saturday = date.today().weekday() == 5 - class LiveMint(BasicNewsRecipe): - title = u'Live Mint' + title = 'Live Mint' description = 'Financial News from India.' language = 'en_IN' __author__ = 'Krittika Goyal, revised by unkn0wn' @@ -23,55 +19,92 @@ class LiveMint(BasicNewsRecipe): masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg' remove_empty_feeds = True + resolve_internal_links = True + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + self.title = 'Mint | ' + date.today().strftime('%b %d, %Y') + if is_saturday: + self.title = 'Mint Lounge | ' + date.today().strftime('%b %d, %Y') if is_saturday: + + def get_cover_url(self): + soup = self.index_to_soup('https://lifestyle.livemint.com/') + if citem := soup.find('div', attrs={'class':'headLatestIss_cover'}): + return citem.img['src'].replace('_tn.jpg', '_mr.jpg') + + masthead_url = 'https://lifestyle.livemint.com/mintlounge/static-images/lounge-logo.svg' - cover_url = 'https://epsfs.hindustantimes.com/MINT/2022/04/16/Delhi/Delhi/5_01/bf867ea1_01_mr.jpg' + oldest_article = 6.5 # days + + extra_css = ''' + #story-summary-0 {font-style:italic; color:#202020;} + .innerBanner, .storyImgSec {text-align:center; font-size:small;} + .author {font-size:small;} + ''' keep_only_tags = [ - dict(name='h1'), - dict(name='h2', attrs={'id':'story-summary-0'}), - dict(name='picture'), - dict(name='div', attrs={'class':'innerBanCaption'}), - dict(name='div', attrs={'id':'date-display-before-content'}), - dict(name='div', attrs={'class':'storyContent'}), + classes('storyPageHeading storyContent innerBanner author') ] remove_tags = [ - classes( - 'sidebarAdv similarStoriesClass moreFromSecClass' - ) + dict(name=['meta', 'link', 'svg', 'button', 'iframe']), + classes('hidden-article-url sidebarAdv similarStoriesClass moreFromSecClass linkStories 
publishDetail'), + dict(attrs={'id':['hidden-article-id-0', 'hidden-article-type-0']}) ] + feeds = [ - ('News', 'https://lifestyle.livemint.com/rss/news'), - ('Food','https://lifestyle.livemint.com/rss/food'), - ('Fashion','https://lifestyle.livemint.com/rss/fashion'), - ('How to Lounge','https://lifestyle.livemint.com/rss/how-to-lounge'), - ('Smart Living','https://lifestyle.livemint.com/rss/smart-living'), + ('Lounge News', 'https://lifestyle.livemint.com/rss/news'), + ('Food', 'https://lifestyle.livemint.com/rss/food'), + ('Fashion', 'https://lifestyle.livemint.com/rss/fashion'), + ('How to Lounge', 'https://lifestyle.livemint.com/rss/how-to-lounge'), + ('Smart Living', 'https://lifestyle.livemint.com/rss/smart-living'), + ('Health', 'https://lifestyle.livemint.com/rss/health'), + ('Relationships', 'https://lifestyle.livemint.com//rss/relationships') ] def preprocess_html(self, soup): + if h2 := soup.find('h2'): + h2.name = 'p' + for also in soup.findAll('h2'): + if self.tag_to_string(also).strip().startswith('Also'): + also.extract() for img in soup.findAll('img', attrs={'data-img': True}): img['src'] = img['data-img'] return soup else: - # some wsj articles wont load + + def get_cover_url(self): + soup = self.index_to_soup( + 'https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Mint-Mumbai/Newspaper/' + ) + for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): + return citem['content'] + extra_css = ''' + img {margin:0 auto;} + .psTopLogoItem img, .ecologoStory { width:100; } #img-cap {font-size:small; text-align:center;} - #auth-info {font-size:small; text-align:center;} - .highlights {font-style:italic;} - .summary{font-style:italic; color:#404040;} + .summary, .highlights, .synopsis { + font-weight:normal !important; font-style:italic; color:#202020; + } + h2 {font-size:normal !important;} + .author-widget {font-size:small; font-style:italic; color:#404040;} + em, blockquote {color:#202020;} + .moreAbout, .articleInfo, 
.metaData, .psTopicsHeading, .topicsTag {font-size:small;} ''' - cover_url = 'https://epsfs.hindustantimes.com/MINT/2022/04/05/Delhi/Delhi/5_01/1ec7ad14_01_mr.jpg' keep_only_tags = [ - dict(name='h1'), - dict(name='figure', attrs={'data-vars-mediatype':'image'}), - classes('articleInfo FirstEle summary highlights paywall'), + dict(name='article', attrs={'id':lambda x: x and x.startswith(('article_', 'box_'))}), + classes('contentSec') ] remove_tags = [ + dict(name=['meta', 'link', 'svg', 'button', 'iframe']), classes( - 'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight' - ' socialHolder imgbig disclamerText disqus-comment-count' + 'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight gadgetSlider' + ' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot bs_logo' + ' datePublish sepStory premiumSlider moreStory Joinus moreAbout milestone benefitText' ) ] @@ -96,6 +129,12 @@ class LiveMint(BasicNewsRecipe): ] def preprocess_raw_html(self, raw, *a): + # remove empty p tags + raw = re.sub( + r'(

    \s*)(<[^(\/|a|i|b|em|strong)])', '\g<2>', re.sub( + r'(

    \s* \s*<\/p>)|(

    \s*<\/p>)|( \s*<\/p>)', '', raw + ) + ) if '' in raw: m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw) raw1 = raw[m.start():] @@ -105,19 +144,29 @@ class LiveMint(BasicNewsRecipe): body = data['articleBody'] + '

    '\ + re.sub(r'(([a-z]|[^A-Z])\.|\.”)([A-Z]|“[A-Z])', r'\1

    \3', value) body = '

    ' + body + '

    ' - raw = re.sub(r'
    ([^}]*)
    ', body, raw) - return raw - else: - return raw + raw2 = re.sub(r'
    ([^}]*)
    ', body, raw) + return raw2 + return raw def preprocess_html(self, soup): + for strong in soup.findAll('strong'): + if strong.find('p'): + strong.name = 'div' + for embed in soup.findAll('div', attrs={'class':'embed'}): + if nos := embed.find('noscript'): + nos.name = 'span' for span in soup.findAll('figcaption'): span['id'] = 'img-cap' - for auth in soup.findAll('span', attrs={'class':['articleInfo pubtime','articleInfo author']}): - auth['id'] = 'auth-info' + for auth in soup.findAll('span', attrs={'class':lambda x: x and 'articleInfo' in x.split()}): auth.name = 'div' for span in soup.findAll('span', attrs={'class':'exclusive'}): span.extract() for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'] + if wa := soup.find(**classes('autobacklink-topic')): + if p := wa.findParent('p'): + p.extract() return soup + + def populate_article_metadata(self, article, soup, first): + article.title = article.title.replace('','₹') diff --git a/recipes/mainichi_en.recipe b/recipes/mainichi_en.recipe index 8a265a5ab1..d49e960bb8 100644 --- a/recipes/mainichi_en.recipe +++ b/recipes/mainichi_en.recipe @@ -1,24 +1,13 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = "GPL v3" -__copyright__ = ( - "2010, Hiroshi Miura . 
" - "2021, Albert Aparicio Isarn " -) - """ www.mainichi.jp/english """ -from datetime import datetime - +from calibre.ptempfile import PersistentTemporaryFile from calibre.web.feeds.news import BasicNewsRecipe - class MainichiEnglishNews(BasicNewsRecipe): title = u"The Mainichi" - __author__ = "Albert Aparicio Isarn (old version by Hiroshi Miura)" + __author__ = 'unkn0wn' description = "Japanese traditional newspaper Mainichi news in English" publisher = "Mainichi News" @@ -29,114 +18,28 @@ class MainichiEnglishNews(BasicNewsRecipe): index = "http://mainichi.jp/english/" masthead_url = index + "images/themainichi.png" - oldest_article = 2 - max_articles_per_feed = 40 no_stylesheets = True remove_javascript = True + auto_cleanup = True - remove_tags_before = {"id": "main-cont"} - remove_tags_after = {"class": "main-text"} - remove_tags = [{"name": "div", "id": "tools"}, {"name": "div", "class": "sub"}] + ignore_duplicate_articles = {'title'} - def get_pickup_section(self, soup): - # Topmost story - top = soup.find("section", attrs={"class": "pickup section"}) - top_link = top.find("p", attrs={"class": "midashi"}).find("a") + articles_are_obfuscated = True + def get_obfuscated_article(self, url): + br = self.get_browser() try: - top_date = ( - soup.find("div", attrs={"id": "main"}) - .find("div", attrs={"class": "date-box"}) - .find("p", attrs={"class": "date"}) - .string - ) + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name - top_date_formatted = datetime.strptime(top_date, "%A, %B %d, %Y").strftime("%Y/%m/%d") - except AttributeError: - # If date not present, assume it is from today - top_date_formatted = datetime.now().strftime("%Y/%m/%d") - - top_description = top.find("p", attrs={"class": "txt"}).text - - return [ - { - "title": top_link.string, - 
"date": top_date_formatted, - "url": "https:" + top_link["href"], - "description": top_description, - } - ] - - def retrieve_news_from_column(self, column): - column_news = [] - - for item in column.findAll("li"): - if item: - itema = item.find("a") - date_item = itema.find("p", attrs={"class": "date"}) - - column_news.append( - { - "title": itema.find("span").string, - "date": date_item.string.strip("()") if date_item else "", - "url": "https:" + itema["href"], - "description": "", - } - ) - - return column_news - - def get_top_stories(self, soup): - top_stories = self.get_pickup_section(soup) - - news_section = soup.find("section", attrs={"class": "newslist"}) - top_news = news_section.find("div", attrs={"class": "main-box"}).find("ul") - - top_stories.extend(self.retrieve_news_from_column(top_news)) - - return top_stories - - def get_editor_picks(self, soup): - editor_picks = [] - - news_section = soup.find("section", attrs={"class": "newslist"}) - news = news_section.find("div", attrs={"class": "sub-box"}).find("ul") - - editor_picks.extend(self.retrieve_news_from_column(news)) - - return editor_picks - - def get_section(self, section): - soup = self.index_to_soup(self.index + section + "index.html") - - section_news_items = self.get_pickup_section(soup) - - news_columns = ( - soup.find("section", attrs={"class": "newslist section"}) - .find("div", attrs={"class": "col-set"}) - .find("ul") - ) - - section_news_items.extend(self.retrieve_news_from_column(news_columns)) - - return section_news_items - - def parse_index(self): - soup = self.index_to_soup(self.index + "index.html") - - feeds = [ - ("Top Stories", self.get_top_stories(soup)), - ("Editor's Picks", self.get_editor_picks(soup)), - # ("Latest Articles", self.get_section(self.index + "latest"+"index.html")), - ("Japan", self.get_section("japan")), - ("World", self.get_section("world")), - ("Business", self.get_section("business")), - ("Sports", self.get_section("sports")), - ("Science", 
self.get_section("science")), - ("Entertainment", self.get_section("entertainment")), - ("Opinion", self.get_section("opinion")), - ("Lifestyle", self.get_section("lifestyle")), - ("Obituaries", self.get_section("obituaries")), - ] - - return feeds + feeds = [ + ('Articles', 'https://news.google.com/rss/search?q=when:48h+allinurl:mainichi.jp%2Fenglish%2Farticles%2F&hl=en-US&gl=US&ceid=US:en') + ] diff --git a/recipes/martinfowler.recipe b/recipes/martinfowler.recipe new file mode 100644 index 0000000000..419d4e1e25 --- /dev/null +++ b/recipes/martinfowler.recipe @@ -0,0 +1,28 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class MartinFowler(BasicNewsRecipe): + title = 'Martin Fowler Blog' + description = 'Software development is a young profession, and we are still learning the techniques and building the tools to do it effectively.' + language = 'en' + oldest_article = 14 + + keep_only_tags = [ + dict(name='main') + ] + + remove_tags = [ + dict(name='div', attrs={'class': 'bio'}), + dict(name='div', attrs={'class': 'article-card'}), + dict(name='hr', attrs={'class': 'bodySep'}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class': lambda x: x and 'paperBody' in x.split()}) + ] + + use_embedded_content = False + no_stylesheets = True + + feeds = [ + (u'Martin Fowler Blog', u'https://martinfowler.com/feed.atom') + ] diff --git a/recipes/mediapart.recipe b/recipes/mediapart.recipe index d5a1518b1f..2606d284b0 100644 --- a/recipes/mediapart.recipe +++ b/recipes/mediapart.recipe @@ -9,6 +9,8 @@ # ( cover image format is changed to .jpeg) # 14 Jan 2021 - Add Mediapart Logo url as masthead_url and change cover # by overlaying the date on top of the Mediapart cover +# 22 Mar 2023 - Switch to Google feeds + from __future__ import unicode_literals __license__ = 'GPL v3' @@ -17,235 +19,74 @@ __copyright__ = '2021, Loïc Houpert . 
Adapted from: Mediapart ''' -import re -from datetime import date, datetime, timezone, timedelta -from calibre.web.feeds import feeds_from_index -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict( - attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)} - ) - +from datetime import datetime, timezone, timedelta +from calibre.ptempfile import PersistentTemporaryFile +from calibre.web.feeds.news import BasicNewsRecipe, classes class Mediapart(BasicNewsRecipe): title = 'Mediapart' - __author__ = 'Loïc Houpert' + __author__ = 'Loïc Houpert, unkn0wn' description = 'Global news in French from news site Mediapart' publication_type = 'newspaper' language = 'fr' needs_subscription = True - oldest_article = 2 use_embedded_content = False no_stylesheets = True keep_only_tags = [ - dict(name='h1'), - dict(name='div', **classes('author')), - classes('news__heading__top__intro news__body__center__article') + classes( + 'news__heading__top news__heading__center news__body__center__article' + ) ] + remove_tags = [ - classes('login-subscribe print-source_url'), + classes('action-links media--rich read-also login-subscribe print-source_url'), dict(name='svg'), ] + conversion_options = {'smarten_punctuation': True} masthead_url = "https://raw.githubusercontent.com/lhoupert/calibre_contrib/main/mediapart_masthead.png" - # cover_url = 'https://raw.githubusercontent.com/lhoupert/calibre_contrib/main/mediapart.jpeg' - # -- + ignore_duplicate_articles = {'title'} + resolve_internal_links = True + remove_empty_feeds = True - # Get date in french time zone format - today = datetime.now(timezone.utc) + timedelta(hours=1) - oldest_article_date = today - timedelta(days=oldest_article) + articles_are_obfuscated = True - feeds = [ - ('La Une', 'http://www.mediapart.fr/articles/feed'), + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = 
e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + feeds = [] + + sections = [ + 'france', 'international', 'economie', 'culture-idees', 'politique', 'ecologie', 'fil-dactualites' ] - # The feed at 'http://www.mediapart.fr/articles/feed' only displayed the 10 - # last elements so the articles are indexed on specific pages - # in the function my_parse_index. In this function the article are parsed - # using the function get_articles and the dict values dict_article_sources - - def parse_feeds(self): - feeds = super(Mediapart, self).parse_feeds() - feeds += feeds_from_index(self.my_parse_index(feeds)) - return feeds - - def my_parse_index(self, la_une): - - dict_article_sources = [ - { - 'type': 'Brèves', - 'webpage': 'https://www.mediapart.fr/journal/fil-dactualites', - 'separador': { - 'page': 'ul', - 'thread': 'li' - } - }, - { - 'type': 'International', - 'webpage': 'https://www.mediapart.fr/journal/international', - 'separador': { - 'page': 'div', - 'thread': 'div' - } - }, - { - 'type': 'France', - 'webpage': 'https://www.mediapart.fr/journal/france', - 'separador': { - 'page': 'div', - 'thread': 'div' - } - }, - { - 'type': 'Économie', - 'webpage': 'https://www.mediapart.fr/journal/economie', - 'separador': { - 'page': 'div', - 'thread': 'div' - } - }, - { - 'type': 'Culture', - 'webpage': 'https://www.mediapart.fr/journal/culture-idees', - 'separador': { - 'page': 'div', - 'thread': 'div' - } - }, - ] - - def get_articles( - type_of_article, webpage, separador_page='ul', separador_thread='li' - ): - - specific_articles = 
[] - - webpage_article = [] - soup = self.index_to_soup(webpage) - page = soup.find('main', {'class': 'global-wrapper'}) - if page is None: - page = soup.find('section', {'class': 'news__body-wrapper mb-800'}) - fils = page.find(separador_page, {'class': 'post-list universe-journal'}) - if fils is None: - fils = page.find(separador_page, {'class': 'news__list__content _hasNewsletter'}) - - all_articles = fils.findAll(separador_thread) - for article in all_articles: - try: - # title = article.find('h3', recursive=False) - title = article.find('h3', recursive=True) - if title is None or ''.join(title['class']) == 'title-specific': - # print(f"[BAD title entry] Print value of title:\n {title}") - continue - # print(f"\n[OK title entry] Print value of title:\n {title}\n") - - try: - article_mot_cle = article.find( - 'a', { - 'href': re.compile(r'.*\/mot-cle\/.*') - } - ).renderContents().decode('utf-8') - except Exception: - article_mot_cle = '' - - try: - article_type = article.find( - 'a', { - 'href': re.compile(r'.*\/type-darticles\/.*') - } - ).renderContents().decode('utf-8') - except Exception: - article_type = '' - - for s in title('span'): - s.replaceWith(s.renderContents().decode('utf-8') + "\n") - url = title.find('a', href=True)['href'] - - date = article.find('time', datetime=True)['datetime'] - article_date = datetime.strptime(date, '%Y-%m-%d') - # Add French timezone to date of the article for date check - article_date = article_date.replace(tzinfo=timezone.utc) + timedelta(hours=1) - if article_date < self.oldest_article_date: - print("article_date < self.oldest_article_date\n") - continue - - # print("-------- Recent article added to the list ------- \n") - all_authors = article.findAll( - # 'a', {'class': re.compile(r'\bjournalist\b')} - 'div', {'class': 'teaser__signature'} - ) - if not all_authors: - all_authors = article.findAll( - 'a', {'class': re.compile(r'\bjournalist\b')} - ) - authors = [self.tag_to_string(a) for a in all_authors] - # 
print(f"Authors in tag : {authors}") - - # If not link to the author profile is available the - # html separador is a span tag - if not all_authors: - try: - all_authors = article.findAll( - 'span', {'class': re.compile(r'\bjournalist\b')} - ) - authors = [self.tag_to_string(a) for a in all_authors] - # print(f"Authors in tag : {authors}") - except: - authors = 'unknown' - - description = article.find('p').renderContents().decode('utf-8') - # print(f"

    in article : {self.tag_to_string(description).strip()} ") - - summary = { - 'title': self.tag_to_string(title).strip(), - 'description': description, - 'date': article_date.strftime("%a, %d %b, %Y %H:%M"), - 'author': ', '.join(authors), - 'article_type': article_type, - 'mot_cle': article_mot_cle.capitalize(), - 'url': 'https://www.mediapart.fr' + url, - } - if webpage_article: - if summary['url'] != webpage_article[-1]['url']: - webpage_article.append(summary) - else: - webpage_article.append(summary) - except Exception: - pass - - specific_articles += [(type_of_article, - webpage_article)] if webpage_article else [] - return specific_articles - - articles = [] - - for category in dict_article_sources: - articles += get_articles( - category['type'], category['webpage'], category['separador']['page'], - category['separador']['thread'] - ) - - return articles - - # non-locale specific date parse (strptime("%d %b %Y",s) would work with - # french locale) - def parse_french_date(self, date_str): - date_arr = date_str.lower().split() - return date( - day=int(date_arr[0]), - year=int(date_arr[2]), - month=[ - None, 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre' - ].index(date_arr[1]) - ) + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:mediapart.fr%2Fjournal{}&hl=fr-FR&gl=FR&ceid=FR:fr' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + feeds.append(('Autres', a.format(''))) def get_browser(self): # -- Handle login @@ -298,7 +139,7 @@ class Mediapart(BasicNewsRecipe): p.setPen(pen) font = QFont() font.setFamily('Times') - font.setPointSize(78) + font.setPointSize(72) p.setFont(font) r = QRect(0, 600, 744,100) p.drawText(r, Qt.AlignmentFlag.AlignJustify | Qt.AlignmentFlag.AlignVCenter | Qt.AlignmentFlag.AlignCenter, date) diff --git a/recipes/mit_technology_review.recipe b/recipes/mit_technology_review.recipe index 70fedadf41..7075655d74 
100644 --- a/recipes/mit_technology_review.recipe +++ b/recipes/mit_technology_review.recipe @@ -10,7 +10,7 @@ technologyreview.com ''' from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes from collections import OrderedDict - +import json def absurl(x): if x.startswith('//'): @@ -50,6 +50,7 @@ class MitTechnologyReview(BasicNewsRecipe): #cre-d{font-size:xx-small; text-align:center; color:gray;} #cap-d{font-size:small; text-align:center;} blockquote{text-align:center; color:#404040;} + em { color:#202020;} ''' keep_only_tags = [ prefixed_classes('contentHeader contentArticleHeader contentBody') @@ -64,15 +65,17 @@ class MitTechnologyReview(BasicNewsRecipe): def get_cover_url(self): soup = self.index_to_soup('https://www.technologyreview.com/') - div = soup.find('div', attrs={'class':lambda s: s and s.startswith('magazineSidebar__imageWrap')}) - img = div.find('img', src=True) - return img['src'] + if script := soup.find('script', id='preload'): + JSON = script.contents[0].split('magazineCover\":')[1].strip() + data = json.JSONDecoder().raw_decode(JSON)[0] + return data['config']['src'] def parse_index(self): soup = self.index_to_soup(self.INDEX) - issue = soup.find('h1', attrs={'class':lambda x: x and x.startswith('magazineHero__title')}) + issue = soup.find(attrs={'class':lambda x: x and x.startswith('magazineHero__title')}) time = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__date')}) - self.timefmt = ' (' + self.tag_to_string(issue) + ') [' + self.tag_to_string(time) + ']' + self.title = 'MIT Tech Review ' + self.tag_to_string(issue) + self.timefmt = ' [' + self.tag_to_string(time) + ']' self.log('Downloading issue: ', self.timefmt) # parse articles diff --git a/recipes/moneycontrol.recipe b/recipes/moneycontrol.recipe index 3c8f0483ea..4fcc5c5760 100644 --- a/recipes/moneycontrol.recipe +++ b/recipes/moneycontrol.recipe @@ -1,65 +1,104 @@ -from calibre.web.feeds.news import BasicNewsRecipe -from 
calibre.ebooks.BeautifulSoup import Tag - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) +from calibre.web.feeds.news import BasicNewsRecipe, classes +from urllib.parse import quote class MoneyControlRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en_IN' - locale = 'en_IN' - encoding = 'iso-8859-1' - version = 1 - title = u'Money Control' - publisher = u'moneycontrol.com' - category = u'News, Financial, India' - description = u'Financial news from India' - - oldest_article = 7 - max_articles_per_feed = 100 - use_embedded_content = False - + __author__ = 'unkn0wn' + description = 'Read the latest business news on the Indian economy, global market, upcoming IPOs and more.' + language = 'en_IN' + masthead_url = 'https://images.moneycontrol.com/images/ftpopup/moneyloginlogo.png' + encoding = 'utf-8' no_stylesheets = True remove_javascript = True + remove_attributes = ['width', 'height', 'float', 'style'] + + ignore_duplicate_articles = {'title', 'url'} + remove_empty_feeds = True + resolve_internal_links = True + oldest_article = 1 # days + + extra_css = ''' + img {display:block; margin:0 auto;} + .article_image_wrapper { font-size:small; text-align:center; } + .articlename_join_follow, .author_wrapper, .FT_block_article { font-size:small; color:#404040; } + .article_desc { font-style:italic; color:#202020; } + ''' + + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + soup = self.index_to_soup(url) + link = soup.a['href'] + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/multimedia/', + ] + if any(x in link for x in skip_sections): + self.abort_article('skipping video links ', link) + self.log('Found ', link) + html = br.open(link).read() + return ({ 'data': html, 'url': link }) + + keep_only_tags = [ + 
dict(name='div', attrs={'id':lambda x: x and x.startswith('article-')}) + ] + + remove_tags = [ + dict(name=['svg', 'style', 'button', 'script']), + dict(attrs={'id':['social_icon_impression', 'taboola-mid-article-thumbnails']}), + classes( + 'social_icons_wrapper mid-arti-ad lastPara related_stories_left_block social_icons_mobile_wrapper ' + 'advSlotsWithoutGrayBox tags_wrapper maintextdiv page_right_wrapper stockwidget tech_newsletter' + ) + ] + + def preprocess_html(self, soup): + desc = soup.find(**classes('article_desc')) + if desc: + desc.name = 'p' + for wrap in soup.findAll(**classes('article_image_wrapper')): + for h2 in wrap.findAll('h2'): + h2.name = 'span' + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'] + return soup feeds = [] + + when = oldest_article*24 + index = 'https://www.moneycontrol.com/' + + business_sections = [ + 'markets', 'stocks', 'ipo', 'budget', 'banks', 'moneycontrol-research', 'economy', 'earnings', 'real-estate', + 'personal-finance', 'commodities', 'trade', 'companies' + ] + + a = 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=en-IN&gl=IN&ceid=IN:en' + + for sec in business_sections: + allinurl_a = index + 'news/business' + feeds.append((sec.capitalize(), a.format(when, quote(allinurl_a + sec, safe='')))) + feeds.append(('Business' , a.format(when, quote(allinurl_a + sec, safe='')))) + + news_sections = [ + 'india', 'world', 'opinion', 'politics', 'technology', 'trends', 'lifestyle' + ] + + for sec in news_sections: + allinurl_b = index + 'news' + feeds.append((sec.capitalize(), a.format(when, quote(allinurl_b + sec, safe='')))) + feeds.append(('News', a.format(when, quote(allinurl_b + sec, safe=''), ''))) feeds.append( - (u'Latest News', u'http://www.moneycontrol.com/rss/latestnews.xml')) - feeds.append( - (u'All Stories', u'http://www.moneycontrol.com/rss/allstories.xml')) + ('Others', 
'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=en-IN&gl=IN&ceid=IN:en'.format(when, quote(index, safe=''))) + ) - def print_version(self, url): - return url.replace('/stocksnews.php?', '/news_print.php?') + '&sr_no=0' - - # The articles contain really horrible html. More than one and section, not properly closed tags, lots and lots of - # tags and some weird markup that crashes the conversion to ebook. Needs some drastic sanitizing - '''def preprocess_html(self, soup): - freshSoup = BeautifulSoup('') - - headline = soup.find('td', attrs = {'class': 'heading'}) - if headline: - h1 = new_tag(freshSoup, 'h1') - # Convert to string before adding it to the document! - h1.append(self.tag_to_string(headline)) - freshSoup.body.append(h1) - - for p in soup.findAll('p', attrs={'class': true}): - if ''.join(p['class']) == 'MsoNormal': - # We have some weird pagebreak marker here; it will not find all of them however - continue - - para = new_tag(freshSoup, 'p') - # Convert to string; this will loose all formatting but also all illegal markup - para.append(self.tag_to_string(p)) - - freshSoup.body.append(para) - - return freshSoup - ''' + def populate_article_metadata(self, article, soup, first): + div = soup.find('div', attrs={'data-io-article-url':True}) + if div: + article.url = div['data-io-article-url'] + desc = soup.find(**classes('article_desc')) + if desc: + article.summary = self.tag_to_string(desc) + article.text_summary = article.summary + article.title = article.title.replace(' - Moneycontrol', '') diff --git a/recipes/my_dealz_de.recipe b/recipes/my_dealz_de.recipe index 9b2c686d22..f5720ce7d5 100644 --- a/recipes/my_dealz_de.recipe +++ b/recipes/my_dealz_de.recipe @@ -2,7 +2,6 @@ from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe -from datetime import datetime class MyDealzDE(BasicNewsRecipe): @@ -21,10 +20,6 @@ class MyDealzDE(BasicNewsRecipe): 
simultaneous_downloads = 10 # description, some Reader show this in titlepage description = u'MyDealz - Shopping Deals for Germany' - # add date to description so for dayly downloads you can find them easier - # ---- can be edit by user - description = description + ' fetched: ' + \ - datetime.now().strftime("%Y-%m-%d") # %H:%M:%S") # Who published the content? publisher = u'https://www.mydealz.de' # What is the content of? diff --git a/recipes/natgeo.recipe b/recipes/natgeo.recipe index 57ff66dc8d..4f902af9cc 100644 --- a/recipes/natgeo.recipe +++ b/recipes/natgeo.recipe @@ -19,57 +19,112 @@ def classes(classes): def extract_json(raw): s = raw.find("window['__natgeo__']") script = raw[s:raw.find('', s)] - return json.loads( - script[script.find('{'):].rstrip(';'))['page']['content']['article'] + return json.loads(script[script.find('{'):].rstrip(';'))['page']['content']['prismarticle'] def parse_contributors(grp): for item in grp: - line = '

    ' + escape(item['title']) + ' ' + line = '

    ' + escape(item['title']) + ' ' for c in item['contributors']: line += escape(c['displayName']) - yield line + '

    ' + yield line + '
    ' def parse_lead_image(media): - yield '
    {}
    '.format( - escape(media['image']['src'], True), escape(media['image']['dsc'], True)) - yield '

    ' + escape(media['caption']) + '

    ' - if 'credit' in media: - yield '

    ' + escape(media['credit']) + '

    ' + if 'image' in media: + yield '

    ' + if 'dsc' in media['image']: + yield '

    {}
    '.format( + escape(media['image']['src'], True), escape(media['image']['dsc'], True)) + else: + yield '
    '.format(escape(media['image']['src'], True)) + if 'caption' in media and 'credit' in media: + yield '
    ' + media['caption'] + ' ' + media['credit'] + '
    ' + elif 'caption' in media: + yield '
    ' + media['caption'] + '
    ' + yield '

    ' -def parse_body(item): - c = item['cntnt'] - if item.get('type') == 'inline': - if c.get('cmsType') == 'listicle': - yield '

    ' + escape(c['title']) + "

    " - yield c['text'] - elif c.get('cmsType') == 'image': - for line in parse_lead_image(c): - yield line - else: - yield '<{tag}>{markup}'.format( - tag=item['type'], markup=c['mrkup']) +def parse_inline(inl): + if inl.get('content', {}).get('name', '') == 'Image': + props = inl['content']['props'] + yield '

    ' + if 'image' in props: + yield '

    '.format(props['image']['src']) + if 'caption' in props: + yield '
    {}{}
    '.format( + props['caption']['text'], ' ' + props['caption']['credit'] + ) + yield '

    ' + if inl.get('content', {}).get('name', '') == 'ImageGroup': + if 'images' in inl['content']['props']: + for imgs in inl['content']['props']['images']: + yield '

    ' + if 'src' in imgs: + yield '

    '.format(imgs['src']) + if 'caption' in imgs: + yield '
    {}{}
    '.format( + imgs['caption']['text'], ' ' + imgs['caption']['credit'] + ) + yield '

    ' + + +def parse_cont(content): + for cont in content.get('content', {}): + if isinstance(cont, dict): + yield from parse_body(cont) + if isinstance(cont, str): + yield cont + + +def parse_body(x): + if isinstance(x, dict): + if 'type' in x: + tag = x['type'] + if tag == 'inline': + yield ''.join(parse_inline(x)) + elif 'attrs' in x and 'href' in x.get('attrs', ''): + yield '<' + tag + ' href = "{}">'.format(x['attrs']['href']) + for yld in parse_cont(x): + yield yld + yield '' + else: + yield '<' + tag + '>' + for yld in parse_cont(x): + yield yld + yield '' + elif isinstance(x, list): + for y in x: + if isinstance(y, dict): + yield from parse_body(y) def parse_article(edg): sc = edg['schma'] - yield '

    ' + escape(edg['sctn']) + '

    ' + yield '
    ' + escape(edg['sctn']) + '
    ' yield '

    ' + escape(sc['sclTtl']) + '

    ' - yield '
    ' + escape(sc['sclDsc']) + '
    ' + yield '' + yield '

    ' for line in parse_contributors(edg['cntrbGrp']): yield line ts = parse_iso8601(edg['mdDt'], as_utc=False).strftime('%B %d, %Y') - yield '

    Published: ' + escape(ts) + '

    ' + yield '
    Published: ' + escape(ts) + '
    ' if 'readTime' in edg: - yield '

    ' + escape(edg['readTime']) + '

    ' + yield '
    ' + escape(edg['readTime']) + '
    ' + yield '

    ' if edg.get('ldMda', {}).get('cmsType') == 'image': for line in parse_lead_image(edg['ldMda']): yield line - for item in edg['bdy']: - for line in parse_body(item): - yield line + for main in edg['prismData']['mainComponents']: + if main['name'] == 'Body': + for item in main['props']['body']: + if isinstance(item, dict): + if item.get('type', '') == 'inline': + for inl in parse_inline(item): + yield inl + elif isinstance(item, list): + for line in item: + yield ''.join(parse_body(line)) def article_parse(data): @@ -79,6 +134,10 @@ def article_parse(data): continue for mod in frm.get('mods', ()): for edg in mod.get('edgs', ()): + if edg.get('cmsType') == 'ImmersiveLeadTile': + if 'image' in edg.get('cmsImage', {}): + for line in parse_lead_image(edg['cmsImage']): + yield line if edg.get('cmsType') == 'ArticleBodyTile': for line in parse_article(edg): yield line @@ -87,26 +146,69 @@ def article_parse(data): class NatGeo(BasicNewsRecipe): title = u'National Geographic' - description = 'Daily news articles from The National Geographic' + description = 'News articles from The National Geographic, Download Monthly.' 
language = 'en' encoding = 'utf8' publisher = 'nationalgeographic.com' category = 'science, nat geo' - __author__ = 'Kovid Goyal' + __author__ = 'Kovid Goyal, unkn0wn' description = 'Inspiring people to care about the planet since 1888' timefmt = ' [%a, %d %b, %Y]' no_stylesheets = True use_embedded_content = False remove_attributes = ['style'] remove_javascript = False + masthead_url = 'https://i.natgeofe.com/n/e76f5368-6797-4794-b7f6-8d757c79ea5c/ng-logo-2fl.png?w=600&h=600' + remove_empty_feeds = True + resolve_internal_links = True + ignore_duplicate_articles = {'url'} + + extra_css = ''' + blockquote { color:#404040; } + .byline, i { font-style:italic; color:#202020; } + .cap { font-size:small; } + img {display:block; margin:0 auto;} + .cred { font-style:italic; font-size:small; color:#404040; } + .auth, .time, .sub { font-size:small; color:#5c5c5c; } + ''' + + def get_cover_url(self): + # soup = self.index_to_soup('https://www.nationalgeographic.com/magazine/') + # png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg', soup.decode('utf-8')) + from datetime import date + url = 'https://www.nationalgeographic.com/magazine/issue/' + (date.today().strftime('%B-%Y')).lower() + soup = self.index_to_soup(url) + png = soup.find('meta', attrs={'property':'og:image'})['content'].split('?') + return png[0] + '?w=1000&h=1000' def parse_index(self): - soup = self.index_to_soup('https://www.nationalgeographic.com/latest-stories/') + pages = [ + 'https://www.nationalgeographic.com/animals', + 'https://www.nationalgeographic.com/environment', + 'https://www.nationalgeographic.com/history', + 'https://www.nationalgeographic.com/science', + 'https://www.nationalgeographic.com/travel' + ] + + feeds = [] + + for sec in pages: + soup = self.index_to_soup(sec) + parsed = self.articles_from_soup(soup) + if parsed: + feeds += parsed + return feeds + + def articles_from_soup(self, soup): ans = {} for article in soup.findAll('article'): a = article.find('a') url 
= a['href'] + if url.startswith('/'): + url = 'https://www.nationalgeographic.com' + url section = self.tag_to_string(article.find(**classes('SectionLabel'))) + if section.startswith('Paid Content'): + continue title = self.tag_to_string(article.find(**classes('PromoTile__Title--truncated'))) articles = ans.setdefault(section, []) articles.append({'title': title, 'url': url}) @@ -116,3 +218,17 @@ class NatGeo(BasicNewsRecipe): def preprocess_raw_html(self, raw_html, url): data = extract_json(raw_html) return '\n'.join(article_parse(data)) + + def preprocess_html(self, soup): + for h2 in soup.findAll('h2'): + h2.name = 'h4' + for img in soup.findAll('img', src=True): + # for high res images use '?w=2000&h=2000' + img['src'] = img['src'] + '?w=600&h=600' + return soup + + def populate_article_metadata(self, article, soup, first): + summ = soup.find(attrs={'class':'byline'}) + if summ: + article.summary = self.tag_to_string(summ) + article.text_summary = self.tag_to_string(summ) diff --git a/recipes/natgeohis.recipe b/recipes/natgeohis.recipe new file mode 100644 index 0000000000..d52a63f7c5 --- /dev/null +++ b/recipes/natgeohis.recipe @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import absolute_import, division, print_function, unicode_literals + +import json + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre import prepare_string_for_xml as escape +from calibre.utils.iso8601 import parse_iso8601 + + +def classes(classes): + q = frozenset(classes.split(' ')) + return dict(attrs={ + 'class': lambda x: x and frozenset(x.split()).intersection(q)}) + + +def extract_json(raw): + s = raw.find("window['__natgeo__']") + script = raw[s:raw.find('', s)] + return json.loads(script[script.find('{'):].rstrip(';'))['page']['content']['prismarticle'] + + +def parse_contributors(grp): + for item in grp: + line = '
    ' + escape(item['title']) + ' ' + for c in item['contributors']: + line += escape(c['displayName']) + yield line + '
    ' + + +def parse_lead_image(media): + if 'image' in media: + yield '

    ' + if 'dsc' in media['image']: + yield '

    {}
    '.format( + escape(media['image']['src'], True), escape(media['image']['dsc'], True)) + else: + yield '
    '.format(escape(media['image']['src'], True)) + if 'caption' in media and 'credit' in media: + yield '
    ' + media['caption'] + ' ' + media['credit'] + '
    ' + elif 'caption' in media: + yield '
    ' + media['caption'] + '
    ' + yield '

    ' + + +def parse_inline(inl): + if inl.get('content', {}).get('name', '') == 'Image': + props = inl['content']['props'] + yield '

    ' + if 'image' in props: + yield '

    '.format(props['image']['src']) + if 'caption' in props: + yield '
    {}{}
    '.format( + props['caption']['text'], ' ' + props['caption']['credit'] + ) + yield '

    ' + if inl.get('content', {}).get('name', '') == 'ImageGroup': + if 'images' in inl['content']['props']: + for imgs in inl['content']['props']['images']: + yield '

    ' + if 'src' in imgs: + yield '

    '.format(imgs['src']) + if 'caption' in imgs: + yield '
    {}{}
    '.format( + imgs['caption']['text'], ' ' + imgs['caption']['credit'] + ) + yield '

    ' + + +def parse_cont(content): + for cont in content.get('content', {}): + if isinstance(cont, dict): + yield from parse_body(cont) + if isinstance(cont, str): + yield cont + + +def parse_body(x): + if isinstance(x, dict): + if 'type' in x: + tag = x['type'] + if tag == 'inline': + yield ''.join(parse_inline(x)) + elif 'attrs' in x and 'href' in x.get('attrs', ''): + yield '<' + tag + ' href = "{}">'.format(x['attrs']['href']) + for yld in parse_cont(x): + yield yld + yield '' + else: + yield '<' + tag + '>' + for yld in parse_cont(x): + yield yld + yield '' + elif isinstance(x, list): + for y in x: + if isinstance(y, dict): + yield from parse_body(y) + + +def parse_article(edg): + sc = edg['schma'] + yield '
    ' + escape(edg['sctn']) + '
    ' + yield '

    ' + escape(sc['sclTtl']) + '

    ' + yield '' + yield '

    ' + for line in parse_contributors(edg['cntrbGrp']): + yield line + ts = parse_iso8601(edg['mdDt'], as_utc=False).strftime('%B %d, %Y') + yield '

    Published: ' + escape(ts) + '
    ' + if 'readTime' in edg: + yield '
    ' + escape(edg['readTime']) + '
    ' + yield '

    ' + if edg.get('ldMda', {}).get('cmsType') == 'image': + for line in parse_lead_image(edg['ldMda']): + yield line + for main in edg['prismData']['mainComponents']: + if main['name'] == 'Body': + for item in main['props']['body']: + if isinstance(item, dict): + if item.get('type', '') == 'inline': + for inl in parse_inline(item): + yield inl + elif isinstance(item, list): + for line in item: + yield ''.join(parse_body(line)) + + +def article_parse(data): + yield "" + for frm in data['frms']: + if not frm: + continue + for mod in frm.get('mods', ()): + for edg in mod.get('edgs', ()): + if edg.get('cmsType') == 'ImmersiveLeadTile': + if 'image' in edg.get('cmsImage', {}): + for line in parse_lead_image(edg['cmsImage']): + yield line + if edg.get('cmsType') == 'ArticleBodyTile': + for line in parse_article(edg): + yield line + yield "" + + +class NatGeo(BasicNewsRecipe): + title = u'National Geographic History' + description = ( + 'From Caesar to Napoleon, the Pyramids to the Parthenon, the Trojan War to the Civil War—National Geographic ' + 'HISTORY draws readers in with more than 5,000 years of people, places, and things to explore.' 
+ ) + language = 'en' + encoding = 'utf8' + publisher = 'nationalgeographic.com' + category = 'science, nat geo' + __author__ = 'Kovid Goyal, unkn0wn' + description = 'Inspiring people to care about the planet since 1888' + timefmt = ' [%a, %d %b, %Y]' + no_stylesheets = True + use_embedded_content = False + remove_attributes = ['style'] + remove_javascript = False + masthead_url = 'https://i.natgeofe.com/n/e76f5368-6797-4794-b7f6-8d757c79ea5c/ng-logo-2fl.png?w=600&h=600' + resolve_internal_links = True + + extra_css = ''' + blockquote { color:#404040; } + .byline, i { font-style:italic; color:#202020; } + .cap { font-size:small; } + img {display:block; margin:0 auto;} + .cred { font-style:italic; font-size:small; color:#404040; } + .auth, .time, .sub { font-size:small; color:#5c5c5c; } + ''' + + def get_cover_url(self): + soup = self.index_to_soup('https://ngsingleissues.nationalgeographic.com/history') + wrap = soup.find(attrs={'class':'product-image-wrapper'}) + return wrap.img['src'] + + def parse_index(self): + soup = self.index_to_soup('https://www.nationalgeographic.com/history/history-magazine') + ans = [] + for article in soup.findAll('article'): + a = article.find('a') + url = a['href'] + if url.startswith('/'): + url = 'https://www.nationalgeographic.com' + url + title = self.tag_to_string(article.find(**classes('PromoTile__Title--truncated'))) + ans.append({'title': title, 'url': url}) + self.log(title, ' ', url) + return [('Articles', ans)] + + def preprocess_raw_html(self, raw_html, url): + data = extract_json(raw_html) + return '\n'.join(article_parse(data)) + + def preprocess_html(self, soup): + for h2 in soup.findAll('h2'): + h2.name = 'h4' + for img in soup.findAll('img', src=True): + # for high res images use '?w=2000&h=2000' + img['src'] = img['src'] + '?w=600&h=600' + return soup + + def populate_article_metadata(self, article, soup, first): + summ = soup.find(attrs={'class':'byline'}) + if summ: + article.summary = self.tag_to_string(summ) + 
article.text_summary = self.tag_to_string(summ) diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe new file mode 100644 index 0000000000..d4c179c88c --- /dev/null +++ b/recipes/natgeomag.recipe @@ -0,0 +1,231 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import absolute_import, division, print_function, unicode_literals + +import json +from datetime import date +from pprint import pformat + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre import prepare_string_for_xml as escape +from calibre.utils.iso8601 import parse_iso8601 + +edition = date.today().strftime('%B-%Y') + +# edition = 'March-2023' + +def classes(classes): + q = frozenset(classes.split(' ')) + return dict(attrs={ + 'class': lambda x: x and frozenset(x.split()).intersection(q)}) + + +def extract_json(raw): + s = raw.find("window['__natgeo__']") + script = raw[s:raw.find('', s)] + return json.loads(script[script.find('{'):].rstrip(';'))['page']['content']['prismarticle'] + + +def parse_contributors(grp): + for item in grp: + line = '
    ' + escape(item['title']) + ' ' + for c in item['contributors']: + line += escape(c['displayName']) + yield line + '
    ' + + +def parse_lead_image(media): + if 'image' in media: + yield '

    ' + if 'dsc' in media['image']: + yield '

    {}
    '.format( + escape(media['image']['src'], True), escape(media['image']['dsc'], True)) + else: + yield '
    '.format(escape(media['image']['src'], True)) + if 'caption' in media and 'credit' in media: + yield '
    ' + media['caption'] + ' ' + media['credit'] + '
    ' + elif 'caption' in media: + yield '
    ' + media['caption'] + '
    ' + yield '

    ' + + +def parse_inline(inl): + if inl.get('content', {}).get('name', '') == 'Image': + props = inl['content']['props'] + yield '

    ' + if 'image' in props: + yield '

    '.format(props['image']['src']) + if 'caption' in props: + yield '
    {}{}
    '.format( + props['caption']['text'], ' ' + props['caption']['credit'] + ) + yield '

    ' + if inl.get('content', {}).get('name', '') == 'ImageGroup': + if 'images' in inl['content']['props']: + for imgs in inl['content']['props']['images']: + yield '

    ' + if 'src' in imgs: + yield '

    '.format(imgs['src']) + if 'caption' in imgs: + yield '
    {}{}
    '.format( + imgs['caption']['text'], ' ' + imgs['caption']['credit'] + ) + yield '

    ' + + +def parse_cont(content): + for cont in content.get('content', {}): + if isinstance(cont, dict): + yield from parse_body(cont) + if isinstance(cont, str): + yield cont + + +def parse_body(x): + if isinstance(x, dict): + if 'type' in x: + tag = x['type'] + if tag == 'inline': + yield ''.join(parse_inline(x)) + elif 'attrs' in x and 'href' in x.get('attrs', ''): + yield '<' + tag + ' href = "{}">'.format(x['attrs']['href']) + for yld in parse_cont(x): + yield yld + yield '' + else: + yield '<' + tag + '>' + for yld in parse_cont(x): + yield yld + yield '' + elif isinstance(x, list): + for y in x: + if isinstance(y, dict): + yield from parse_body(y) + + +def parse_article(edg): + sc = edg['schma'] + yield '
    ' + escape(edg['sctn']) + '
    ' + yield '

    ' + escape(sc['sclTtl']) + '

    ' + yield '' + yield '

    ' + for line in parse_contributors(edg['cntrbGrp']): + yield line + ts = parse_iso8601(edg['mdDt'], as_utc=False).strftime('%B %d, %Y') + yield '

    Published: ' + escape(ts) + '
    ' + if 'readTime' in edg: + yield '
    ' + escape(edg['readTime']) + '
    ' + yield '

    ' + if edg.get('ldMda', {}).get('cmsType') == 'image': + for line in parse_lead_image(edg['ldMda']): + yield line + for main in edg['prismData']['mainComponents']: + if main['name'] == 'Body': + for item in main['props']['body']: + if isinstance(item, dict): + if item.get('type', '') == 'inline': + for inl in parse_inline(item): + yield inl + elif isinstance(item, list): + for line in item: + yield ''.join(parse_body(line)) + + +def article_parse(data): + yield "" + for frm in data['frms']: + if not frm: + continue + for mod in frm.get('mods', ()): + for edg in mod.get('edgs', ()): + if edg.get('cmsType') == 'ImmersiveLeadTile': + if 'image' in edg.get('cmsImage', {}): + for line in parse_lead_image(edg['cmsImage']): + yield line + if edg.get('cmsType') == 'ArticleBodyTile': + for line in parse_article(edg): + yield line + yield "" + + +class NatGeo(BasicNewsRecipe): + title = u'National Geographic Magazine' + description = 'The National Geographic, an American monthly magazine' + language = 'en' + encoding = 'utf8' + publisher = 'nationalgeographic.com' + category = 'science, nat geo' + __author__ = 'Kovid Goyal, unkn0wn' + description = 'Inspiring people to care about the planet since 1888' + timefmt = ' [%a, %d %b, %Y]' + no_stylesheets = True + use_embedded_content = False + remove_attributes = ['style'] + remove_javascript = False + masthead_url = 'https://i.natgeofe.com/n/e76f5368-6797-4794-b7f6-8d757c79ea5c/ng-logo-2fl.png?w=600&h=600' + remove_empty_feeds = True + resolve_internal_links = True + + extra_css = ''' + blockquote { color:#404040; } + .byline, i { font-style:italic; color:#202020; } + .cap { font-size:small; } + img {display:block; margin:0 auto;} + .cred { font-style:italic; font-size:small; color:#404040; } + .auth, .time, .sub { font-size:small; color:#5c5c5c; } + ''' + + def parse_index(self): + url = 'https://www.nationalgeographic.com/magazine/issue/' + edition.lower() + self.log('Downloading ', url) + self.timefmt = ' [' + edition + 
']' + soup = self.index_to_soup(url) + # png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg', soup.decode('utf-8')) + # self.cover_url = png[0] + '?w=1000&h=1000' + self.cover_url = soup.find('meta', attrs={'property':'og:image'})['content'].split('?')[0] + '?w=1000' + + name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()}) + self.title = 'National Geographic ' + self.tag_to_string(name) + ans = {} + ans2 = None + if photoart := soup.find(attrs={'class':lambda x: x and 'BgImagePromo__Container__Text__Link' in x.split()}): + ans2 = [] + title = self.tag_to_string(photoart) + url = photoart['href'] + if url.startswith('/'): + url = 'https://www.nationalgeographic.com' + url + ans2.append(('Photo Essay', [{'title': title, 'url': url}])) + for gird in soup.findAll(attrs={'class':'GridPromoTile'}): + for article in soup.findAll('article'): + a = article.find('a') + url = a['href'] + if url.startswith('/'): + url = 'https://www.nationalgeographic.com' + url + if '/graphics/' in url: + continue + section = self.tag_to_string(article.find(**classes('SectionLabel'))) + title = self.tag_to_string(article.find(**classes('PromoTile__Title--truncated'))) + articles = ans.setdefault(section, []) + articles.append({'title': title, 'url': url}) + self.log(pformat(ans)) + if ans2: + return list(ans.items()) + ans2 + return list(ans.items()) + + def preprocess_raw_html(self, raw_html, url): + data = extract_json(raw_html) + return '\n'.join(article_parse(data)) + + def preprocess_html(self, soup): + for h2 in soup.findAll('h2'): + h2.name = 'h4' + for img in soup.findAll('img', src=True): + # for high res images use '?w=2000&h=2000' + img['src'] = img['src'] + '?w=600&h=600' + return soup + + def populate_article_metadata(self, article, soup, first): + summ = soup.find(attrs={'class':'byline'}) + if summ: + article.summary = self.tag_to_string(summ) + article.text_summary = self.tag_to_string(summ) diff --git 
a/recipes/nature.recipe b/recipes/nature.recipe index 91eaa74328..5809737e0b 100644 --- a/recipes/nature.recipe +++ b/recipes/nature.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python - +import re from collections import defaultdict from calibre.web.feeds.news import BasicNewsRecipe, classes @@ -51,11 +51,11 @@ class Nature(BasicNewsRecipe): def parse_index(self): soup = self.index_to_soup(BASE + '/nature/current-issue') - self.cover_url = 'https:' + soup.find( + self.cover_url = soup.find( 'img', attrs={'data-test': check_words('issue-cover-image')} )['src'] try: - self.cover_url = self.cover_url.replace("w200", "w500") # enlarge cover size resolution + self.cover_url = re.sub(r"\bw\d+\b", "w1000", self.cover_url) # enlarge cover size resolution except: """ failed, img src might have changed, use default width 200 diff --git a/recipes/nautilus.recipe b/recipes/nautilus.recipe index 4470af6aa2..40aa812c28 100644 --- a/recipes/nautilus.recipe +++ b/recipes/nautilus.recipe @@ -33,7 +33,7 @@ class Nautilus(BasicNewsRecipe): remove_tags = [ classes( 'article-action-list article-bottom-newsletter_box main-post-comments-toggle-wrap main-post-comments-wrapper' - ' social-share supported-one article-collection_box' + ' social-share supported-one article-collection_box browsi-ad' ) ] @@ -63,12 +63,11 @@ class Nautilus(BasicNewsRecipe): ] def get_cover_url(self): - soup = self.index_to_soup('https://www.presspassnow.com/nautilus/issues/') - div = soup.find('li', **classes('product')) - if div: - self.cover_url = div.find('img', src=True)['src'] + soup = self.index_to_soup('https://nautil.us/shop/category/issues/') + a = soup.find('a', attrs={'href':lambda x: x and x.startswith('https://nautil.us/shop/issues/issue-')}) + if a: + self.cover_url = a.img['src'].split('?')[0] return getattr(self, 'cover_url', self.cover_url) - def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'].split('?')[0] diff --git 
a/recipes/new_scientist.recipe b/recipes/new_scientist.recipe index 5f55d013c7..ccbc2fc73d 100644 --- a/recipes/new_scientist.recipe +++ b/recipes/new_scientist.recipe @@ -25,7 +25,6 @@ __author__ = 'Darko Miletic' newscientist.com ''' -import re from calibre.web.feeds.news import BasicNewsRecipe @@ -37,7 +36,7 @@ def classes(classes): class NewScientist(BasicNewsRecipe): title = 'New Scientist - Online News w. subscription' - description = 'Science news and science articles from New Scientist.' + description = 'Science news and science articles from New Scientist, based on feeds.' language = 'en' publisher = 'Reed Business Information Ltd.' category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software' @@ -49,40 +48,55 @@ class NewScientist(BasicNewsRecipe): needs_subscription = 'optional' remove_empty_feeds = True ignore_duplicate_articles = {'url'} - compress_news_images = False - scale_news_images = True resolve_internal_links = True - extra_css = """ - body{font-family: "PT Serif", serif} - img{margin-bottom: 0.8em; display: block} - .quotebx{font-size: x-large; font-weight: bold; margin-right: 2em; margin-left: 2em} - .article-title,h2,h3{font-family: "Lato Black", sans-serif} - .strap{font-family: "Lato Light", sans-serif} - .quote{font-family: "Lato Black", sans-serif} - .box-out{font-family: "Lato Regular", sans-serif} - .wp-caption-text{font-family: "Lato Bold", sans-serif; font-size:x-small;} - """ + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://cdn.shopify.com/s/files/1/0266/6843/3505/files/logo.svg?v=1629189295' + + conversion_options = { + 'comment': description, + 'tags': category, + 'publisher': publisher, + 'language': language + } + + extra_css = ''' + img {display:block; margin:0 auto;} + .ArticleHeader__Category { font-size:small; color:#404040; } + .ArticleHeader__Author, .ArticleHeader__DateTimeWrapper { font-size:small; } + .ArticleHeader__Copy { font-style:italic; 
color:#202020; } + .ArticleImage { font-size:small; text-align:center; } + .ArticleImageCaption__Credit { font-size:smaller; } + ''' keep_only_tags = [ - classes('article-header article__content') + classes('ArticleHeader ArticleContent') ] remove_tags = [ - classes('social__button-container') + dict(name=['svg', 'button']), + classes('ArticleHeader__SocialWrapper AdvertWrapper ReadMoreWithImage ArticleTopics') ] def preprocess_html(self, soup): - for img in soup.findAll('img', attrs={'data-src': True}): - img['src'] = img['data-src'] - for img in soup.findAll('img', attrs={'data-srcset': True}): - img['src'] = img['data-srcset'].split(',')[-1].strip().split()[0] - img['width'] = img['height'] = '' + time = soup.find(**classes('ArticleHeader__DateTimeWrapper')) + if time: + time.name = 'div' + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'].split('?')[0] + '?width=700' + for figc in soup.findAll('figcaption'): + for p in figc.findAll('p'): + p.name = 'div' return soup def get_article_url(self, article): ans = BasicNewsRecipe.get_article_url(self, article) return ans.partition('?')[0] + def print_version(self, url): + if '/video/' in url: + return None + return url + def get_browser(self): br = BasicNewsRecipe.get_browser(self) if self.username is not None and self.password is not None: @@ -94,7 +108,7 @@ class NewScientist(BasicNewsRecipe): br['email'] = self.username br['password'] = self.password res = br.submit().read() - if b'>Log out<' not in res: + if b'>Your account<' not in res: raise ValueError('Failed to log in to New Scientist, check your username and password') return br @@ -111,19 +125,6 @@ class NewScientist(BasicNewsRecipe): ] def get_cover_url(self): - cover_url = None - soup = self.index_to_soup( - 'https://www.newscientist.com/issue/current/') - cover_item = soup.find( - 'img', attrs={'class': 'issue-new-magazine-cover'}) - if cover_item: - cover_url = self.image_url_processor(None, cover_item['src']) - # 
Configure series and issue number - issue_nr = soup.find('div', attrs={'class': 'magnavissue'}) - if issue_nr: - if issue_nr.string is not None: - non_decimal = re.compile(r'[^\d.]+') - nr = non_decimal.sub('', issue_nr.string) - self.conversion_options.update({'series': 'New Scientist'}) - self.conversion_options.update({'series_index': nr}) - return cover_url + soup = self.index_to_soup('https://www.newscientist.com/issues/current/') + div = soup.find('div', attrs={'class':'ThisWeeksMagazineHero__CoverInfo'}) + return div.find(**classes('ThisWeeksMagazineHero__ImageLink')).img['src'] diff --git a/recipes/new_scientist_mag.recipe b/recipes/new_scientist_mag.recipe new file mode 100644 index 0000000000..c1559e51e5 --- /dev/null +++ b/recipes/new_scientist_mag.recipe @@ -0,0 +1,110 @@ +''' +newscientist.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe, classes + +class NewScientist(BasicNewsRecipe): + title = 'New Scientist Magazine' + __author__ = 'unkn0wn' + description = ( + 'New Scientist is the world’s most popular weekly science and technology publication. ' + 'We cover international news from a scientific standpoint, and ask the big-picture questions ' + 'about life, the universe and what it means to be human. If someone in the world has a good idea, ' + 'you will read about it in New Scientist.' + ) + language = 'en' + publisher = 'Reed Business Information Ltd.' 
+ category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software' + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + needs_subscription = 'optional' + remove_empty_feeds = True + ignore_duplicate_articles = {'url'} + resolve_internal_links = True + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://cdn.shopify.com/s/files/1/0266/6843/3505/files/logo.svg?v=1629189295' + + conversion_options = { + 'comment': description, + 'tags': category, + 'publisher': publisher, + 'language': language + } + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + if self.username is not None and self.password is not None: + def is_login_form(form): + return "action" in form.attrs and form.attrs['action'] == "/login/" + + br.open('https://www.newscientist.com/login/') + br.select_form(predicate=is_login_form) + br['email'] = self.username + br['password'] = self.password + res = br.submit().read() + if b'>Your account<' not in res: + raise ValueError('Failed to log in to New Scientist, check your username and password') + return br + + # def print_version(self, url): + # return 'https://webcache.googleusercontent.com/search?q=cache:' + url.split('?')[0] + + extra_css = ''' + img {display:block; margin:0 auto;} + .ArticleHeader__Category { font-size:small; color:#404040; } + .ArticleHeader__Author, .ArticleHeader__DateTimeWrapper { font-size:small; } + .ArticleHeader__Copy { font-style:italic; color:#202020; } + .ArticleImage { font-size:small; text-align:center; } + .ArticleImageCaption__Credit { font-size:smaller; } + ''' + + keep_only_tags = [ + classes('ArticleHeader ArticleContent') + ] + + remove_tags = [ + dict(name=['svg', 'button']), + classes('ArticleHeader__SocialWrapper AdvertWrapper ReadMoreWithImage ArticleTopics') + ] + + def parse_index(self): + soup = self.index_to_soup('https://www.newscientist.com/issues/current/') + div = soup.find('div', 
attrs={'class':'ThisWeeksMagazineHero__CoverInfo'}) + tme = div.find(**classes('ThisWeeksMagazineHero__MagInfoHeading')) + self.log('Downloading issue:', self.tag_to_string(tme)) + self.timefmt = ' [' + self.tag_to_string(tme) + ']' + self.cover_url = div.find(**classes('ThisWeeksMagazineHero__ImageLink')).img['src'] + + feeds = [] + for cont in soup.findAll(attrs={'class':'TableOfContents__Section'}): + sec = self.tag_to_string(cont.find('h3')) + self.log(sec) + articles = [] + for a in cont.findAll('a', attrs={'class':'CardLink'}): + url = a['href'] + if url.startswith('http') is False: + url = 'https://www.newscientist.com' + a['href'] + title = self.tag_to_string(a.find(**classes('Card__Title'))) + desc = '' + desc += self.tag_to_string(a.find(**classes('Card__Category'))) + teaser = a.find(**classes('Card__TeaserCopy')) + if teaser: + desc += ' | ' + self.tag_to_string(teaser) + self.log('\t', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'description': desc, 'url': url}) + if articles: + feeds.append((sec, articles)) + return feeds + + def preprocess_html(self, soup): + time = soup.find(**classes('ArticleHeader__DateTimeWrapper')) + if time: + time.name = 'div' + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'].split('?')[0] + '?width=700' + for figc in soup.findAll('figcaption'): + for p in figc.findAll('p'): + p.name = 'div' + return soup diff --git a/recipes/new_york_review_of_books.recipe b/recipes/new_york_review_of_books.recipe index 5d368ee52c..48bca9d3d7 100644 --- a/recipes/new_york_review_of_books.recipe +++ b/recipes/new_york_review_of_books.recipe @@ -62,7 +62,7 @@ class NewYorkReviewOfBooks(BasicNewsRecipe): # Find cover cover = soup.find('img', attrs={'class':'border-light-gray'}) if cover is not None: - self.cover_url = absurl(cover['src']) + self.cover_url = absurl(cover['data-lazy-src']) self.log('Found cover at:', self.cover_url) # Find date @@ -91,3 +91,8 @@ class 
NewYorkReviewOfBooks(BasicNewsRecipe): 'description': desc}) return [('Current Issue', articles)] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-lazy-src':True}): + img['src'] = img['data-lazy-src'] + return soup diff --git a/recipes/new_york_review_of_books_no_sub.recipe b/recipes/new_york_review_of_books_no_sub.recipe index 29de2aebec..d179aca304 100644 --- a/recipes/new_york_review_of_books_no_sub.recipe +++ b/recipes/new_york_review_of_books_no_sub.recipe @@ -52,7 +52,7 @@ class NewYorkReviewOfBooks(BasicNewsRecipe): # Find cover cover = soup.find('img', attrs={'class':'border-light-gray'}) if cover is not None: - self.cover_url = absurl(cover['src']) + self.cover_url = absurl(cover['data-lazy-src']) self.log('Found cover at:', self.cover_url) # Find date @@ -81,3 +81,8 @@ class NewYorkReviewOfBooks(BasicNewsRecipe): 'description': desc}) return [('Current Issue', articles)] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-lazy-src':True}): + img['src'] = img['data-lazy-src'] + return soup diff --git a/recipes/new_yorker.recipe b/recipes/new_yorker.recipe index b74c66713f..adaa5e398b 100644 --- a/recipes/new_yorker.recipe +++ b/recipes/new_yorker.recipe @@ -34,6 +34,7 @@ class NewYorker(BasicNewsRecipe): timefmt = ' [%b %d]' encoding = 'utf-8' extra_css = ''' + img { display:block; margin:0 auto; } .byline { font-size:smaller; font-weight: bold;} h3 { margin-bottom: 6px; } .caption { font-size: smaller; font-style: italic; font-weight: normal; } @@ -58,18 +59,53 @@ class NewYorker(BasicNewsRecipe): ), prefixed_classes('ConsentBannerWrapper- ResponsiveCartoonCTA-'), dict(childtypes='iframe'), + dict(name='svg'), ] remove_attributes = ['style'] + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + if self.output_profile.short_name.startswith('kindle'): + # Reduce image sizes to get file size below amazon's email + # sending threshold + 
self.web2disk_options.compress_news_images = True + self.web2disk_options.compress_news_images_auto_size = 5 + self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') + def preprocess_html(self, soup): + w = '/w_320' # use '/w_640' for highres + for img in soup.findAll('img'): + if img.has_attr('srcset'): + for x in img['srcset'].split(): + if w in x: + img['src'] = x + elif img.find_previous_sibling('source', attrs={'srcset':True}): + srcset = img.find_previous_sibling('source', attrs={'srcset':True}) + for x in srcset['srcset'].split(): + if w in x: + img['src'] = x + elif '/w_560' in x: + img['src'] = x + for src in soup.findAll('source'): + src.decompose() for noscript in soup.findAll('noscript'): noscript.name = 'div' return soup + # def preprocess_image(self, img_data, image_url): + # from PIL import Image + # from calibre import fit_image + # from io import BytesIO + # img = Image.open(BytesIO(img_data)).convert('RGB') + # scaled, nwidth, nheight = fit_image(img.width, img.height, 1024, 1024) + # if scaled: + # img = img.resize((nwidth, nheight)) + # buf = BytesIO() + # img.save(buf, format='JPEG') + # return buf.getvalue() + def parse_index(self): - # Get cover - cover_soup = self.index_to_soup('https://www.newyorker.com/archive') cover_img = cover_soup.find( attrs={'class': lambda x: x and 'MagazineSection__cover___' in x}) @@ -126,7 +162,7 @@ class NewYorker(BasicNewsRecipe): # Get description body = story.find(attrs={'class': 'River__dek___CayIg'}) if body is not None: - desc = body.contents[0] + desc = str(body.contents[0]) self.log('Found article:', title) self.log('\t' + url) diff --git a/recipes/newrepublicmag.recipe b/recipes/newrepublicmag.recipe new file mode 100644 index 0000000000..beb43200a6 --- /dev/null +++ b/recipes/newrepublicmag.recipe @@ -0,0 +1,314 @@ +""" +newrepublic.com +""" +import json +from functools import cmp_to_key +from urllib.parse import urljoin, urlencode, 
urlsplit, urlparse + +from calibre import iswindows +from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.utils.date import parse_date +from calibre.web.feeds.news import BasicNewsRecipe + + +_issue_url = "" # example: https://newrepublic.com/magazine/may-2023 + + +def sort_section(a, b, sections_sort): + try: + a_index = sections_sort.index(a["section"]) + except ValueError: + a_index = 999 + try: + b_index = sections_sort.index(b["section"]) + except ValueError: + b_index = 999 + + if a_index < b_index: + return -1 + if a_index > b_index: + return 1 + if a["section"] == b["section"]: + return -1 if a["date"] < b["date"] else 1 + return -1 if a["section"] < b["section"] else 1 + + +class NewRepublicMagazine(BasicNewsRecipe): + title = "The New Republic Magazine" + language = "en" + __author__ = "ping" + description = ( + "Founded in 1914, The New Republic is a media organization dedicated to addressing " + "today’s most critical issues. https://newrepublic.com/magazine" + ) + publication_type = "magazine" + use_embedded_content = False + masthead_url = "https://images.newrepublic.com/f5acdc0030e3212e601040dd24d5c2c0c684b15f.png?w=512&q=65&dpi=1&fit=crop&crop=faces&h=256" + remove_attributes = ["height", "width"] + ignore_duplicate_articles = {"title", "url"} + remove_empty_feeds = True + compress_news_images_auto_size = 6 + requires_version = (5, 0, 0) + + BASE_URL = "https://newrepublic.com" + + extra_css = """ + h1.headline { margin-bottom: 0.4rem; } + h2.subheadline { font-style: italic; margin-bottom: 1rem; font-weight: normal; } + .article-meta { margin-bottom: 1rem; } + .article-meta span { display: inline-block; font-weight: bold; margin-right: 0.5rem; } + .article-meta span:last-child { font-weight: normal; } + div.pullquote { font-size: 1.25rem; margin-left: 0; text-align: center; } + .lede-media img, .article-embed img, img { + display: block; margin-bottom: 0.3rem; max-width: 100%; height: auto; + box-sizing: border-box; + } + 
.lede-media .caption, .article-embed .caption { font-size: 0.8rem; } + div.author-bios { margin-top: 2rem; font-style: italic; border-top: solid 1px dimgray; } + """ + + def _article_endpoint(self, nid): + """ + Graphql endpoint to fetch full article + :param nid: + :return: + """ + query = """ +query ($id: ID, $nid: ID) { + Article(id: $id, nid: $nid) { + ...ArticlePageFields + } +} +fragment ArticlePageFields on Article { + id + nid + slug + title + cleanTitle + badge + frontPage { + id + slug + title + } + LinkedSeriesId + authors { + id + name + slug + blurb + meta { + twitter + } + } + body + publishedAt + displayAt + publicPublishedDate + status + ledeImage { + id + src + format + width + height + alt + } + ledeAltImage { + id + src + format + width + height + alt + } + url + urlFull + meta { + wordCount + template + navigationTheme + bigLede + hideLede + cropModeFronts + ledeOverrideSource + disableAds + } + ledeImageCredit + ledeImageCreditBottom + ledeImageRealCaption + bylines + deck + type + galleries { + id + galleryData { + captionText + creditText + image { + id + src + width + height + } + } + } + tags { + id + slug + label + } +}""" + params = {"query": query, "variables": json.dumps({"nid": str(nid)})} + return f"https://newrepublic.com/graphql?{urlencode(params)}" + + def _resize_image(self, image_url, width, height): + """ + Rewrite the image url to fetch a device appropriate sized one instead + of the full-res one + + :param image_url: + :param width: + :param height: + :return: + """ + crop_params = { + "auto": "compress", + "ar": f"{width}:{height}", + "fm": "jpg", + "fit": "crop", + "crop": "faces", + "ixlib": "react-9.0.2", + "dpr": 1, + "q": 65, + "w": self.scale_news_images[0] if self.scale_news_images else 800, + } + url_tuple = urlsplit(image_url) + return f"{url_tuple.scheme}://{url_tuple.netloc}{url_tuple.path}?{urlencode(crop_params)}" + + def populate_article_metadata(self, article, soup, first): + # pick up the og link from 
preprocess_raw_html() and set it as url instead of the api endpoint + og_link = soup.select("[data-og-link]") + if og_link: + article.url = og_link[0]["data-og-link"] + + def preprocess_raw_html(self, raw_html, url): + # formulate the api response into html + article = json.loads(raw_html)["data"]["Article"] + # Example: 2022-08-12T10:00:00.000Z + date_published_loc = parse_date(article["publishedAt"]) + # authors + author_bios_html = "" + post_authors = [] + try: + post_authors = [a["name"] for a in article.get("authors", [])] + if post_authors: + author_bios_html = "".join( + [a.get("blurb", "") for a in article.get("authors", [])] + ) + author_bios_html = f'
    {author_bios_html}
    ' + except (KeyError, TypeError): + pass + + # lede image + lede_image_html = "" + if article.get("ledeImage"): + img = article["ledeImage"] + lede_img_url = self._resize_image( + urljoin(self.BASE_URL, img["src"]), img["width"], img["height"] + ) + lede_image_caption = "" + if article.get("ledeImageRealCaption"): + lede_image_caption = ( + f'{article["ledeImageRealCaption"]}>/span>' + ) + lede_image_html = f"""

    + {lede_image_caption} +

    """ + + body_soup = BeautifulSoup(article["body"], features="html.parser") + for img in body_soup.find_all("img", attrs={"data-serialized": True}): + try: + img_info = json.loads(img["data-serialized"]) + img_src = self._resize_image( + urljoin(self.BASE_URL, img_info["src"]), + img_info["width"], + img_info["height"], + ) + img["src"] = img_src + del img["data-serialized"] + except: # noqa + pass + + return f""" + {article["cleanTitle"]} + +
    +

    {article["cleanTitle"]}

    + {('

    ' + article["deck"] + "

    ") if article.get("deck") else ""} + + {lede_image_html} + {str(body_soup)} + {author_bios_html} +
    + """ + + def parse_index(self): + br = self.get_browser() + params = "" + if _issue_url: + month = urlparse(_issue_url).path.split("/")[-1] + params = f'?{urlencode({"magazineTag": month})}' + res = br.open_novisit(f"https://newrepublic.com/api/content/magazine{params}") + magazine = json.loads(res.read().decode("utf-8"))["data"] + self.log.debug(f'Found issue: {magazine["metaData"]["issueTag"]["text"]}') + self.timefmt = f': {magazine["metaData"]["issueTag"]["text"]}' + self.cover_url = urljoin(self.BASE_URL, magazine["metaData"]["image"]["src"]) + + feed_articles = [] + for k, articles in magazine.items(): + if not (k.startswith("magazine") and articles): + continue + try: + for article in articles: + self.log.debug(f'Found article: {article["title"]}') + feed_articles.append( + { + "url": self._article_endpoint(article["nid"]), + "title": article["title"].replace("\n", " "), + "description": article.get("deck", ""), + "date": article["publishedAt"], + "section": k[len("magazine") :], + } + ) + except TypeError: + # not iterable + pass + + sort_sections = [ + "Cover", + "Editorsnote", + "Features", + "StateOfTheNation", + "ResPublica", + "Columns", + "Upfront", + "Backstory", + "SignsAndWonders", + "Usandtheworld", + "Booksandthearts", + "Poetry", + "Exposure", + ] + sort_category_key = cmp_to_key(lambda a, b: sort_section(a, b, sort_sections)) + return [ + ( + magazine["metaData"]["issueTag"]["text"], + sorted(feed_articles, key=sort_category_key), + ) + ] diff --git a/recipes/newslaundry.recipe b/recipes/newslaundry.recipe new file mode 100644 index 0000000000..9873b60157 --- /dev/null +++ b/recipes/newslaundry.recipe @@ -0,0 +1,37 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class newslaundry(BasicNewsRecipe): + title = 'Newslaundry' + __author__ = 'unkn0wn' + description = ( + 'Newslaundry is a reader-supported, independent news media company. 
In an industry driven by corporate' + ' and government interests, we strongly believe in the need for an independent news model, and a free' + ' and accountable press.' + ) + language = 'en_IN' + masthead_url = 'https://images.assettype.com/newslaundry/2020-01/d91cad07-9650-47e9-8bdc-9a6247354d95/Header_logo_NL__2_New.png' + encoding = 'utf-8' + no_stylesheets = True + remove_javascript = True + oldest_article = 7 # days + resolve_internal_links = True + + ignore_duplicate_articles = {'url'} + + # keep_only_tags = [classes('headline subheadline authorWithTimeStamp story-card')] + + feeds = [ + ('Articles', 'https://www.newslaundry.com/stories.rss?time-period=last-7-days') + ] + + # def preprocess_html(self, soup): + # if h1 := soup.find(**classes('headline')): + # h1.name = 'h1' + # if h3 := soup.find(**classes('subheadline')): + # h3.name = 'h3' + # return soup + + def print_version(self, url): + if 'hindi.newslaundry' in url: + self.abort_article('Skipping hindi article') # remove this line if you want hindi articles. + return url diff --git a/recipes/newsminute.recipe b/recipes/newsminute.recipe new file mode 100644 index 0000000000..ac675581a9 --- /dev/null +++ b/recipes/newsminute.recipe @@ -0,0 +1,70 @@ +from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.ptempfile import PersistentTemporaryFile + + +class newsminute(BasicNewsRecipe): + title = 'The News Minute' + __author__ = 'unkn0wn' + description = ( + 'The News Minute is a digital news platform reporting and writing on issues in India, with a ' + 'specific focus on the 5 southern states. Our content includes news, ground reportage, news ' + 'analysis, opinion and blogs. Our core strengths include our deep access in the southern states, ' + 'incisive editorial acumen and insightful news analysis and opinions.' 
+ ) + language = 'en_IN' + + no_stylesheets = True + remove_javascript = True + masthead_url = 'https://pkcindia.com/wp-content/uploads/2021/09/TMN-Logo-1.png' + ignore_duplicate_articles = {'title', 'url'} + resolve_internal_links = True + remove_empty_feeds = True + remove_attributes = ['style', 'height', 'width'] + articles_are_obfuscated = True + + def get_obfuscated_article(self, url): + br = self.get_browser() + try: + br.open(url) + except Exception as e: + url = e.hdrs.get('location') + soup = self.index_to_soup(url) + link = soup.find('a', href=True) + skip_sections =[ # add sections you want to skip + '/video/', '/videos/', '/media/', 'podcast-' + ] + if any(x in link['href'] for x in skip_sections): + self.log('Aborting Article ', link['href']) + self.abort_article('skipping video links') + + self.log('Downloading ', link['href']) + html = br.open(link['href']).read() + pt = PersistentTemporaryFile('.html') + pt.write(html) + pt.close() + return pt.name + + keep_only_tags = [ + classes( + 'arr--section-name arr--story--headline-h1 arr--sub-headline arr--hero-image author-card-wrapper arr--story-page-card-wrapper' + ), + ] + + feeds = [] + + sections = [ + 'tamil-nadu', 'telangana', 'andhra-pradesh', 'karnataka', 'kerala' + ] + + for sec in sections: + a = 'https://news.google.com/rss/search?q=when:27h+allinurl:https%3A%2F%2Fwww.thenewsminute.com{}&hl=en-IN&gl=IN&ceid=IN:en' + feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) + feeds.append(('Others', a.format(''))) + + def populate_article_metadata(self, article, soup, first): + article.title = article.title.replace(' - The News Minute', '') + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'] + return soup diff --git a/recipes/nhk_news.recipe b/recipes/nhk_news.recipe new file mode 100644 index 0000000000..61e906aa81 --- /dev/null +++ b/recipes/nhk_news.recipe @@ -0,0 +1,31 @@ +from calibre.web.feeds.news import 
BasicNewsRecipe + +# feed source: https://www.nhk.or.jp/toppage/rss/index.html + + +class ReutersJa(BasicNewsRecipe): + + title = 'NHK News' + description = 'NHK News in Japanese' + __author__ = 'Richard A. Steps' + use_embedded_content = False + language = 'ja' + max_articles_per_feed = 30 + remove_javascript = True + auto_cleanup = True + + # This line added to deal with bots on site + def get_browser(self, *a, **kw): + kw['user_agent'] = 'common_words/based' + return super().get_browser(*a, **kw) + + feeds = [ + ('主要ニュース', 'https://www.nhk.or.jp/rss/news/cat0.xml?format=xml'), + ('社会', 'https://www.nhk.or.jp/rss/news/cat1.xml?format=xml'), + ('科学・医療', 'https://www.nhk.or.jp/rss/news/cat3.xml?format=xml'), + ('政治', 'https://www.nhk.or.jp/rss/news/cat4.xml?format=xml'), + ('経済', 'https://www.nhk.or.jp/rss/news/cat5.xml?format=xml'), + ('国際', 'https://www.nhk.or.jp/rss/news/cat6.xml?format=xml'), + ('スポーツ', 'https://www.nhk.or.jp/rss/news/cat7.xml?format=xml'), + ('文化・エンタメ', 'https://www.nhk.or.jp/rss/news/cat2.xml?format=xml') + ] diff --git a/recipes/nikkeiasia.recipe b/recipes/nikkeiasia.recipe new file mode 100644 index 0000000000..74ffdc3ae6 --- /dev/null +++ b/recipes/nikkeiasia.recipe @@ -0,0 +1,67 @@ +from calibre.web.feeds.news import BasicNewsRecipe, classes + +def absurl(url): + if url.startswith('/'): + url = 'https://asia.nikkei.com' + url + return url + +class nikkei(BasicNewsRecipe): + title = 'Nikkei Asia' + __author__ = 'unkn0wn' + language = 'en' + no_stylesheets = True + description = ( + 'Japan, China, India and Southeast Asia news and expert analysis published by Nikkei' + ', an award-winning independent provider of quality journalism.' 
+ ) + masthead_url = 'https://www.global-nikkei.com/22ia/images/logo/Nikkei-Asia-Logo.svg' + remove_attributes = ['style', 'height', 'width'] + ignore_duplicate_articles = {'url'} + resolve_internal_links = True + remove_empty_feeds = True + encoding = 'utf-8' + use_embedded_content = False + + extra_css = ''' + .article-header__sub-title { font-style:italic; color:#202020; } + .article-header__details, .article__details { font-size:small; font-weight:bold; } + .timestamp { color:#5c5c5c; } + .article-header__topic { font-size:small; font-weight:bold; color:#5c5c5c; } + .article__image, .article__caption { font-size:small; text-align:center; color:#202020; } + ''' + + keep_only_tags = [ + classes('article-header__container article') + ] + + remove_tags = [ + dict(name='svg'), + classes('article__advert share__container no-print') + ] + + def parse_index(self): + archives = self.index_to_soup('https://asia.nikkei.com/Print-Edition/Archives') + card = archives.find(attrs={'class':'card-article__body'}) + self.title = 'Nikkei Asia: ' + self.tag_to_string(card.h4).strip() + self.description = self.tag_to_string(card.p) + self.timefmt = ' [' + self.tag_to_string(card.span.time).strip() + ']' + self.log('Downloading ', self.title, self.timefmt, self.description) + + soup = self.index_to_soup(absurl(card.h4.a['href'])) + self.cover_url = soup.find(**classes('print-edition__cover-image')).img['src'] + + ans = [] + + for art in soup.findAll(**classes('card-article__body')): + head = art.find(**classes('card-article__headline')) + title = self.tag_to_string(head).strip() + url = absurl(head.a['href']) + desc = '' + if exc := art.find(**classes('card-article__excerpt')): + desc = self.tag_to_string(exc).strip() + self.log( title, '\n ', desc, '\n ', url ) + ans.append({'title': title, 'url': url, 'description': desc}) + return [('Articles', ans)] + + def print_version(self, url): + return 'https://webcache.googleusercontent.com/search?q=cache:' + url.split('?')[0] diff --git 
a/recipes/noaa.recipe b/recipes/noaa.recipe index 335711cc99..0d653bb525 100644 --- a/recipes/noaa.recipe +++ b/recipes/noaa.recipe @@ -29,7 +29,7 @@ class NOAA(BasicNewsRecipe): use_embedded_content = False simultaneous_downloads = 1 encoding = 'utf-8' - lang = 'en-US' + lang = 'en' language = 'en' remove_tags = [dict(name=['embed', 'object']), dict(name='div', attrs={'id': 'leftNav'}), dict(name='div', attrs={'id': 'topNav'}), dict(name='div', attrs={'class': 'feedback_box'}), dict(name='div', attrs={'id': 'midBlock'}), dict(name='div', attrs={'id': 'footer'}) ] # noqa diff --git a/recipes/nos_nl.recipe b/recipes/nos_nl.recipe index 584777c39e..4a4f377b77 100644 --- a/recipes/nos_nl.recipe +++ b/recipes/nos_nl.recipe @@ -8,7 +8,7 @@ class nosnl(BasicNewsRecipe): __author__ = u'erkfuizfeuadjfjzefzfuzeff' description = u'News from the Netherlands in Dutch' oldest_article = 7 - language = 'nl_NL' + language = 'nl' max_articles_per_feed = 100 no_stylesheets = True diff --git a/recipes/nymag.recipe b/recipes/nymag.recipe index 2bc8166ddb..46050559af 100644 --- a/recipes/nymag.recipe +++ b/recipes/nymag.recipe @@ -24,12 +24,10 @@ class NewYorkMagazine(BasicNewsRecipe): remove_javascript = True encoding = 'utf-8' keep_only_tags = [ - classes('lede-text headline-primary article-timestamp by-authors'), - dict(id='main'), - dict(itemprop='articleBody'), + dict(name='article', attrs={'class':lambda x: x and 'article' in x.split()}) ] remove_tags = [ - classes('related-stories start-discussion'), + classes('related-stories start-discussion newsletter-flex-text comments-link tags related secondary-area'), dict(id=['minibrowserbox', 'article-related', 'article-tools']) ] remove_attributes = ['srcset'] @@ -70,6 +68,9 @@ class NewYorkMagazine(BasicNewsRecipe): return feeds def preprocess_html(self, soup): + if lede := soup.findAll('div', attrs={'class':lambda x: x and 'lede-image-wrapper' in x.split()}): + if len(lede) > 1: + lede[1].extract() for img in soup.findAll('img', 
attrs={'data-src': True}): img['src'] = img['data-src'] return soup diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index be947f6591..d11c5cc92a 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -197,7 +197,33 @@ class NewYorkTimes(BasicNewsRecipe): def parse_article_group(self, container): for li in container.findAll('li'): article = li.find('article') - h2 = article.find('h2') + if article is None: + a = li.find('a', href=True) + if a is not None: + title = self.tag_to_string(li.find(['h3', 'h2'])).strip() + paras = li.findAll('p') + if not title: + title = self.tag_to_string(paras[0]).strip() + if not title: + raise ValueError('No title found in article') + url = a['href'] + if url.startswith('/'): + url = 'https://www.nytimes.com' + url + desc = '' + if len(paras) > 0: + desc = self.tag_to_string(paras[-1]) + date = '' + d = date_from_url(url) + if d is not None: + date = format_date(d) + today = datetime.date.today() + delta = today - d + if delta.days > oldest_web_edition_article: + self.log.debug('\tSkipping article', title, 'as it is too old') + continue + yield {'title': title, 'url': url, 'description': desc, 'date': date} + continue + h2 = article.find(['h2', 'h3']) if h2 is not None: title = self.tag_to_string(h2) a = h2.find('a', href=True) diff --git a/recipes/nytimes_cooking.recipe b/recipes/nytimes_cooking.recipe index dc466d246c..76873b0ff5 100644 --- a/recipes/nytimes_cooking.recipe +++ b/recipes/nytimes_cooking.recipe @@ -7,7 +7,7 @@ class NYTCooking(BasicNewsRecipe): title = 'NY Times Cooking' description = 'NY Times Cooking Magazine' __author__ = 'gourav' - language = 'en_US' + language = 'en' encoding = 'utf-8' oldest_article = 2 max_articles_per_feed = 30 diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index 4f26842323..36455d2d83 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -197,7 +197,33 @@ class NewYorkTimes(BasicNewsRecipe): def parse_article_group(self, 
container): for li in container.findAll('li'): article = li.find('article') - h2 = article.find('h2') + if article is None: + a = li.find('a', href=True) + if a is not None: + title = self.tag_to_string(li.find(['h3', 'h2'])).strip() + paras = li.findAll('p') + if not title: + title = self.tag_to_string(paras[0]).strip() + if not title: + raise ValueError('No title found in article') + url = a['href'] + if url.startswith('/'): + url = 'https://www.nytimes.com' + url + desc = '' + if len(paras) > 0: + desc = self.tag_to_string(paras[-1]) + date = '' + d = date_from_url(url) + if d is not None: + date = format_date(d) + today = datetime.date.today() + delta = today - d + if delta.days > oldest_web_edition_article: + self.log.debug('\tSkipping article', title, 'as it is too old') + continue + yield {'title': title, 'url': url, 'description': desc, 'date': date} + continue + h2 = article.find(['h2', 'h3']) if h2 is not None: title = self.tag_to_string(h2) a = h2.find('a', href=True) diff --git a/recipes/nytimesbook.recipe b/recipes/nytimesbook.recipe index ffd6d463f8..a1b6df9dda 100644 --- a/recipes/nytimesbook.recipe +++ b/recipes/nytimesbook.recipe @@ -62,7 +62,7 @@ class NewYorkTimesBookReview(BasicNewsRecipe): main_articles, articles = [], [] feeds = [('Features', main_articles), ('Latest', articles)] for li in toc.findAll('li'): - h2 = li.find('h2') + h2 = li.find(['h2', 'h3']) a = h2.find('a', href=True) if a is not None: title = self.tag_to_string(a) @@ -77,7 +77,7 @@ class NewYorkTimesBookReview(BasicNewsRecipe): if desc: self.log('\t', desc) for li in soup.find(id='stream-panel').find('ol').findAll('li'): - h2 = li.find('h2') + h2 = li.find(['h2', 'h3']) a = h2.findParent('a') url = absolutize(a['href']) p = h2.findNextSibling('p') diff --git a/recipes/observer_reach_foundation.recipe b/recipes/observer_reach_foundation.recipe new file mode 100644 index 0000000000..c1b88a6ea6 --- /dev/null +++ b/recipes/observer_reach_foundation.recipe @@ -0,0 +1,60 @@ +from 
calibre.web.feeds.news import BasicNewsRecipe, classes + +class ORF(BasicNewsRecipe): + title = u'Observer Research Foundation' + description = ( + 'Set up in 1990, ORF seeks to lead and aid policy thinking towards building a strong and prosperous India' + ' in a fair and equitable world. It helps discover and inform India’s choices, and carries Indian voices ' + 'and ideas to forums shaping global debates. ' + ) + language = 'en_IN' + __author__ = 'unkn0wn' + oldest_article = 7.5 # days + max_articles_per_feed = 25 + encoding = 'utf-8' + masthead_url = 'https://www.orfonline.org/wp-content/uploads/2015/09/Logo_ORF_JPEG.jpg' + remove_attributes = ['style', 'height', 'width'] + ignore_duplicate_articles = {'url'} + + extra_css = ''' + .report-slider {font-size:small; color:#404040;} + .report {font-size:small; font-weight:bold;} + .excert-italic, .recent-block-people {font-style:italic; color:#202020;} + blockquote, em {color:#202020;} + ''' + + def get_browser(self): + return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') + + resolve_internal_links = True + remove_empty_feeds = True + + keep_only_tags = [classes('recent-updates-block recent-block-people')] + remove_tags = [ + classes( + 'social socialshare comment-area-section telegramhtml post-tag ' + 'research-prev research-next' + ) + ] + + feeds = [ + ('Commentaries', 'https://www.orfonline.org/content-type/commentary/feed/'), + ('Expert Speak', 'https://www.orfonline.org/expert-speak/feed/'), + ('Books and Monographs', 'https://www.orfonline.org/content-type/books/feed/'), + ('Event Reports', 'https://www.orfonline.org/content-type/event-reports/feed/'), + ('Events', 'https://www.orfonline.org/content-type/events/feed/'), + ('Forums', 'https://www.orfonline.org/content-type/forums/feed/'), + ('GP-ORF Series', 'https://www.orfonline.org/content-type/gp-orf-series/feed/'), + ('Issue Briefs & Special Reports', 'https://www.orfonline.org/content-type/issue-brief/feed/'), + ('Monitors', 
'https://www.orfonline.org/content-type/monitors/feed/'), + ('Occasional Papers', 'https://www.orfonline.org/content-type/occasional-paper/feed/'), + ('Primer', 'https://www.orfonline.org/content-type/primer/feed/'), + ('Series', 'https://www.orfonline.org/content-type/series/feed/'), + ('Surveys & Polls', 'https://www.orfonline.org/content-type/surveys-polls/feed/'), + ('Young Voices', 'https://www.orfonline.org/content-type/young-voices/feed/'), + ] + + def print_version(self, url): + if 'marathi' in url or 'hindi' in url or 'bangla' in url: + return '' + return url diff --git a/recipes/onda_rock.recipe b/recipes/onda_rock.recipe index 7c96cc5385..fc0f0d811d 100644 --- a/recipes/onda_rock.recipe +++ b/recipes/onda_rock.recipe @@ -26,5 +26,16 @@ class AdvancedUserRecipe1328535130(BasicNewsRecipe): masthead_url = 'http://api.ning.com/files/4ot8ampp*-rYQuwL2NoaHvVqcyu7VMyWyan12a9QMsJUWxk-q5V1-34wnD-Wj9B5qWjc1yPMLGiwQg8hZJxaySeaG2lx8hpV/2009_banner_ondarock.gif' # noqa extra_css = ''' # noqa - .boxtabscontain_page {border: 1px solid #E0E0E0;clear: both;font-family: "Verdana", "Arial", "Helvetica", sans-serif;font-size: 10px;line-height: 17px;margin: 0px 0px 20px;padding: 10px 10px 10px 40px;position: relative;top: -1px;width: 258px;z-index: 1;} - ''' + .boxtabscontain_page { + border: 1px solid #E0E0E0;clear: both; + font-family: "Verdana", "Arial", "Helvetica", sans-serif; + font-size: 10px; + line-height: 17px; + margin: 0px 0px 20px; + padding: 10px 10px 10px 40px; + position: relative; + top: -1px; + width: 258px; + z-index: 1; + } + ''' diff --git a/recipes/outlook_india.recipe b/recipes/outlook_india.recipe index 8c4f5bb7e1..72a59b6f8b 100644 --- a/recipes/outlook_india.recipe +++ b/recipes/outlook_india.recipe @@ -18,23 +18,30 @@ class outlook(BasicNewsRecipe): remove_attributes = ['height', 'width', 'style'] ignore_duplicate_articles = {'url'} resolve_internal_links = True - masthead_url = 'https://www.outlookindia.com/images/home_new_v4/logo_outlook.svg' + 
extra_css = ''' + .story-summary{font-style:italic; color:#202020;} + .author_wrapper, .relatedCategory{font-size:small; color:#404040;} + #figcap{font-size:small; text-align:center;} + ''' keep_only_tags = [classes('__story_detail')] remove_tags = [ classes( - 'social_sharing_article left_trending left-sticky __tag_links' - ' next_prev_stories downarrow uparrow more_from_author_links next prev __related_stories_thumbs' + 'social_sharing_article left_trending left-sticky __tag_links next_prev_stories ' + 'downarrow uparrow more_from_author_links next prev __related_stories_thumbs home_ad_title' ) ] + def get_browser(self): + return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False) + def parse_index(self): soup = self.index_to_soup('https://www.outlookindia.com/magazine') div = soup.find('div', attrs={'class':'wrapper'}) a = div.find('a', href=lambda x: x and x.startswith('/magazine/issue/')) url = a['href'] - self.log('Downloading issue:', url) - self.timefmt = ' [' + self.tag_to_string(a) + ']' + self.timefmt = ' [' + self.tag_to_string(a.find('p')).strip() + ']' + self.log('Downloading issue:', url, self.timefmt) soup = self.index_to_soup('https://www.outlookindia.com' + url) cover = soup.find(**classes('listingPage_lead_story')) self.cover_url = cover.find('img', attrs={'src': True})['src'] @@ -42,7 +49,7 @@ class outlook(BasicNewsRecipe): for h3 in soup.findAll(['h3', 'h4'], attrs={'class': 'tk-kepler-std-condensed-subhead'}): - a = h3.find('a', href=lambda x: x) + a = h3.find('a', href=True) url = a['href'] title = self.tag_to_string(a) desc = '' @@ -55,6 +62,11 @@ class outlook(BasicNewsRecipe): ans.append({'title': title, 'url': url, 'description': desc}) return [('Articles', ans)] + def preprocess_html(self,soup): + for fig in soup.findAll('figure'): + fig['id'] = 'figcap' + return soup + def preprocess_raw_html(self, raw, *a): return raw m = re.search('.*?script.*?>', raw, flags=re.DOTALL) diff --git 
a/recipes/pc_world.recipe b/recipes/pc_world.recipe index 3ede833131..589745a526 100644 --- a/recipes/pc_world.recipe +++ b/recipes/pc_world.recipe @@ -1,75 +1,98 @@ #!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini' -__copyright__ = '2009, Lorenzo Vigentini ' -__version__ = 'v1.01' -__date__ = '14, January 2010' -__description__ = 'PC World and Macworld consistently deliver editorial excellence through award-winning content and trusted product reviews.' ''' http://www.pcworld.com/ ''' -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ptempfile import PersistentTemporaryFile - -temp_files = [] -articles_are_obfuscated = True +from calibre.web.feeds.news import BasicNewsRecipe, classes class pcWorld(BasicNewsRecipe): - __author__ = 'Lorenzo Vigentini' - description = 'PC World and Macworld consistently deliver editorial excellence through award-winning content and trusted product reviews.' - cover_url = 'http://images.pcworld.com/images/common/header/header-logo.gif' - - title = 'PCWorld ' + __author__ = 'unkn0wn' + description = 'PCWorld helps you navigate the PC ecosystem to find the products you want and the advice you need to get the job done.' 
+ title = 'PCWorld' publisher = 'IDG Communication' - category = 'PC, video, computing, product reviews, editing, cameras, production' - language = 'en' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 7 - max_articles_per_feed = 20 - use_embedded_content = False - recursion = 10 - + encoding = 'utf-8' + ignore_duplicate_articles = {'url'} remove_javascript = True - no_stylesheets = True - auto_cleanup = True + resolve_internal_links = True + remove_empty_feeds = True + remove_attributes = ['height', 'width'] - def get_obfuscated_article(self, url): - br = self.get_browser() - br.open(url + '&print') + extra_css = ''' + .entry-meta, .imageCredit {font-size:small;} + .entry-eyebrow, .article_author_box_bio {font-size:small; color:#404040;} + .subheadline {font-style:italic; color:#202020;} + ''' - response = br.follow_link(url, nr=0) - html = response.read() - - self.temp_files.append(PersistentTemporaryFile('_fa.html')) - self.temp_files[-1].write(html) - self.temp_files[-1].close() - return self.temp_files[-1].name - - feeds = [ - (u'All Stories', u'http://www.pcworld.com/index.rss'), - (u'Reviews', u'http://www.pcworld.com/reviews/index.rss'), - (u'How-To', u'http://www.pcworld.com/howto/index.rss'), - (u'Video', u'http://www.pcworld.com/video/index.rss'), - (u'Game On', u'http://www.pcworld.com/column/game-on/index.rss'), - (u'Hassle free PC', u'http://www.pcworld.com/column/hassle-free-pc/index.rss'), - (u'Go Social', u'http://www.pcworld.com/column/go-social/index.rss'), - (u'Linux Line', u'http://www.pcworld.com/column/linux-line/index.rss'), - (u'Net Work', u'http://www.pcworld.com/column/net-work/index.rss'), - (u'Security Alert', u'http://www.pcworld.com/column/security-alert/index.rss'), - (u'Simply Business', u'http://www.pcworld.com/column/simply-business/index.rss'), - (u'Business', u'http://www.pcworld.com/category/business/index.rss'), - (u'Security & Privacy', u'http://www.pcworld.com/category/privacy/index.rss'), - (u'Windows', 
u'http://www.pcworld.com/category/windows/index.rss'), - (u'Laptops', u'http://www.pcworld.com/category/laptop-computers/index.rss'), - (u'Software', u'http://www.pcworld.com/category/software/index.rss'), - (u'Desktops', u'http://www.pcworld.com/category/desktop-computers/index.rss'), - (u'Printers', u'http://www.pcworld.com/category/printers/index.rss'), - (u'Phones', u'http://www.pcworld.com/category/phones/index.rss'), - (u'Tablets', u'http://www.pcworld.com/category/tablets/index.rss') + keep_only_tags = [ + classes('entry-header post-thumbnail'), + dict(name='div', attrs={'id':'link_wrapped_content'}), + classes('article_author_box_bio') ] + def parse_index(self): + + section_list = [ + ('PC & Components', 'pc-components'), + ('Laptops', 'laptops'), + ('Mobile', 'mobile'), + ('How-To', 'howto'), + ('Gaming', 'gaming'), + ('Windows', 'windows'), + ('Best-Picks','best-picks'), + ('Reviews', 'reviews'), + ('Security', 'security'), + ('Smart Tech', 'smart-tech'), + ('Software', 'software'), + ('WiFi & Networks', 'wifi-networks'), + ('Deals', 'deals'), + ('Business', 'business'), + ('Entertainment', 'entertainment'), + ] + + feeds = [] + + # For each section title, fetch the article urls + for section in section_list: + section_title = section[0] + section_url = 'https://www.pcworld.com/' + section[1] + self.log(section_title, section_url) + soup = self.index_to_soup(section_url) + articles = self.articles_from_soup(soup) + if articles: + feeds.append((section_title, articles)) + return feeds + + def articles_from_soup(self, soup): + ans = [] + feed = soup.find('div', attrs={'class':lambda x: x and 'articleFeed-inner' in x.split()}) + for item in feed.findAll('div', attrs={'class':'item-text-inner'}): + a = item.find('h3').find('a', href=True) + title = self.tag_to_string(a) + url = a['href'] + desc = '' + if span := item.find(attrs={'class':'item-excerpt'}): + desc = self.tag_to_string(span) + if byline := item.find(attrs={'class':'item-byline'}): + desc = 
self.tag_to_string(byline) + ' | ' + desc + if eye := item.find(attrs={'class':lambda x: x and 'item-eyebrow' in x.split()}): + desc = self.tag_to_string(eye) + ' | ' + desc + if itdate := item.find(attrs={'class':'item-date'}): + date = self.tag_to_string(itdate) + check = 'hours', 'day', 'days' # skipping articles older than a week + if not any(x in date for x in check): + continue + if not url or not title: + continue + self.log('\t', title, '\n\t', desc, '\n\t\t', url) + ans.append({'title': title, 'url': url, 'description': desc}) + return ans + + def get_cover_url(self): + soup = self.index_to_soup( + 'https://www.magzter.com/US/IDG-Consumer-and-SMB,-Inc./PCWorld/Computer-&-Mobile/' + ) + for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): + return citem['content'] diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe index 2354cd0651..350a57b4f0 100644 --- a/recipes/phillosophy_now.recipe +++ b/recipes/phillosophy_now.recipe @@ -21,16 +21,25 @@ class PhilosophyNow(BasicNewsRecipe): remove_attributes = ['height', 'width', 'style'] encoding = 'utf-8' ignore_duplicate_articles = {'url'} + masthead_url = 'https://philosophynow.org/media/images/regulars/logoStructuredData.png' keep_only_tags = [classes('article_page')] remove_tags = [dict(name='div', attrs={'id':'welcome_box'})] + extra_css = ''' + img {display:block; margin:0 auto;} + .articleImageCaption { font-size:small; text-align:center; } + em, blockquote { color:#202020; } + ''' def parse_index(self): soup = self.index_to_soup('https://philosophynow.org/') div = soup.find('div', attrs={'id': 'aside_issue_cover'}) url = div.find('a', href=True)['href'] - for issue in div.findAll('div', attrs={'id':'aside_issue_text'}): + issue = div.find('div', attrs={'id':'aside_issue_text'}) + if issue: self.log('Downloading issue:', self.tag_to_string(issue).strip()) + self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']' + 
self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'})) cov_url = div.find('img', src=True)['src'] self.cover_url = 'https://philosophynow.org' + cov_url soup = self.index_to_soup('https://philosophynow.org' + url) diff --git a/recipes/poetrymagazine.recipe b/recipes/poetrymagazine.recipe new file mode 100644 index 0000000000..0aeae8c4f3 --- /dev/null +++ b/recipes/poetrymagazine.recipe @@ -0,0 +1,135 @@ +import re +from collections import OrderedDict +from urllib.parse import urlparse + +from calibre.web.feeds.news import BasicNewsRecipe + +_issue_url = "" + +COMMA_SEP_RE = re.compile(r"\s*,\s*") +SPACE_SEP_RE = re.compile(r"\s+") +NON_NUMERIC_RE = re.compile(r"[^\d]+") + + +class Poetry(BasicNewsRecipe): + title = "Poetry Magazine" + __author__ = "ping" + description = ( + "Founded in Chicago by Harriet Monroe in 1912, Poetry is the oldest monthly " + "devoted to verse in the English-speaking world. https://www.poetryfoundation.org/poetrymagazine" + ) + publication_type = "magazine" + language = "en" + encoding = "utf-8" + remove_javascript = True + no_stylesheets = True + auto_cleanup = False + ignore_duplicate_articles = {"url"} + compress_news_images = False + + remove_attributes = ["style", "font"] + keep_only_tags = [dict(name="article")] + + remove_tags = [ + dict(name="button"), + dict( + attrs={ + "class": [ + "c-socialBlocks", + "c-index", + "o-stereo", + "u-hideAboveSmall", + "c-slideTrigger", + "js-slideshow", + ] + } + ), + ] + + extra_css = """ + h1 { font-size: 1.8rem; margin-bottom: 0.5rem; } + .o-titleBar-summary { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; } + div.o-titleBar-meta, div.c-feature-sub { font-weight: bold; color: #444; margin-bottom: 1.5rem; } + div.pcms_media img, div.o-mediaEnclosure img { max-width: 100%; height: auto; } + div.o-mediaEnclosure .o-mediaEnclosure-metadata { font-size: 0.8rem; margin-top: 0.2rem; } + div.c-feature-bd { margin-bottom: 2rem; } + 
div.c-auxContent { color: #222; font-size: 0.85rem; margin-top: 2rem; } + """ + + def extract_from_img_srcset(self, srcset: str, max_width=0): + sources = [s.strip() for s in COMMA_SEP_RE.split(srcset) if s.strip()] + if len(sources) == 1: + # just a regular img url probably + return sources[0] + parsed_sources = [] + for src in sources: + src_n_width = [s.strip() for s in SPACE_SEP_RE.split(src) if s.strip()] + if len(src_n_width) != 2: + raise ValueError(f"Not a valid srcset: {srcset}") + parsed_sources.append( + ( + src_n_width[0].strip(), + int(NON_NUMERIC_RE.sub("", src_n_width[1].strip())), + ) + ) + parsed_sources = list(set(parsed_sources)) + parsed_sources = sorted(parsed_sources, key=lambda x: x[1], reverse=True) + if not max_width: + return parsed_sources[0][0] + for img, width in parsed_sources: + if width <= max_width: + return img + return parsed_sources[-1][0] + + def preprocess_html(self, soup): + for img in soup.select("div.o-mediaEnclosure img"): + if not img.get("srcset"): + continue + img["src"] = self.extract_from_img_srcset(img["srcset"], max_width=1000) + return soup + + def parse_index(self): + if _issue_url: + soup = self.index_to_soup(_issue_url) + else: + soup = self.index_to_soup("https://www.poetryfoundation.org/poetrymagazine") + current_issue = soup.select("div.c-cover-media a") + if not current_issue: + self.abort_recipe_processing("Unable to find latest issue") + current_issue = current_issue[0] + soup = self.index_to_soup(current_issue["href"]) + + issue_edition = self.tag_to_string(soup.find("h1")) + self.timefmt = f" [{issue_edition}]" + cover_image = soup.select("div.c-issueBillboard-cover-media img")[0] + parsed_cover_url = urlparse( + cover_image["srcset"].split(",")[-1].strip().split(" ")[0] + ) + self.cover_url = f"{parsed_cover_url.scheme}://{parsed_cover_url.netloc}{parsed_cover_url.path}" + + sectioned_feeds = OrderedDict() + + tabs = soup.find_all("div", attrs={"class": "c-tier_tabbed"}) + for tab in tabs: + tab_title = 
tab.find("div", attrs={"class": "c-tier-tab"}) + tab_content = tab.find("div", attrs={"class": "c-tier-content"}) + if not (tab_title and tab_content): + continue + tab_title = self.tag_to_string(tab_title) + sectioned_feeds[tab_title] = [] + for li in tab_content.select("ul.o-blocks > li"): + author = self.tag_to_string( + li.find("span", attrs={"class": "c-txt_attribution"}) + ) + for link in li.find_all("a", attrs={"class": "c-txt_abstract"}): + self.log("Found article:", self.tag_to_string(link)) + sectioned_feeds[tab_title].append( + { + "title": self.tag_to_string(link), + "url": link["href"], + "author": author, + "description": author, + } + ) + + return sectioned_feeds.items() diff --git a/recipes/politico.recipe b/recipes/politico.recipe index 9af588d2f2..a6b376ee2c 100644 --- a/recipes/politico.recipe +++ b/recipes/politico.recipe @@ -37,7 +37,7 @@ class Politico(BasicNewsRecipe): ] remove_tags = [ - dict(name=['notags', 'embed', 'aside', 'object', 'link', 'img', 'figure']), + dict(name=['notags', 'embed', 'aside', 'object', 'link', 'img', 'figure', 'svg', 'button']), dict( attrs={'class': lambda x: x and 'story-tools' in x.split()}), dict( diff --git a/recipes/private_eye.recipe b/recipes/private_eye.recipe index bbce44f129..8ddda8392d 100644 --- a/recipes/private_eye.recipe +++ b/recipes/private_eye.recipe @@ -1,49 +1,210 @@ +''' +Fetch Private Eye (Online Edition) +''' + import re from calibre.web.feeds.news import BasicNewsRecipe +from datetime import datetime, timedelta +class PrivateEyeRecipe(BasicNewsRecipe): + ## + # Last Edited: 2023-07-14 + # + # Remark: Version 3.1 2023-07-14 + # Show crossword on right so clues are continuous down left + # Link to crossword image removed + # Improve many image layouts + # Version 3.0 2023-07-01 + # Rewrite (by Sophist-UK) to fit latest web pages, correctly identify pages to include + # and improve formatting. 
+ # Edited to add: inclusion of About page, + # identifying series number and publication date and setting metadata. + # -class AdvancedUserRecipe1359406781(BasicNewsRecipe): - title = u'Private Eye' - publication_type = 'magazine' + title = u'Private Eye (Online Edition)' description = u'Private Eye is a fortnightly British satirical and current affairs magazine, edited by Ian Hislop' - oldest_article = 13 - max_articles_per_feed = 100 - remove_empty_feeds = True - remove_javascript = True - no_stylesheets = True - ignore_duplicate_articles = {'title'} + publication_type = 'magazine' language = 'en_GB' encoding = 'utf-8' - __author__ = u'Martyn Pritchard' - __copyright__ = '2020, Martyn Pritchard ' + oldest_article = 13 + max_articles_per_feed = 100 + remove_javascript = True + ignore_duplicate_articles = {'url'} + + __author__ = u'Martyn Pritchard & Sophist-UK' + __copyright__ = '2020, Martyn Pritchard & Sophist-UK ' + + current_issue = 'https://www.private-eye.co.uk/current-issue' + about_page = 'https://www.private-eye.co.uk/about' + masthead_url = 'https://www.private-eye.co.uk/grfx/logos/logo-new.png' + author = 'Private Eye' + series = title = 'Private Eye Online' + conversion_options = { + 'authors': author, + 'author_sort': author, + 'series': series, + 'series_index': 0, + 'title': title, + 'title_sort': title, + } def get_cover_url(self): - cover_url = None - soup = self.index_to_soup('https://www.private-eye.co.uk') - for citem in soup.findAll('img'): - if citem['src'].endswith('big.jpg'): - return citem['src'] - return cover_url + soup = self.index_to_soup(self.current_issue) - remove_tags_before = {'class': "article"} - remove_tags_after = {'class': "article"} - remove_tags = [dict(name='div', attrs={'id': 'sections-sidebar'})] - remove_tags = {'class': "sub-nav-bar"} - remove_tags = [dict(name='a', attrs={'class': 'twitter-share-button'})] - remove_tags = [dict(name='div', attrs={'id': 'nav-box-sections-mobile'})] + for img in soup.findAll('img'): + 
src = img['src'] + if src.endswith('_big.jpg'): + file_name = src.rsplit('/',1)[1] + if file_name is None: + file_name = src + try: + self.conversion_options.update({'series_index': int(file_name[:-len('_big.jpg')])}) + self.log('series-index:', self.conversion_options['series_index']) + except (TypeError, ValueError): + # wrong big image + continue + return src + return None + def parse_index(self): + soup = self.index_to_soup(self.current_issue) + + # Get publication date + sidebar = soup.find('div', attrs={'id': 'current-issue-sidebar'}) + next_issue_text = sidebar.find('b').nextSibling.strip() + try: + day, month, year = next_issue_text.split(' ') + day = ''.join(c for c in day if c.isdigit()) + pub_date = datetime.strptime(" ".join((day, month, year)), "%d %B %Y") - timedelta(12) + self.log('pub-date:', pub_date) + self.conversion_options.update({'pubdate': datetime.strftime(pub_date, "%d %B %Y").lstrip("0")}) + title = self.title + " " + datetime.strftime(pub_date, "%Y-%m-%d") + self.conversion_options.update({'title': title}) + self.conversion_options.update({'title_sort': title}) + except (TypeError, ValueError): + # Bad date + self.log('Cannot parse next issue date from:', next_issue_text) + + # Get pages first from the sub-menu, and then from the contents panel. + # Duplicates will be eliminated automatically. 
+ articles = [] + for menu_attrs in ( + {'class': 'sub-nav-bar', 'id':'sub-nav-box'}, + {'class': 'article', 'id': 'block-left'}, + ): + menu = soup.find('div', attrs=menu_attrs) + + if not menu: + continue + + for a in menu.findAll('a', href=True): + title = a.getText().rstrip(' »\n') + if not title: + continue + articles.append({ + 'title': title, + 'url': a.get('href'), + }) + + if not articles: + raise ValueError('Private-Eye Online index of pages not found') + + # Add the About page as a final article + articles.append({ + 'title': 'About Private Eye', + 'url': self.about_page, + }) + + self.log('parse_index:', articles) + + return [('Private Eye', articles)] + + def preprocess_html(self, soup): + # Remove
    tag link to crossword image + for tag in soup.findAll('a', {'href': re.compile(r'/pictures/crossword/')}): + self.log("Removing link to crossword image...") + tag.unwrap() + + # Remove align tag in crossword image (so float right works) + for tag in soup.findAll('img', {'src': re.compile(r'/pictures/crossword/')}): + if "align" in tag.attrs: + self.log("Removing crossword image align attribute...") + del tag.attrs['align'] + + return soup + + # We remove vast swathes of HTML which is not part of the articles. + # Remove sibling content + remove_tags_before = [ + {'name': 'div', 'class': "article"}, + {'name': 'div', 'id': "page"}, + {'name': 'div', 'id': "page-wide"}, + {'name': 'div', 'id': "content"}, + {'name': 'a', ' attrs': {'href': 'https://shop.private-eye.co.uk'}}, + ] + remove_tags_after = remove_tags_before.copy() + remove_tags_after.append( + {'name': 'div', 'id': 'about-covers'}, + ) + # Remove non-sibling content + remove_tags = [ + {'name': 'div', 'attrs': {'id': 'top-bar'}}, + {'name': 'div', 'attrs': {'id': 'header-wide'}}, + {'name': 'div', 'attrs': {'id': 'footer-wide'}}, + {'name': 'div', 'attrs': {'id': 'follow-buttons'}}, + {'name': 'div', 'attrs': {'id': 'sidebar'}}, + {'name': 'div', 'attrs': {'id': 'sections-sidebar'}}, + {'name': 'div', 'attrs': {'id': 'nav-box-sections-mobile'}}, + {'name': 'div', 'attrs': {'id': 'nav-box-pages-mobile'}}, + {'name': 'div', 'attrs': {'id': 'about-covers'}}, + {'name': 'a', ' attrs': {'href': 'https://shop.private-eye.co.uk'}}, + {'name': 'iframe'}, + {'name': 'link', 'attrs': {'href': re.compile('/javastyle/lightbox/')}}, + {'name': 'link', 'attrs': {'href': re.compile('/javastyle/news_ticker/')}}, + {'name': 'link', 'attrs': {'href': re.compile('/javastyle/media-queries-')}}, + ] + + # Convert headers to h1, strapline to h4 preprocess_regexps = [ ( re.compile( - r'(.*?)\s*(?:
    \s*)*(?:(.*?))?', re.DOTALL | re.IGNORECASE - ), lambda match: '' - ), - ( - re.compile( - r'