Changelog.yaml
@@ -20,6 +20,100 @@
# new recipes:
# - title:

- version: 4.12.0
  date: 2020-03-06

  new features:
    - title: "Kobo driver: Add support for new firmware with the series list on the device"

    - title: "Automatically extract the source DOCX file from Kindle Create KPF files when adding them to calibre. If you prefer to preserve the KPF file you can disable the KPF Extract plugin in Preferences->Plugins"

    - title: "Content server: Add a button to delete all locally cached books."
      tickets: [1864305]

    - title: "Edit Book: Allow selecting the contents of a tag with Ctrl+Alt+t"

    - title: "Viewer: Save the current position 3 seconds after the last position change. Useful if the viewer crashes on resume from sleep."

    - title: "Viewer: Add a keyboard shortcut (Ctrl+w) to toggle the scrollbar."
      tickets: [1864356]

    - title: "Viewer: Keyboard shortcuts to change number of columns (Ctrl+[ and Ctrl+])"

  bug fixes:
    - title: "Fix the Douban metadata download plugin"
      tickets: [1853091]

    - title: "Viewer: Fix searching in Regex and Whole words mode not working well."

    - title: "Viewer: Fix searching for multiple words in fixed layout books not working."
      tickets: [1863464]

    - title: "RTF Input: Fix handling of RTF files with invalidly encoded text."
      tickets: [1864719]

    - title: "PDF Output: Add a hang check when loading HTML: if there is no progress for sixty seconds, abort"
      tickets: [1865380]

    - title: 'Viewer: When starting without a book, allow quitting the viewer by clicking the close button on the "Open book" page'
      tickets: [1864343]

  improved recipes:
    - Wired
    - ABC News Australia

  new recipes:
    - title: Spectator Australia
      author: James Cridland

- version: 4.11.2
  date: 2020-02-21

  new features:
    - title: "Viewer: Allow right clicking on the scrollbar to easily access commonly used scrolling shortcuts"

    - title: "Edit book: Preview panel: Allow right clicking on images to edit them"

    - title: "Add a new Quick select action to quickly select a virtual library with a few keystrokes. Activated by Ctrl+t or the Virtual library menu"

    - title: "Viewer: Calculate default column widths based on current font size"

    - title: "Viewer: Add a control to quit the viewer, useful on touchscreens."
      tickets: [1862441]

    - title: "Viewer: Add shortcut for showing metadata (Ctrl+n)"
      tickets: [1862432]

  bug fixes:
    - title: "4.11.2 fixes a couple of regressions that broke the New bookmark button in the viewer and scrolling in the content server library view. Also fixes calculation of default column widths in viewer not changing when font size is changed."

    - title: "Viewer: Fix a regression that broke detection of pop-up footnotes using EPUB 3 markup"

    - title: "Viewer: Fix current reading position not preserved when changing preferences while auto scroll is active."
      tickets: [1863438]

    - title: "Viewer: Fix stopping autoscroll at end of chapter not stopping next chapter jump."
      tickets: [1863487]

    - title: "Fix for viewer window going off screen even when not restoring window geometry"

    - title: "Edit book: Fix syntax highlighting for break-(before|after)"
      tickets: [1863020]

    - title: "Fix drag and drop of some image files onto edit metadata dialog not working"
      tickets: [1862440]

    - title: "Conversion pipeline: Fix styles applied via selectors to the <html> element being ignored"
      tickets: [1862401]

    - title: "Bulk metadata edit: Fix clear series not resetting series index"

    - title: "Fix clicking on author name in book details panel to search in Goodreads not working if the author's name has more than two parts"

  improved recipes:
    - New York Times

- version: 4.10.0
  date: 2020-02-07
@@ -955,7 +1049,7 @@
    - title: "Allow adding files to selected book records from the clipboard. To use, copy a file from Windows Explorer, right click the Add books button and choose: Add files to selected books from clipboard"
      tickets: [1815419]

    - title: "Tag browser: When right clicking on a saved search add a menu option to search using the raw search expression."
    - title: "Tag browser: When right clicking on a Saved search add a menu option to search using the raw search expression."
      tickets: [1816274]

    - title: "Tag browser: Have pressing the Enter key find the next match."
@@ -1029,7 +1123,7 @@
    - title: "Content server: Fix editing metadata that affects multiple books causing all the metadata for all the books to become the same."
      tickets: [1812781]

    - title: "Open With: Fix using .bat files as the program not working."
    - title: "Open with: Fix using .bat files as the program not working."
      tickets: [1811045]

    - title: "ZIP Output: Fix an error when building the ToC on macOS for some books with non-ASCII ToC entries"
@@ -2083,7 +2177,7 @@ to appear as Unknown if metadata management was set to manual in calibre."
    - title: "Content server: Open links in the comments section from the book details page in new windows."
      tickets: [1737644]

    - title: "Choose English as the User interface language when a locale related environment variable is set to the C locale"
    - title: "Choose English as the user interface language when a locale related environment variable is set to the C locale"

    - title: "Linux installer: A nicer error message if the user tries to run the installer on an ARM machine"
@@ -2115,7 +2209,7 @@ to appear as Unknown if metadata management was set to manual in calibre."

    - title: "Edit book: Pre-select existing cover image (if any) in add cover dialog"

    - title: "Make the Manage saved searches dialog a little easier for new users."
    - title: "Make the Manage Saved searches dialog a little easier for new users."
      tickets: [1733163]

    - title: "Add a tweak to control behavior of Enter on the book list"
@@ -2132,7 +2226,7 @@ to appear as Unknown if metadata management was set to manual in calibre."
    - title: "Content server: Improve rendering of tags/categories with long words on small screens."
      tickets: [1734119]

    - title: "Fix first added saved search not appearing in Tag browser until calibre restart."
    - title: "Fix first added Saved search not appearing in Tag browser until calibre restart."
      tickets: [1733151]

    - title: "When checking added books for duplicates, also check on the language field. So books with the same title/authors but different languages are not considered duplicates."
@@ -10,7 +10,7 @@ reading. It is cross platform, running on Linux, Windows and macOS.

For more information, see the [calibre About page](https://calibre-ebook.com/about)

[](https://github.com/kovidgoyal/calibre/actions?workflow=Continuous+Integration)
[](https://github.com/kovidgoyal/calibre/actions?query=workflow%3ACI)

## Screenshots
@@ -163,7 +163,6 @@ run(const char **ENV_VARS, const char **ENV_VAR_VALS, char *PROGRAM,
    char *t = NULL;
    int ret = 0, i;
    PyObject *site, *mainf, *res;
    uint32_t buf_size = PATH_MAX+1;

    for (i = 0; i < 3; i++) {
        t = rindex(full_exe_path, '/');
imgsrc/srv/window-restore.svg
Normal file
@@ -0,0 +1 @@
<svg width="512" height="512" viewBox="0 0 512 512" xmlns="http://www.w3.org/2000/svg"><path d="M464 0H144c-26.5 0-48 21.5-48 48v48H48c-26.5 0-48 21.5-48 48v320c0 26.5 21.5 48 48 48h320c26.5 0 48-21.5 48-48v-48h48c26.5 0 48-21.5 48-48V48c0-26.5-21.5-48-48-48zm-96 464H48V256h320v208zm96-96h-48V144c0-26.5-21.5-48-48-48H144V48h320v320z"/></svg>
@@ -19,7 +19,7 @@ If you want only *some* of your library cataloged, you have two options:
* Create a multiple selection of the books you want cataloged. With more than one book selected in calibre's main window, only the selected books will be cataloged.
* Use the Search field or the Tag browser to filter the displayed books. Only the displayed books will be cataloged.

To begin catalog generation, select the menu item :guilabel:`Convert books > Create a catalog of the books in your calibre library`. You may also add a :guilabel:`Create Catalog` button to a toolbar in :guilabel:`Preferences > Interface > Toolbars` for easier access to the Generate catalog dialog.
To begin catalog generation, select the menu item :guilabel:`Convert books > Create a catalog of the books in your calibre library`. You may also add a :guilabel:`Create catalog` button to a toolbar in :guilabel:`Preferences > Interface > Toolbars & menus` for easier access to the Generate catalog dialog.

.. image:: images/catalog_options.png
   :alt: Catalog options
@@ -134,4 +134,3 @@ Additional help resources
For more information on calibre's Catalog feature, see the MobileRead forum sticky `Creating Catalogs - Start here <https://www.mobileread.com/forums/showthread.php?t=118556>`_, where you can find information on how to customize the catalog templates, and how to submit a bug report.

To ask questions or discuss calibre's Catalog feature with other users, visit the MobileRead forum `Calibre Catalogs <https://www.mobileread.com/forums/forumdisplay.php?f=236>`_.
@@ -136,7 +136,7 @@ for inclusion into the main calibre repository:
    git clone git@github.com:<username>/calibre.git
    git remote add upstream https://github.com/kovidgoyal/calibre.git

Replace <username> above with your github username. That will get your fork checked out locally.
Replace <username> above with your GitHub username. That will get your fork checked out locally.
* You can make changes and commit them whenever you like. When you are ready to have your work merged, do a::

    git push
manual/drm.rst
Normal file
@@ -0,0 +1,107 @@

.. _drm:

Digital Rights Management (DRM)
===============================================

Digital rights management (DRM) is a generic term for access control
technologies that can be used by hardware manufacturers, publishers, copyright
holders and individuals to try to impose limitations on the usage of digital
content and devices. It is also, sometimes, disparagingly described as Digital
Restrictions Management. The term is used to describe any technology which
inhibits uses (legitimate or otherwise) of digital content that were not
desired or foreseen by the content provider. The term generally doesn't refer
to other forms of copy protection which can be circumvented without modifying
the file or device, such as serial numbers or key-files. It can also refer to
restrictions associated with specific instances of digital works or devices.
DRM technologies attempt to control use of digital media by preventing access,
copying or conversion to other formats by end users. See `wikipedia
<https://en.wikipedia.org/wiki/Digital_rights_management>`_.


What does DRM imply for me personally?
------------------------------------------

When you buy an e-book with DRM you don't really own it but have purchased the
permission to use it in a manner dictated to you by the seller. DRM limits what
you can do with e-books you have "bought". Often people who buy books with DRM
are unaware of the extent of these restrictions. These restrictions prevent you
from reformatting the e-book to your liking, including making stylistic changes
like adjusting the font sizes, although there is software that empowers you to
do such things for non-DRM books. People are often surprised that an e-book
they have bought in a particular format cannot be converted to another format
if the e-book has DRM. So if you have an Amazon Kindle and buy a book sold by
Barnes and Noble, you should know that if that e-book has DRM you will not be
able to read it on your Kindle. Notice that I am talking about a book you buy,
not steal or pirate but BUY.


What does DRM do for authors?
----------------------------------

Publishers of DRMed e-books argue that the DRM is all for the sake of authors
and to protect their artistic integrity and prevent piracy. But DRM does NOT
prevent piracy. People who want to pirate content or use pirated content still
do it and succeed. The three major DRM schemes for e-books today are run by
Amazon, Adobe and Barnes and Noble and all three DRM schemes have been cracked.
All DRM does is inconvenience legitimate users. It can be argued that it
actually harms authors as people who would have bought the book choose to find
a pirated version as they are not willing to put up with DRM. Those that would
pirate in the absence of DRM do so in its presence as well. To reiterate, the
key point is that DRM *does not prevent piracy*. So DRM is not only pointless
and harmful to buyers of e-books but also a waste of money.


DRM and freedom
-------------------

Although digital content can be used to make information as well as creative
works easily available to everyone and empower humanity, this is not in the
interests of some publishers who want to steer people away from this
possibility of freedom simply to maintain their relevance in a world developing
so fast that they can't keep up.


Why does calibre not support DRM?
-------------------------------------

calibre is open source software while DRM by its very nature is closed. If
calibre were to support opening or viewing DRM files it could be trivially
modified to be used as a tool for DRM removal which is illegal under today's
laws. Open source software and DRM are a clash of principles. While DRM is all
about controlling the user, open source software is about empowering the user.
The two simply cannot coexist.


What is calibre's view on content providers?
------------------------------------------------

We firmly believe that authors and other content providers should be
compensated for their efforts, but DRM is not the way to go about it. We are
developing this database of DRM-free e-books from various sources to help you
find DRM-free alternatives and to help independent authors and publishers of
DRM-free e-books publicize their content. We hope you will find this useful and
we request that you do not pirate the content made available to you here.


How can I help fight DRM?
-----------------------------

As somebody who reads and buys e-books you can help fight DRM. Do not buy
e-books with DRM. There are some publishers who publish DRM-free e-books. Make
an effort to see if they carry the e-book you are looking for. If you like
books by certain independent authors that sell DRM-free e-books and you can
afford it, make donations to them. This is money well spent as their e-books
tend to be cheaper (there may be exceptions) than the ones you would buy from
publishers of DRMed books and would probably work on all devices you own in the
future, saving you the cost of buying the e-book again. Do not discourage
publishers and authors of DRM-free e-books by pirating their content. Content
providers deserve compensation for their efforts. Do not punish them for trying
to make your reading experience better by making available DRM-free e-books. In
the long run this is detrimental to you. If you have bought books from sellers
that carry both DRMed as well as DRM-free books, make it a point to leave a
comment or review on the website informing future buyers of each book's DRM
status. Many sellers do not think it important to clearly indicate to their
buyers if an e-book carries DRM or not. `Here
<https://www.defectivebydesign.org/guide/ebooks>`_ you will find a Guide to
DRM-free living.
@@ -802,7 +802,8 @@ The HTML editor has very sophisticated syntax highlighting. Features include:
* The text inside bold, italic and heading tags is made bold/italic
* As you move your cursor through the HTML, the matching HTML tags are
  highlighted, and you can jump to the opening or closing tag with the
  keyboard shortcuts :kbd:`Ctrl+{` and :kbd:`Ctrl+}`
  keyboard shortcuts :kbd:`Ctrl+{` and :kbd:`Ctrl+}`. Similarly, you
  can select the contents of a tag with :kbd:`Ctrl+Alt+T`.
* Invalid HTML is highlighted with a red underline
* Spelling errors in the text inside HTML tags and attributes such as title
  are highlighted. The spell checking is language aware, based on the value
@@ -983,7 +983,7 @@ If you want to backup the calibre configuration/plugins, you have to backup the

How do I use purchased EPUB books with calibre (or what do I do with .acsm files)?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Most purchased EPUB books have `DRM <https://drmfree.calibre-ebook.com/about#drm>`_. This prevents calibre from opening them. You can still use calibre to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with calibre will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to calibre.
Most purchased EPUB books have :doc:`DRM <drm>`. This prevents calibre from opening them. You can still use calibre to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with calibre will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to calibre.

I am getting a "Permission Denied" error?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -94,7 +94,7 @@ Convert books
   :class: float-right-img

|cei| E-books can be converted from a number of formats into whatever format your e-book reader prefers.
Many e-books available for purchase will be protected by `Digital Rights Management <https://drmfree.calibre-ebook.com/about#drm>`_ *(DRM)* technology.
Many e-books available for purchase will be protected by :doc:`Digital Rights Management <drm>` *(DRM)* technology.
calibre will not convert these e-books. It is easy to remove the DRM from many formats, but as this may be illegal,
you will have to find tools to liberate your books yourself and then use calibre to convert them.
@@ -40,7 +40,7 @@ and then the :guilabel:`Add a custom news source` menu item and then the
.. image:: images/custom_news.png
   :align: center

First enter ``calibre Blog`` into the :guilabel:`Recipe title` field. This will be the title of the e-book that will be created from the articles in the above feeds.
First enter ``Calibre Blog`` into the :guilabel:`Recipe title` field. This will be the title of the e-book that will be created from the articles in the above feeds.

The next two fields (:guilabel:`Oldest article` and :guilabel:`Max. number of articles`) allow you some control over how many articles should be downloaded from each feed, and they are pretty self explanatory.
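
A minimal sketch of the kind of recipe this dialog generates, using the same
``BasicNewsRecipe`` API as the recipes elsewhere in this diff; the feed URL
below is a placeholder, not the actual calibre Blog feed::

    from calibre.web.feeds.news import BasicNewsRecipe


    class CalibreBlog(BasicNewsRecipe):
        title = 'Calibre Blog'            # the "Recipe title" field
        oldest_article = 7                # the "Oldest article" field, in days
        max_articles_per_feed = 100       # the "Max. number of articles" field

        # Placeholder feed; substitute the feed URL entered in the dialog.
        feeds = [
            ('Calibre Blog', 'https://example.com/calibre-blog/feed'),
        ]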
@@ -21,7 +21,7 @@ available <https://calibre-ebook.com/help>`_.

.. only:: online

   **An e-book version of this user manual is available in** `EPUB format <calibre.epub>`_, `AZW3 (Kindle Fire) format <calibre.azw3>`_ and `PDF format <calibre.pdf>`_.
   **An e-book version of this User Manual is available in** `EPUB format <calibre.epub>`_, `AZW3 (Kindle Fire) format <calibre.azw3>`_ and `PDF format <calibre.pdf>`_.

.. rubric:: Sections

@@ -41,4 +41,5 @@ available <https://calibre-ebook.com/help>`_.
   customize
   generated/en/cli-index
   develop
   drm
   glossary
@@ -98,7 +98,7 @@ The Tag browser search mechanism knows if an item has children. If it does, clic
Restrictions
---------------

If you search for a genre then create a saved search for it, you can use the 'restrict to' box to create a virtual library of books with that genre. This is useful if you want to do other searches within the genre or to manage/update metadata for books in the genre. Continuing our example, you can create a saved search named 'History.Japanese' by first clicking on the genre Japanese in the Tag browser to get a search into the search box, entering History.Japanese into the saved search box, then pushing the "save search" button (the green box with the white plus, on the right-hand side).
If you search for a genre then create a saved search for it, you can use the 'restrict to' box to create a Virtual library of books with that genre. This is useful if you want to do other searches within the genre or to manage/update metadata for books in the genre. Continuing our example, you can create a Saved search named 'History.Japanese' by first clicking on the genre Japanese in the Tag browser to get a search into the search box, entering History.Japanese into the saved search box, then pushing the "save search" button (the green box with the white plus, on the right-hand side).

.. image:: images/sg_restrict.jpg
   :align: center
@@ -125,15 +125,15 @@ Dictionary lookup
-------------------

You can look up the meaning of words in the current book by opening the
:guilabel:`Lookup/search panel` via the viewer controls. Then simply double
click on any word and its definition will be displayed in the lookup panel.
:guilabel:`Lookup/search word panel` via the viewer controls. Then simply double
click on any word and its definition will be displayed in the Lookup panel.


Copying text and images
-------------------------

You can select text and images by dragging the content with your mouse and then
right clicking and selecting "Copy" to copy to the clipboard. The copied
right clicking and selecting :guilabel:`Copy` to copy to the clipboard. The copied
material can be pasted into another application as plain text and images.


@@ -143,7 +143,7 @@ Non re-flowable content
Some books have very wide content that cannot be broken up at page boundaries.
For example tables or :code:`<pre>` tags. In such cases, you should switch the
viewer to *flow mode* by pressing :kbd:`Ctrl+m` to read this content.
Alternately, you can also add the following CSS to the Styling section of the
Alternately, you can also add the following CSS to the :guilabel:`Styles` section of the
viewer preferences to force the viewer to break up lines of text in
:code:`<pre>` tags::
@@ -45,12 +45,12 @@ You can switch back to the full library at any time by once again clicking the
:guilabel:`Virtual library` and selecting the entry named :guilabel:`<None>`.

Virtual libraries are based on *searches*. You can use any search as the
basis of a virtual library. The virtual library will contain only the
basis of a Virtual library. The Virtual library will contain only the
books matched by that search. First, type in the search you want to use
in the Search bar or build a search using the :guilabel:`Tag browser`.
When you are happy with the returned results, click the Virtual library
button, choose :guilabel:`Create library` and enter a name for the new virtual
library. The virtual library will then be created based on the search
When you are happy with the returned results, click the :guilabel:`Virtual library`
button, choose :guilabel:`Create library` and enter a name for the new Virtual
library. The Virtual library will then be created based on the search
you just typed in. Searches are very powerful, for examples of the kinds
of things you can do with them, see :ref:`search_interface`.

@@ -124,4 +124,3 @@ saved search that shows you unread books, you can click the :guilabel:`Virtual
Library` button and choose the :guilabel:`Additional restriction` option to
show only unread Historical Fiction books. To learn about saved searches, see
:ref:`saved_searches`.
@@ -15,7 +15,8 @@ def classes(classes):
class E1843(BasicNewsRecipe):
    title = '1843'
    __author__ = 'Kovid Goyal'
    language = 'en'
    description = 'The ideas, culture and lifestyle magazine from The Economist'
    language = 'en_GB'
    no_stylesheets = True
    remove_javascript = True
    oldest_article = 365
@@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1294946868(BasicNewsRecipe):

    title = u'20 Minutos new'
    title = u'20 Minutos'
    publisher = u'Grupo 20 Minutos'

    __author__ = 'Luis Hernandez'
@@ -1,65 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.20minutos.es
'''

from calibre.web.feeds.news import BasicNewsRecipe


class t20Minutos(BasicNewsRecipe):
    title = '20 Minutos'
    __author__ = 'Darko Miletic'
    description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas'  # noqa
    publisher = '20 Minutos Online SL'
    category = 'news, politics, Spain'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = True
    language = 'es'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://estaticos.20minutos.es/css4/img/ui/logo-301x54.png'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [dict(attrs={'class': 'mf-viral'})]
    remove_attributes = ['border']

    feeds = [

        (u'Principal', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss'),
        (u'Cine', u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss'),
        (u'Internacional', u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss'),
        (u'Deportes', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss'),
        (u'Nacional', u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss'),
        (u'Economia', u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss'),
        (u'Tecnologia', u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img', alt=False):
            item['alt'] = 'image'
        return soup
@@ -1,43 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re


class ZiveRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = 'Abelturd'
    language = 'sk'
    version = 1

    title = u'ZIVE.sk'
    publisher = u''
    category = u'News, Newspaper'
    description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. '
    encoding = 'UTF-8'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'

    feeds = []
    feeds.append((u'V\u0161etky \u010dl\xe1nky',
                  u'http://www.zive.sk/rss/sc-47/default.aspx'))

    preprocess_regexps = [
        (re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL | re.IGNORECASE),
         lambda match: ''),

    ]

    remove_tags = []

    keep_only_tags = [dict(name='h1'), dict(name='span', attrs={
        'class': 'arlist-data-info-author'}), dict(name='div', attrs={'class': 'bbtext font-resizer-area'}), ]
    extra_css = '''
        h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
        h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
    '''
@@ -9,47 +9,53 @@ from calibre.web.feeds.recipes import BasicNewsRecipe

class ABCNews(BasicNewsRecipe):
    title = 'ABC News'
    __author__ = 'Pat Stapleton, Dean Cording'
    description = 'News from Australia'
    masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
    cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'

    __author__ = 'Pat Stapleton, Dean Cording, James Cridland'
    description = 'From the Australian Broadcasting Corporation. The ABC is owned and funded by the Australian Government, but is editorially independent.'
    masthead_url = 'https://www.abc.net.au/cm/lb/8212706/data/news-logo-2017---desktop-print-data.png'
    cover_url = 'https://www.abc.net.au/news/linkableblob/8413676/data/abc-news-og-data.jpg'
    cover_margins = (0,20,'#000000')
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    handle_gzip = True
    no_stylesheets = True
    use_embedded_content = False
    scale_news_images_to_device = True
    encoding = 'utf8'
    publisher = 'ABC News'
    category = 'News, Australia, World'
    category = 'Australia,News'
    language = 'en_AU'
    publication_type = 'newsportal'
    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    # Remove annoying map links (inline-caption class is also used for some
    # image captions! hence regex to match maps.google)
    publication_type = 'newspaper'
    extra_css = '.byline{font-size:smaller;margin-bottom:10px;}.inline-caption{display:block;font-size:smaller;text-decoration: none;}'
    preprocess_regexps = [(re.compile(
        r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
        r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]  # Remove map links
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': False
    }

    keep_only_tags = [dict(attrs={'class': ['article section']})]

    remove_tags = [dict(attrs={'class': ['related', 'tags', 'tools', 'attached-content ready',
                                         'inline-content story left', 'inline-content map left contracted', 'published',
    remove_tags = [dict(attrs={'class': ['inner', 'attached-content', 'authorpromo', 'related', 'tags', 'tools', 'attached-content ready',
                                         'inline-content story left', 'inline-content map left contracted',
                                         'inline-content full embedYouTube embedded', 'published',
                                         'story-map', 'statepromo', 'topics', ]})]
    # inner = key points
    # attached-content = related stories
    # authorpromo = "Contact NameOfJournalist"

    remove_attributes = ['width', 'height']

    feeds = [
        ('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
        ('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
        ('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
        ('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
        ('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
        ('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
        ('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
        ('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
        ('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
        ('Science and Technology',
         'http://www.abc.net.au/news/feed/2298/rss.xml'),
        ('Top Stories', 'https://www.abc.net.au/news/feed/45910/rss.xml'),
        ('Politics', 'https://www.abc.net.au/news/feed/51120/rss.xml'),
        ('World', 'https://www.abc.net.au/news/feed/6497190/rss.xml'),
        ('Business', 'https://www.abc.net.au/news/feed/51892/rss.xml'),
        ('Analysis', 'https://www.abc.net.au/news/feed/7571224/rss.xml'),
        ('Sport', 'https://www.abc.net.au/news/feed/2942460/rss.xml'),
        ('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'),
        ('Brisbane', 'https://www.abc.net.au/news/feed/12858/rss.xml'),
        ('Canberra', 'https://www.abc.net.au/news/feed/6910/rss.xml'),
        ('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'),
        ('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'),
        ('Melbourne', 'https://www.abc.net.au/news/feed/21708/rss.xml'),
        ('Sydney', 'https://www.abc.net.au/news/feed/10232/rss.xml'),
        ('Perth', 'https://www.abc.net.au/news/feed/24886/rss.xml'),
    ]
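
A note on the ``preprocess_regexps`` mechanism used in the recipe above: each
entry is a (compiled regex, replacement callable) pair that calibre applies to
an article's raw HTML before parsing. A minimal self-contained sketch using
the map-link pattern from this diff; the sample HTML is made up for
illustration::

    import re

    # (pattern, replacement) pairs, as consumed by BasicNewsRecipe;
    # this one strips the Google Maps caption links mentioned above.
    preprocess_regexps = [
        (re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>',
                    re.DOTALL),
         lambda m: ''),
    ]

    # Hypothetical article fragment, for demonstration only.
    html = ('<p>Story text.</p>'
            '<a class="inline-caption" href="http://maps.google.com/?q=x">Map</a>')
    for pattern, repl in preprocess_regexps:
        html = pattern.sub(repl, html)
    print(html)  # -> <p>Story text.</p>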
@@ -1,26 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1336986047(BasicNewsRecipe):
    title = u'Ads of the World'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = False
    description = 'The best international advertising campaigns'
    language = 'en'
    __author__ = 'faber1971'

    no_stylesheets = True
    keep_only_tags = [
        dict(name='div', attrs={'id': 'primary'})
    ]

    remove_tags = [
        dict(name='ul', attrs={'class': 'links inline'}), dict(name='div', attrs={'class': 'form-item'}), dict(
            name='div', attrs={'id': ['options', 'comments']}), dict(name='ul', attrs={'id': 'nodePager'})
    ]

    reverse_article_order = True
    masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
    feeds = [
        (u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
@@ -1,40 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Dean Cording'
'''
abc.net.au/news
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe


class BusinessSpectator(BasicNewsRecipe):
    title = 'Business Spectator'
    __author__ = 'Dean Cording'
    description = 'Australian Business News & commentary delivered the way you want it.'
    masthead_url = 'http://www.businessspectator.com.au/bs.nsf/logo-business-spectator.gif'
    cover_url = masthead_url

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    auto_cleanup = True
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'Business Spectator'
    category = 'News, Australia, Business'
    language = 'en_AU'
    publication_type = 'newsportal'
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': False
    }

    feeds = [
        ('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
        ('Alan Kohler', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Alan%20Kohler'),
        ('Robert Gottliebsen', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Robert%20Gottliebsen'),
        ('Stephen Bartholomeusz',
         'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Stephen%20Bartholomeusz'),
        ('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
        ('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
    ]
@@ -1,30 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe
import datetime


class Politics(BasicNewsRecipe):
    title = u'Courier Mail'
    title = u'The Courier-Mail'
    description = 'Breaking news headlines for Brisbane and Queensland, Australia. The Courier-Mail is owned by News Corp Australia.'
    language = 'en_AU'
    __author__ = 'Krittika Goyal'
    __author__ = 'Krittika Goyal, James Cridland'
    oldest_article = 3  # days
    max_articles_per_feed = 20
    use_embedded_content = False

    d = datetime.datetime.today()
    cover_url = 'http://mfeeds.news.com.au/smedia/NCCOURIER/NCCM_1_' + d.strftime('%Y_%m_%d') + '_thumb_big.jpg'
    masthead_url = 'https://couriermail.digitaleditions.com.au/images/couriermail-logo.jpg'

    no_stylesheets = True
    auto_cleanup = True
    handle_gzip = True

    feeds = [
        ('Top Stories',
         'http://feeds.news.com.au/public/rss/2.0/bcm_top_stories_257.xml'),
        ('Breaking News',
         'http://feeds.news.com.au/public/rss/2.0/bcm_breaking_news_67.xml'),
        ('Queensland News',
         'http://feeds.news.com.au/public/rss/2.0/bcm_queensland_news_70.xml'),
        ('Technology News',
         'http://feeds.news.com.au/public/rss/2.0/bcm_technology_news_66.xml'),
        ('Entertainment News',
         'http://feeds.news.com.au/public/rss/2.0/bcm_entertainment_news_256.xml'),
        ('Business News',
         'http://feeds.news.com.au/public/rss/2.0/bcm_business_news_64.xml'),
        ('Sport News',
         'http://feeds.news.com.au/public/rss/2.0/bcm_sports_news_65.xml'),
        ('Top Stories', 'http://www.couriermail.com.au/rss'),
        ('Breaking', 'https://www.couriermail.com.au/news/breaking-news/rss'),
        ('Queensland', 'https://www.couriermail.com.au/news/queensland/rss'),
        ('Technology', 'https://www.couriermail.com.au/technology/rss'),
        ('Entertainment', 'https://www.couriermail.com.au/entertainment/rss'),
        ('Finance', 'https://www.couriermail.com.au/business/rss'),
        ('Sport', 'https://www.couriermail.com.au/sport/rss'),
    ]

    # This isn't perfect, but works rather better than it once did. To do - remove links to subscription content.
@@ -1,3 +1,9 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe


@@ -15,10 +21,12 @@ class GlasgowHerald(BasicNewsRecipe):
    auto_cleanup = True

    feeds = [
        (u'News', u'http://www.heraldscotland.com/cmlink/1.758'),
        (u'Sport', u'http://www.heraldscotland.com/cmlink/1.761'),
        (u'Business', u'http://www.heraldscotland.com/cmlink/1.763'),
        (u'Life & Style', u'http://www.heraldscotland.com/cmlink/1.770'),
        (u'Arts & Entertainment',
         u'http://www.heraldscotland.com/cmlink/1.768',),
        (u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]
        (u'News', u'https://www.heraldscotland.com/news/rss/'),
        (u'Sport', u'https://www.heraldscotland.com/sport/rss/'),
        (u'Business', u'https://www.heraldscotland.com/business_hq/rss/'),
        (u'Lifestyle', u'https://www.heraldscotland.com/life_style/rss/'),
        (u'Arts & Entertainment', u'https://www.heraldscotland.com/arts_ents/rss/',),
        (u'Politics', u'https://www.heraldscotland.com/politics/rss/'),
        (u'Columnists', u'https://www.heraldscotland.com/opinion/columnists/rss/')

    ]
BIN
recipes/icons/1843.png
Normal file
BIN
recipes/icons/macrobusiness.png
Normal file
BIN
recipes/icons/spectator-au.png
Normal file
BIN
recipes/icons/the_age.png
Normal file
@@ -2,13 +2,9 @@

import re
from collections import defaultdict
from pprint import pformat

from calibre.utils.date import strptime, utcnow
from calibre.web.feeds.news import BasicNewsRecipe

DT_EPOCH = strptime('1970-01-01', '%Y-%m-%d', assume_utc=True)

DIR_COLLECTIONS = [['world'],
                   ['nation'],
                   ['politics'],
@@ -29,84 +25,22 @@ DIR_COLLECTIONS = [['world'],
                   ['travel'],
                   ['fashion']]

SECTIONS=['THE WORLD',
          'THE NATION',
          'POLITICS',
          'OPINION',
          'CALIFORNIA',
          'OBITUARIES',
          'BUSINESS',
          'HOLLYWOOD',
          'SPORTS',
          'ENTERTAINMENT',
          'MOVIES',
          'TELEVISION',
          'BOOKS',
          'FOOD',
          'HEALTH',
          'SCIENCE AND TECHNOLOGY',
          'HOME',
          'TRAVEL',
          'FASHION',
          'NEWSLETTERS'
          'OTHER']

def classes(classes):
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


def absurl(url):
    if url.startswith('/'):
        url = 'http://www.latimes.com' + url
        url = 'https://www.latimes.com' + url
    return url


def check_words(words):
    return lambda x: x and frozenset(words.split()).intersection(x.split())


def what_section(url):
    if re.compile(r'^https?://www[.]latimes[.]com/local/obituaries').search(url):
        return 'OBITUARIES'
    elif re.compile(r'^https?://www[.]latimes[.]com/business/hollywood').search(url):
        return 'HOLLYWOOD'
    elif re.compile(r'^https?://www[.]latimes[.]com/entertainment/movies').search(url):
        return 'MOVIES'
    elif re.compile(r'^https?://www[.]latimes[.]com/entertainment/tv').search(url):
        return 'TELEVISION'
    elif re.compile(r'^https?://www[.]latimes[.]com/business/technology').search(url):
        return 'SCIENCE AND TECHNOLOGY'
    elif re.compile(r'^https?://www[.]latimes[.]com/world').search(url):
        return 'THE WORLD'
    elif re.compile(r'^https?://www[.]latimes[.]com/nation').search(url):
        return 'THE NATION'
    elif re.compile(r'^https?://www[.]latimes[.]com/politics').search(url):
        return 'POLITICS'
    elif re.compile(r'^https?://www[.]latimes[.]com/opinion').search(url):
        return 'OPINION'
    elif re.compile(r'^https?://www[.]latimes[.]com/(?:local|style)').search(url):
        return 'CALIFORNIA'
    elif re.compile(r'^https?://www[.]latimes[.]com/business').search(url):
        return 'BUSINESS'
    elif re.compile(r'^https?://www[.]latimes[.]com/sports').search(url):
        return 'SPORTS'
    elif re.compile(r'^https?://www[.]latimes[.]com/entertainment').search(url):
        return 'ENTERTAINMENT'
    elif re.compile(r'^https?://www[.]latimes[.]com/books').search(url):
        return 'BOOKS'
    elif re.compile(r'^https?://www[.]latimes[.]com/food').search(url):
        return 'FOOD'
    elif re.compile(r'^https?://www[.]latimes[.]com/health').search(url):
        return 'HEALTH'
    elif re.compile(r'^https?://www[.]latimes[.]com/science').search(url):
        return 'SCIENCE AND TECHNOLOGY'
    elif re.compile(r'^https?://www[.]latimes[.]com/home').search(url):
        return 'HOME'
    elif re.compile(r'^https?://www[.]latimes[.]com/travel').search(url):
        return 'TRAVEL'
    elif re.compile(r'^https?://www[.]latimes[.]com/fashion').search(url):
        return 'FASHION'
    elif re.compile(r'^https?://www[.]latimes[.]com/newsletter').search(url):
        return 'NEWSLETTERS'
    else:
        return 'OTHER'
    parts = url.split('/')
    return parts[-4].capitalize()


class LATimes(BasicNewsRecipe):
@@ -126,32 +60,25 @@ class LATimes(BasicNewsRecipe):
    cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'

    keep_only_tags = [
        dict(name='header', attrs={'id': 'top'}),
        dict(name='article'),
        dict(name='div', attrs={'id': 'liveblog-story-wrapper'})
        classes('ArticlePage-breadcrumbs ArticlePage-headline ArticlePage-mainContent'),
    ]

    remove_tags= [
        dict(name='div', attrs={'class': check_words(
            'hidden-tablet hidden-mobile hidden-desktop pb-f-ads-dfp')})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': check_words('pb-f-article-body')})
        classes('ArticlePage-actions Enhancement hidden-tablet hidden-mobile hidden-desktop pb-f-ads-dfp')
    ]

    def parse_index(self):
        index = 'http://www.latimes.com/'
        pat = r'^(?:https?://www[.]latimes[.]com)?/[^#]+20[0-9]{6}-(?:html)?story[.]html'
        index = 'https://www.latimes.com/'
        pat = r'^https://www\.latimes\.com/[^/]+?/story/20\d{2}-\d{2}-\d{2}/\S+'
        articles = self.find_articles(index, pat)
        for collection in DIR_COLLECTIONS:
            if self.test:
                continue
            topdir = collection.pop(0)
            index = 'http://www.latimes.com/' + topdir + '/'
            pat = r'^(?:https?://www[.]latimes[.]com)?/' + \
                topdir + '/[^#]+20[0-9]{6}-(?:html)?story[.]html'
            articles += self.find_articles(index, pat)
            collection_index = index + topdir + '/'
            articles += self.find_articles(collection_index, pat)
            for subdir in collection:
                sub_index = index + subdir + '/'
                sub_index = collection_index + subdir + '/'
                articles += self.find_articles(sub_index, pat)

        feeds = defaultdict(list)
@@ -159,12 +86,7 @@ class LATimes(BasicNewsRecipe):
            section = what_section(article['url'])
            feeds[section].append(article)

        keys = []
        for key in SECTIONS:
            if key in feeds.keys():
                keys.append(key)
        self.log(pformat(dict(feeds)))
        return [(k, feeds[k]) for k in keys]
        return [(k, feeds[k]) for k in sorted(feeds)]

    def preprocess_html(self, soup):
        for img in soup.findAll('img', attrs={'data-src': True}):
@@ -190,16 +112,6 @@ class LATimes(BasicNewsRecipe):
        alinks = [a for a in alinks if len(
            a.contents) == 1 and a.find(text=True, recursive=False)]
        articles = [
            {'title': a.find(text=True), 'url': absurl(a['href'])} for a in alinks]
        date_rx = re.compile(
            r'^https?://www[.]latimes[.]com/[^#]+-(?P<date>20[0-9]{6})-(?:html)?story[.]html')
        for article in articles:
            mdate = date_rx.match(article['url'])
            if mdate is not None:
                try:
                    article['timestamp'] = (strptime(mdate.group('date'),'%Y%m%d') - DT_EPOCH).total_seconds()
                except Exception:
                    article['timestamp'] = (utcnow() - DT_EPOCH).total_seconds()
                article['url'] = mdate.group(0)
            {'title': self.tag_to_string(a), 'url': absurl(a['href'])} for a in alinks]
        self.log('Found: ', len(articles), ' articles.\n')
        return articles
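
A brief aside on the ``classes()`` helper that the new ``keep_only_tags`` and
``remove_tags_after`` above rely on: it turns a space-separated list of CSS
class names into a BeautifulSoup attribute matcher. A small illustration,
reusing the definition from this diff::

    def classes(classes):
        q = frozenset(classes.split(' '))
        return dict(attrs={
            'class': lambda x: x and frozenset(x.split()).intersection(q)})


    matcher = classes('ArticlePage-headline ArticlePage-mainContent')
    accept = matcher['attrs']['class']
    # The predicate accepts any tag whose class attribute shares at least
    # one name with the query set:
    print(bool(accept('ArticlePage-headline promo')))  # True
    print(bool(accept('footer')))                      # False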
@@ -17,7 +17,7 @@ class AListApart (BasicNewsRecipe):
    oldest_article = 120
    remove_empty_feeds = True
    encoding = 'utf8'
    cover_url = u'http://alistapart.com/pix/alalogo.gif'
    cover_url = u'https://alistapart.com/wp-content/uploads/2019/03/cropped-icon_navigation-laurel-512.jpg'

    def get_extra_css(self):
        if not self.extra_css:
@@ -1,3 +1,8 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>

import json
from calibre.web.feeds.news import BasicNewsRecipe
from collections import defaultdict

@@ -49,28 +54,23 @@ class Newsweek(BasicNewsRecipe):
        a = li.xpath('descendant::a[@href]')[0]
        url = href_to_url(a, add_piano=True)
        self.timefmt = self.tag_to_string(a)
        img = li.xpath('descendant::a[@href]//img[@data-src]')[0]
        self.cover_url = img.get('data-src').partition('?')[0]
        img = li.xpath('descendant::a[@href]//source[@type="image/jpeg"]/@srcset')[0]
        self.cover_url = img.partition('?')[0]
        self.log('Found cover url:', self.cover_url)
        root = self.index_to_soup(url, as_tree=True)
        features = []
        try:
            div = root.xpath('//div[@class="magazine-features"]')[0]
        except IndexError:
            pass
        else:
            for a in div.xpath('descendant::div[@class="h1"]//a[@href]'):
        for article in root.xpath('//div[@class="magazine-features"]//article'):
            a = article.xpath('descendant::a[@class="article-link"]')[0]
            title = self.tag_to_string(a)
            article = a.xpath('ancestor::article')[0]
            url = href_to_url(a)
            desc = ''
            s = article.xpath('descendant::div[@class="summary"]')
            if s:
                desc = self.tag_to_string(s[0])
            features.append({'title': title, 'url': href_to_url(a), 'description': desc})
            self.log(title, href_to_url(a))
            self.log(title, url)

        index = []
        if features:
            index.append(('Features', features))
        index = [('Features', features)]
        sections = defaultdict(list)
        for widget in ('editor-pick',):
            self.parse_widget(widget, sections)
@@ -79,30 +79,18 @@ class Newsweek(BasicNewsRecipe):
        return index

    def parse_widget(self, widget, sections):
        root = self.index_to_soup('https://d.newsweek.com/widget/' + widget, as_tree=True)
        div = root.xpath('//div')[0]
        href_xpath = 'descendant::*[local-name()="h1" or local-name()="h2" or local-name()="h3" or local-name()="h4"]/a[@href]'
        for a in div.xpath(href_xpath):
            title = self.tag_to_string(a)
            article = a.xpath('ancestor::article')[0]
            desc = ''
            s = article.xpath('descendant::div[@class="summary"]')
            if s:
                desc = self.tag_to_string(s[0])
            sec = article.xpath('descendant::div[@class="category"]')
            if sec:
                sec = self.tag_to_string(sec[0])
            else:
                sec = 'Articles'
            sections[sec].append(
                {'title': title, 'url': href_to_url(a), 'description': desc})
            self.log(title, href_to_url(a))
            if desc:
                self.log('\t' + desc)
            self.log('')

    def print_version(self, url):
        return url + '?piano_d=1'
        raw = self.index_to_soup('https://d.newsweek.com/json/' + widget, raw=True)
        data = json.loads(raw)['items']
        for item in data:
            title = item['title']
            url = BASE + item['link']
            self.log(title, url)
            sections[item['label']].append(
                {
                    'title': title,
                    'url': url,
                    'description': item['description'],
                })

    def preprocess_html(self, soup):
        # Parallax images in the articles are loaded as background images
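
For context on the ``parse_widget`` rewrite above: the recipe now reads a JSON
feed instead of scraping widget HTML, and expects each item to carry ``title``,
``link``, ``label`` and ``description`` fields. A minimal sketch of that
consumption pattern; the payload is a made-up sample, and ``BASE`` stands in
for the base-URL constant the recipe defines elsewhere::

    import json

    # Hypothetical sample of the d.newsweek.com widget JSON shape.
    raw = ('{"items": [{"title": "Sample story", "link": "/sample-story",'
           ' "label": "Tech", "description": "One line summary."}]}')
    BASE = 'https://www.newsweek.com'  # assumed; the real recipe supplies its own

    for item in json.loads(raw)['items']:
        print(item['label'], '|', item['title'], '|', BASE + item['link'])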
@@ -90,6 +90,7 @@ class NewYorkTimes(BasicNewsRecipe):
    compress_news_images = True
    compress_news_images_auto_size = 5
    remove_attributes = ['style']
    conversion_options = {'flow_size': 0}

    remove_tags = [
        dict(attrs={'aria-label':'tools'.split()}),
@@ -266,11 +267,16 @@ class NewYorkTimes(BasicNewsRecipe):
            if article.get('description'):
                self.log('\t\t', article['description'])

        container = soup.find(itemtype='http://schema.org/CollectionPage')
        container.find('header').extract()
        div = container.find('div')
        for section in div.findAll('section'):
            for ol in section.findAll('ol'):
        cid = slug.split('/')[-1]
        if cid == 'dining':
            cid = 'food'
        try:
            container = soup.find(id='collection-{}'.format(cid)).find('section')
        except AttributeError:
            container = None
        if container is None:
            raise ValueError('Failed to find articles container for slug: {}'.format(slug))
        for ol in container.findAll('ol'):
            for article in self.parse_article_group(ol):
                log(article)
                yield article
@@ -90,6 +90,7 @@ class NewYorkTimes(BasicNewsRecipe):
    compress_news_images = True
    compress_news_images_auto_size = 5
    remove_attributes = ['style']
    conversion_options = {'flow_size': 0}

    remove_tags = [
        dict(attrs={'aria-label':'tools'.split()}),
@@ -266,11 +267,16 @@ class NewYorkTimes(BasicNewsRecipe):
            if article.get('description'):
                self.log('\t\t', article['description'])

        container = soup.find(itemtype='http://schema.org/CollectionPage')
        container.find('header').extract()
        div = container.find('div')
        for section in div.findAll('section'):
            for ol in section.findAll('ol'):
        cid = slug.split('/')[-1]
        if cid == 'dining':
            cid = 'food'
        try:
            container = soup.find(id='collection-{}'.format(cid)).find('section')
        except AttributeError:
            container = None
        if container is None:
            raise ValueError('Failed to find articles container for slug: {}'.format(slug))
        for ol in container.findAll('ol'):
            for article in self.parse_article_group(ol):
                log(article)
                yield article
@@ -1,72 +0,0 @@
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe


class AdjectiveSpecies(BasicNewsRecipe):
    title = u'Российская Газета'
    __author__ = 'bug_me_not'
    cover_url = u'http://img.rg.ru/img/d/logo2012.png'
    description = 'Российская Газета'
    publisher = 'Правительство Российской Федерации'
    category = 'news'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    oldest_article = 300
    max_articles_per_feed = 100

    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class': 'ar-citate'})
    remove_tags = [dict(name='div', attrs={'class': 'insert_left'}),
                   dict(name='a', attrs={'href': '#comments'}),
                   dict(name='div', attrs={'class': 'clear'}),
                   dict(name='div', attrs={'class': 'ar-citate'}),
                   dict(name='div', attrs={'class': 'ar-social red'}),
                   dict(name='div', attrs={'class': 'clear clear-head'}), ]

    feeds = [
        (u'Все материалы', u'http://www.rg.ru/tema/rss.xml'),
        (u'Еженедельный выпуск',
         u'http://www.rg.ru/tema/izd-subbota/rss.xml'),
        (u'Государство',
         u'http://www.rg.ru/tema/gos/rss.xml'),
        (u'Экономика',
         u'http://www.rg.ru/tema/ekonomika/rss.xml'),
        (u'Бизнес',
         u'http://www.rg.ru/tema/izd-biznes/rss.xml'),
        (u'В мире', u'http://www.rg.ru/tema/mir/rss.xml'),
        (u'Происшествия',
         u'http://www.rg.ru/tema/bezopasnost/rss.xml'),
        (u'Общество',
         u'http://www.rg.ru/tema/obshestvo/rss.xml'),
        (u'Культура',
         u'http://www.rg.ru/tema/kultura/rss.xml'),
        (u'Спорт', u'http://www.rg.ru/tema/sport/rss.xml'),
        (u'Документы', u'http://rg.ru/tema/doc-any/rss.xml'),
        (u'РГ: Башкортостан',
         u'http://www.rg.ru/org/filial/bashkortostan/rss.xml'),
        (u'РГ: Волга-Кама',
         u'http://www.rg.ru/org/filial/volga-kama/rss.xml'),
        (u'РГ: Восточная Сибирь',
         u'http://www.rg.ru/org/filial/enisey/rss.xml'),
        (u'РГ: Дальний Восток',
         u'http://www.rg.ru/org/filial/dvostok/rss.xml'),
        (u'РГ: Кубань. Северный Кавказ',
         u'http://www.rg.ru/org/filial/kuban/rss.xml'),
        (u'РГ: Пермский край',
         u'http://www.rg.ru/org/filial/permkray/rss.xml'),
        (u'РГ: Приволжье',
         u'http://www.rg.ru/org/filial/privolzhe/rss.xml'),
        (u'РГ: Северо-Запад',
         u'http://www.rg.ru/org/filial/szapad/rss.xml'),
        (u'РГ: Сибирь',
         u'http://www.rg.ru/org/filial/sibir/rss.xml'),
        (u'РГ: Средняя Волга',
         u'http://www.rg.ru/org/filial/svolga/rss.xml'),
        (u'РГ: Урал и Западная Сибирь',
         u'http://www.rg.ru/org/filial/ural/rss.xml'),
        (u'РГ: Центральная Россия',
         u'http://www.rg.ru/org/filial/roscentr/rss.xml'),
        (u'РГ: Юг России',
         u'http://www.rg.ru/org/filial/jugrossii/rss.xml'),
    ]
51
recipes/spectator-au.recipe
Normal file
@ -0,0 +1,51 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
https://www.spectator.com.au/
'''
from calibre.web.feeds.recipes import BasicNewsRecipe


class SpectatorAU(BasicNewsRecipe):
    title = 'Spectator Australia'
    __author__ = 'Pat Stapleton, Dean Cording, James Cridland'
    description = 'Spectator Australia is an Australian edition of The Spectator, first published in the UK in July 1828.'
    masthead_url = 'https://www.spectator.com.au/content/themes/spectator-australia/assets/images/spec-aus-logo.png'
    cover_url = 'https://spectator.imgix.net/content/uploads/2015/10/Spectator-Australia-Logo.jpg'
    oldest_article = 7
    handle_gzip = True
    no_stylesheets = True
    use_embedded_content = False
    scale_news_images_to_device = True
    encoding = 'utf8'
    publisher = 'Spectator Australia'
    category = 'Australia,News'
    language = 'en_AU'
    publication_type = 'newspaper'
    extra_css = '.article-header__author{margin-bottom:20px;}'
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False
    }

    keep_only_tags = [dict(attrs={'class': ['article']})]
    remove_tags = [
        dict(
            attrs={
                'class': [
                    'big-author', 'article-header__category', 'margin-menu',
                    'related-stories', 'disqus_thread', 'middle-promo',
                    'show-comments', 'article-tags'
                ]
            }
        ),
        dict(name=['h4', 'hr'])
    ]
    remove_attributes = ['width', 'height']

    feeds = [
        ('Spectator Australia', 'https://www.spectator.com.au/feed/'),
    ]
@ -1,10 +1,19 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals

import json
import re

from mechanize import Request

from calibre.web.feeds.recipes import BasicNewsRecipe


def class_sel(cls):
    def f(x):
        return x and cls in x.split()
    return f


def absolutize(url):
    return 'https://spectator.co.uk' + url


class Spectator(BasicNewsRecipe):
@ -15,52 +24,100 @@ class Spectator(BasicNewsRecipe):
    language = 'en'

    no_stylesheets = True

    keep_only_tags = dict(name='div', attrs={
        'class': ['article-header__text', 'featured-image', 'article-content']})
    remove_tags = [
        dict(name='div', attrs={'id': ['disqus_thread']}),
        dict(attrs={'class': ['middle-promo',
                              'sharing', 'mejs-player-holder']}),
        dict(name='a', onclick=lambda x: x and '__gaTracker' in x and 'outbound-article' in x),
    ]
    remove_tags_after = [
        dict(name='hr', attrs={'class': 'sticky-clear'}),
    ]

    def parse_spec_section(self, div):
        h2 = div.find('h2')
        sectitle = self.tag_to_string(h2)
        self.log('Section:', sectitle)
        articles = []
        for div in div.findAll('div', id=lambda x: x and x.startswith('post-')):
            h2 = div.find('h2', attrs={'class': class_sel('term-item__title')})
            if h2 is None:
                h2 = div.find(attrs={'class': class_sel('news-listing__title')})
            title = self.tag_to_string(h2)
            a = h2.find('a')
            url = a['href']
            desc = ''
            self.log('\tArticle:', title)
            p = div.find(attrs={'class': class_sel('term-item__excerpt')})
            if p is not None:
                desc = self.tag_to_string(p)
            articles.append({'title': title, 'url': url, 'description': desc})
        return sectitle, articles
    use_embedded_content = True

    def parse_index(self):
        soup = self.index_to_soup('https://www.spectator.co.uk/magazine/')
        a = soup.find('a', attrs={'class': 'issue-details__cover-link'})
        self.timefmt = ' [%s]' % a['title']
        self.cover_url = a['href']
        if self.cover_url.startswith('//'):
            self.cover_url = 'http:' + self.cover_url
        br = self.get_browser()
        main_js = br.open_novisit('https://spectator.co.uk/main.js').read().decode('utf-8')
        data = {}
        fields = ('apiKey', 'apiSecret', 'contentEnvironment', 'siteUrl', 'magazineIssueContentUrl', 'contentUrl')
        pat = r'this.({})\s*=\s*"(.+?)"'.format('|'.join(fields))
        for m in re.finditer(pat, main_js):
            data[m.group(1)] = m.group(2)
        self.log('Got Spectator data:', data)
        headers = {
            'api_key': data['apiKey'],
            'origin': data['siteUrl'],
            'access_token': data['apiSecret'],
            'Accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
            'Accept-encoding': 'gzip, deflate',
            'Accept': '*/*',
        }

        feeds = []

        def make_url(utype, query, includes=(), limit=None):
            ans = data[utype] + '/entries?environment=' + data['contentEnvironment']
            if limit is not None:
                ans += '&limit={}'.format(limit)
            for inc in includes:
                ans += '&include[]=' + inc
            ans += '&query=' + json.dumps(query)
            return ans

        div = soup.find(attrs={'class': class_sel('content-area')})
        for x in div.findAll(attrs={'class': class_sel('magazine-section-holder')}):
            title, articles = self.parse_spec_section(x)
            if articles:
                feeds.append((title, articles))
        return feeds

        def get_result(url):
            self.log('Fetching:', url)
            req = Request(url, headers=headers)
            raw = br.open_novisit(req).read().decode('utf-8')
            return json.loads(raw)['entries']

        # Get current issue
        url = data['magazineIssueContentUrl'] + '/entries?environment=' + data['contentEnvironment'] + "&desc=issue_date&limit=1&only[BASE][]=url"
        result = get_result(url)
        slug = result[0]['url']
        uid = result[0]['uid']  # noqa
        date = slug.split('/')[-1]
        self.log('Downloading issue:', date)

        # Cover information
        url = make_url(
            'magazineIssueContentUrl',
            {'url': slug},
            limit=1
        )
        self.cover_url = get_result(url)[0]['magazine_cover']['url']
        self.log('Found cover:', self.cover_url)

        # List of articles
        url = make_url(
            'contentUrl',
            {
                "magazine_content_production_only.magazine_issue": {
                    "$in_query": {"url": slug},
                    "_content_type_uid": "magazine_issue"
                },
                "_content_type_uid": "article"
            },
            includes=(
                'topic', 'magazine_content_production_only.magazine_issue',
                'magazine_content_production_only.magazine_subsection', 'author'
            )
        )
        result = get_result(url)
        articles = {}
        for entry in result:
            title = entry['title']
            url = absolutize(entry['url'])
            blocks = []
            a = blocks.append
            byline = entry.get('byline') or ''
            if byline:
                a('<h3>{}</h3>'.format(byline))
            if entry.get('author'):
                for au in reversed(entry['author']):
                    au = entry['author'][0]
                    cac = ''
                    if au.get('caricature'):
                        cac = '<img src="{}">'.format(au['caricature']['url'])
                    a('<div>{} <a href="{}">{}</a></div>'.format(cac, absolutize(au['url']), au['title']))
            if entry.get('hero_image'):
                hi = entry['hero_image'][0]
                a('<div style="text-align: center"><img src="{}"></div>'.format(hi['url']))
                if hi.get('description'):
                    a('<div style="text-align: center; font-size: smaller">{}</div>'.format(hi['description']))
            a(entry['text_body'])
            section = 'Unknown'
            if entry.get('topic'):
                topic = entry['topic'][0]
                section = topic['title']
            articles.setdefault(section, []).append({
                'title': title, 'url': url, 'description': byline, 'content': '\n\n'.join(blocks)})
        return [(sec, articles[sec]) for sec in sorted(articles)]
@ -15,7 +15,7 @@ class TheBaffler(BasicNewsRecipe):
    __author__ = 'Jose Ortiz'
    description = ('This magazine contains left-wing criticism, cultural analysis, short'
                   ' stories, poems and art. They publish six print issues annually.')
    language = 'en_US'
    language = 'en'
    encoding = 'UTF-8'
    no_javascript = True
    no_stylesheets = True
@ -1,207 +0,0 @@
#!/usr/bin/env python2

u'''
Ведомости
'''

from calibre.web.feeds.feedparser import parse
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe


def new_tag(soup, name, attrs=()):
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    return Tag(soup, name, attrs=attrs or None)


class VedomostiRecipe(BasicNewsRecipe):
    title = u'Ведомости'
    __author__ = 'Nikolai Kotchetkov'
    publisher = 'vedomosti.ru'
    category = 'press, Russia'
    description = u'Ежедневная деловая газета'
    oldest_article = 3
    max_articles_per_feed = 100

    masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
    cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'

    # Add feed names if you want them to be sorted (feeds of this list appear
    # first)
    sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги']

    encoding = 'cp1251'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    recursions = 0

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    keep_only_tags = [dict(name='td', attrs={'class': ['second_content']})]

    remove_tags_after = [dict(name='div', attrs={'class': 'article_text'})]

    remove_tags = [
        dict(name='div', attrs={'class': ['sep', 'choice', 'articleRightTbl']})]

    feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml']

    # base URL for relative links
    base_url = u'http://www.vedomosti.ru'

    extra_css = 'h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'\
        'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'\
        'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'\
        '.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
        '.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
        '.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'\
        '.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'\
        '.article_desc {font-size: 1em; font-style:italic;}'

    def parse_index(self):
        try:
            feedData = parse(self.feeds[0])
            if not feedData:
                raise NotImplementedError
            self.log("parse_index: Feed loaded successfully.")
            try:
                if feedData.feed.title:
                    self.title = feedData.feed.title
                    self.log("parse_index: Title updated to: ", self.title)
            except Exception:
                pass
            try:
                if feedData.feed.description:
                    self.description = feedData.feed.description
                    self.log("parse_index: Description updated to: ",
                             self.description)
            except Exception:
                pass

            def get_virtual_feed_articles(feed):
                if feed in feeds:
                    return feeds[feed][1]
                self.log("Adding new feed: ", feed)
                articles = []
                feeds[feed] = (feed, articles)
                return articles

            feeds = {}

            # Iterate feed items and distribute articles using tags
            for item in feedData.entries:
                link = item.get('link', '')
                title = item.get('title', '')
                if '' == link or '' == title:
                    continue
                article = {'title': title, 'url': link, 'description': item.get(
                    'description', ''), 'date': item.get('date', ''), 'content': ''}
                if not item.get('tags'):  # noqa
                    get_virtual_feed_articles('_default').append(article)
                    continue
                for tag in item.tags:
                    addedToDefault = False
                    term = tag.get('term', '')
                    if '' == term:
                        if (not addedToDefault):
                            get_virtual_feed_articles(
                                '_default').append(article)
                        continue
                    get_virtual_feed_articles(term).append(article)

            # Get feed list
            # Select sorted feeds first of all
            result = []
            for feedName in self.sortOrder:
                if (not feeds.get(feedName)):
                    continue
                result.append(feeds[feedName])
                del feeds[feedName]
            result = result + feeds.values()

            return result

        except Exception as err:
            self.log(err)
            raise NotImplementedError

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):

        # Find article
        contents = soup.find('div', {'class': ['article_text']})
        if not contents:
            self.log('postprocess_html: article div not found!')
            return soup
        contents.extract()

        # Find title
        title = soup.find('h1')
        if title:
            contents.insert(0, title)

        # Find article image
        newstop = soup.find('div', {'class': ['newstop']})
        if newstop:
            img = newstop.find('img')
            if img:
                imgDiv = new_tag(soup, 'div')
                imgDiv['class'] = 'article_img'

                if img.get('width'):
                    del(img['width'])
                if img.get('height'):
                    del(img['height'])

                # find description
                element = img.parent.nextSibling

                img.extract()
                imgDiv.insert(0, img)

                while element:
                    if not isinstance(element, Tag):
                        continue
                    nextElement = element.nextSibling
                    if 'p' == element.name:
                        element.extract()
                        element['class'] = 'article_img_desc'
                        imgDiv.insert(len(imgDiv.contents), element)
                    element = nextElement

                contents.insert(1, imgDiv)

        # find article abstract
        abstract = soup.find('p', {'class': ['subhead']})
        if abstract:
            abstract['class'] = 'article_desc'
            contents.insert(2, abstract)

        # Find article authors
        authorsDiv = soup.find('div', {'class': ['autors']})
        if authorsDiv:
            authorsP = authorsDiv.find('p')
            if authorsP:
                authorsP['class'] = 'article_authors'
                contents.insert(len(contents.contents), authorsP)

        # Fix urls that use relative path
        urls = contents.findAll('a', href=True)
        if urls:
            for url in urls:
                if '/' == url['href'][0]:
                    url['href'] = self.base_url + url['href']

        body = soup.find('td', {'class': ['second_content']})
        if body:
            body.replaceWith(contents)

        self.log('Result: ', soup.prettify())
        return soup
@ -4,6 +4,7 @@ __copyright__ = '2014, Darko Miletic <darko.miletic at gmail.com>'
www.wired.com
'''

from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe


@ -80,3 +81,17 @@ class WiredDailyNews(BasicNewsRecipe):
            articles.extend(self.parse_wired_index_page(baseurl.format(pagenum), seen))

        return [('Magazine Articles', articles)]

    # Wired changes the content it delivers based on cookies, so the
    # following ensures that we send no cookies
    def get_browser(self, *args, **kwargs):
        return self

    def clone_browser(self, *args, **kwargs):
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        br = browser()
        return br.open_novisit(*args, **kwargs)

    open = open_novisit
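The pattern above works because get_browser() returns the recipe object itself, so every fetch is routed through open_novisit(), which builds a throwaway browser for each request and therefore never sends accumulated cookies. A minimal standalone sketch of the same idea, assuming mechanize (which calibre's browser() helper wraps); the URL is only illustrative:

    import mechanize

    def fetch_without_cookies(url):
        # A fresh Browser per call means an empty cookie jar on every request
        br = mechanize.Browser()
        br.set_handle_robots(False)
        return br.open(url).read()

    # raw_html = fetch_without_cookies('https://www.wired.com/magazine/')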
@ -4,6 +4,7 @@ __copyright__ = '2014, Darko Miletic <darko.miletic at gmail.com>'
www.wired.com
'''

from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe


@ -66,3 +67,17 @@ class WiredDailyNews(BasicNewsRecipe):

    def get_article_url(self, article):
        return article.get('link', None)

    # Wired changes the content it delivers based on cookies, so the
    # following ensures that we send no cookies
    def get_browser(self, *args, **kwargs):
        return self

    def clone_browser(self, *args, **kwargs):
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        br = browser()
        return br.open_novisit(*args, **kwargs)

    open = open_novisit
@ -40,7 +40,7 @@

/* blocks */

html, div, map, dt, isindex, form {
div, map, dt, isindex, form {
    display: block;
}
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, os, json, subprocess, errno, hashlib
import os, json, subprocess, errno, hashlib
from setup import Command, build_cache_dir, edit_file, dump_json


@ -82,10 +82,8 @@ class Check(Command):
            p = subprocess.Popen(['rapydscript', 'lint', f])
            return p.wait() != 0
        if ext == '.yaml':
            sys.path.insert(0, self.wn_path)
            import whats_new
            whats_new.render_changelog(self.j(self.d(self.SRC), 'Changelog.yaml'))
            sys.path.remove(self.wn_path)
            p = subprocess.Popen(['python', self.j(self.wn_path, 'whats_new.py'), f])
            return p.wait() != 0

    def run(self, opts):
        self.fhash_cache = {}
@ -102,7 +102,7 @@ class SourceForge(Base):  # {{{
        for i in range(5):
            try:
                check_call([
                    'rsync', '-h', '-z', '--progress', '-e', 'ssh -x', x,
                    'rsync', '-h', '-zz', '--progress', '-e', 'ssh -x', x,
                    '%s,%s@frs.sourceforge.net:%s' %
                    (self.username, self.project, self.rdir + '/')
                ])
@ -10,7 +10,6 @@ import bz2
import errno
import glob
import gzip
import HTMLParser
import io
import json
import os
@ -22,8 +21,6 @@ import subprocess
import sys
import tempfile
import time
import urllib2
import urlparse
import zipfile
import zlib
from collections import namedtuple
@ -33,6 +30,24 @@ from email.utils import parsedate
from functools import partial
from multiprocessing.pool import ThreadPool
from xml.sax.saxutils import escape, quoteattr

try:
    from html import unescape as u
except ImportError:
    from HTMLParser import HTMLParser
    u = HTMLParser().unescape

try:
    from urllib.parse import parse_qs, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse


try:
    from urllib.error import URLError
    from urllib.request import urlopen, Request, build_opener
except Exception:
    from urllib2 import urlopen, Request, build_opener, URLError
# }}}

USER_AGENT = 'calibre mirror'
@ -44,15 +59,13 @@ INDEX = MR_URL + 'showpost.php?p=1362767&postcount=1'
# INDEX = 'file:///t/raw.html'

IndexEntry = namedtuple('IndexEntry', 'name url donate history uninstall deprecated thread_id')
u = HTMLParser.HTMLParser().unescape

socket.setdefaulttimeout(30)


def read(url, get_info=False):  # {{{
    if url.startswith("file://"):
        return urllib2.urlopen(url).read()
    opener = urllib2.build_opener()
        return urlopen(url).read()
    opener = build_opener()
    opener.addheaders = [
        ('User-Agent', USER_AGENT),
        ('Accept-Encoding', 'gzip,deflate'),
@ -62,7 +75,7 @@ def read(url, get_info=False):  # {{{
        try:
            res = opener.open(url)
            break
        except urllib2.URLError as e:
        except URLError as e:
            if not isinstance(e.reason, socket.timeout) or i == 9:
                raise
            time.sleep(random.randint(10, 45))
@ -82,7 +95,7 @@ def read(url, get_info=False):  # {{{


def url_to_plugin_id(url, deprecated):
    query = urlparse.parse_qs(urlparse.urlparse(url).query)
    query = parse_qs(urlparse(url).query)
    ans = (query['t'] if 't' in query else query['p'])[0]
    if deprecated:
        ans += '-deprecated'
@ -149,11 +162,13 @@ def convert_node(fields, x, names={}, import_data=None):
        return x.s.decode('utf-8') if isinstance(x.s, bytes) else x.s
    elif name == 'Num':
        return x.n
    elif name == 'Constant':
        return x.value
    elif name in {'Set', 'List', 'Tuple'}:
        func = {'Set':set, 'List':list, 'Tuple':tuple}[name]
        return func(map(conv, x.elts))
        return func(list(map(conv, x.elts)))
    elif name == 'Dict':
        keys, values = map(conv, x.keys), map(conv, x.values)
        keys, values = list(map(conv, x.keys)), list(map(conv, x.values))
        return dict(zip(keys, values))
    elif name == 'Call':
        if len(x.args) != 1 and len(x.keywords) != 0:
@ -182,7 +197,7 @@ def get_import_data(name, mod, zf, names):
    if mod in names:
        raw = zf.open(names[mod]).read()
        module = ast.parse(raw, filename='__init__.py')
        top_level_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(module))
        top_level_assigments = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'Assign']
        for node in top_level_assigments:
            targets = {getattr(t, 'id', None) for t in node.targets}
            targets.discard(None)
@ -196,9 +211,9 @@ def get_import_data(name, mod, zf, names):

def parse_metadata(raw, namelist, zf):
    module = ast.parse(raw, filename='__init__.py')
    top_level_imports = filter(lambda x:x.__class__.__name__ == 'ImportFrom', ast.iter_child_nodes(module))
    top_level_classes = tuple(filter(lambda x:x.__class__.__name__ == 'ClassDef', ast.iter_child_nodes(module)))
    top_level_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(module))
    top_level_imports = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'ImportFrom']
    top_level_classes = tuple(x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'ClassDef')
    top_level_assigments = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'Assign']
    defaults = {
        'name':'', 'description':'',
        'supported_platforms':['windows', 'osx', 'linux'],
@ -226,7 +241,7 @@ def parse_metadata(raw, namelist, zf):
            plugin_import_found |= inames
        else:
            all_imports.append((mod, [n.name for n in names]))
            imported_names[n.asname or n.name] = mod
            imported_names[names[-1].asname or names[-1].name] = mod
    if not plugin_import_found:
        return all_imports

@ -245,7 +260,7 @@ def parse_metadata(raw, namelist, zf):
            names[x] = val

    def parse_class(node):
        class_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(node))
        class_assigments = [x for x in ast.iter_child_nodes(node) if x.__class__.__name__ == 'Assign']
        found = {}
        for node in class_assigments:
            targets = {getattr(t, 'id', None) for t in node.targets}
@ -337,7 +352,7 @@ def update_plugin_from_entry(plugin, entry):

def fetch_plugin(old_index, entry):
    lm_map = {plugin['thread_id']:plugin for plugin in old_index.values()}
    raw = read(entry.url)
    raw = read(entry.url).decode('utf-8', 'replace')
    url, name = parse_plugin_zip_url(raw)
    if url is None:
        raise ValueError('Failed to find zip file URL for entry: %s' % repr(entry))
@ -346,9 +361,9 @@ def fetch_plugin(old_index, entry):
    if plugin is not None:
        # Previously downloaded plugin
        lm = datetime(*tuple(map(int, re.split(r'\D', plugin['last_modified'])))[:6])
        request = urllib2.Request(url)
        request = Request(url)
        request.get_method = lambda : 'HEAD'
        with closing(urllib2.urlopen(request)) as response:
        with closing(urlopen(request)) as response:
            info = response.info()
            slm = datetime(*parsedate(info.get('Last-Modified'))[:6])
            if lm >= slm:
@ -413,7 +428,7 @@ def fetch_plugins(old_index):
            src = plugin['file']
            plugin['file'] = src.partition('_')[-1]
            os.rename(src, plugin['file'])
    raw = bz2.compress(json.dumps(ans, sort_keys=True, indent=4, separators=(',', ': ')))
    raw = bz2.compress(json.dumps(ans, sort_keys=True, indent=4, separators=(',', ': ')).encode('utf-8'))
    atomic_write(raw, PLUGINS)
    # Cleanup any extra .zip files
    all_plugin_files = {p['file'] for p in ans.values()}
@ -503,7 +518,7 @@ h1 { text-align: center }
        name, count = x
        return '<tr><td>%s</td><td>%s</td></tr>\n' % (escape(name), count)

    pstats = map(plugin_stats, sorted(stats.items(), reverse=True, key=lambda x:x[1]))
    pstats = list(map(plugin_stats, sorted(stats.items(), reverse=True, key=lambda x:x[1])))
    stats = '''\
<!DOCTYPE html>
<html>
@ -100,8 +100,12 @@ class POT(Command):  # {{{
            root = json.load(f)
        entries = root['639-3']
        ans = []
        for x in sorted(entries, key=lambda x:(x.get('name') or '').lower()):
            name = x.get('name')

        def name_getter(x):
            return x.get('inverted_name') or x.get('name')

        for x in sorted(entries, key=lambda x:name_getter(x).lower()):
            name = name_getter(x)
            if name:
                ans.append(u'msgid "{}"'.format(name))
                ans.append('msgstr ""')
@ -849,7 +853,7 @@ class ISO639(Command):  # {{{
            threeb = unicode_type(threeb)
            if threeb is None:
                continue
            name = x.get('name')
            name = x.get('inverted_name') or x.get('name')
            if name:
                name = unicode_type(name)
            if not name or name[0] in '!~=/\'"':
@ -123,7 +123,7 @@ def get_fosshub_data():

def send_data(loc):
    subprocess.check_call([
        'rsync', '--inplace', '--delete', '-r', '-z', '-h', '--progress', '-e',
        'rsync', '--inplace', '--delete', '-r', '-zz', '-h', '--progress', '-e',
        'ssh -x', loc + '/', '%s@%s:%s' % (STAGING_USER, STAGING_HOST, STAGING_DIR)
    ])
@ -6,7 +6,7 @@ from polyglot.builtins import map, unicode_type, environ_item, hasenv, getenv, a
import sys, locale, codecs, os, importlib, collections

__appname__ = 'calibre'
numeric_version = (4, 10, 1)
numeric_version = (4, 12, 0)
__version__ = '.'.join(map(unicode_type, numeric_version))
git_version = None
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
@ -9,7 +9,7 @@ from calibre import guess_type
from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
        MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase)
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_comic_metadata
from calibre.ebooks.metadata.archive import ArchiveExtract, KPFExtract, get_comic_metadata
from calibre.ebooks.html.to_zip import HTML2ZIP

plugins = []
@ -124,7 +124,7 @@ class TXT2TXTZ(FileTypePlugin):
        return path_to_ebook


plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract,]
plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, KPFExtract]
# }}}

# Metadata reader plugins {{{
@ -1738,15 +1738,6 @@ class StoreNextoStore(StoreBase):
    affiliate = True


class StoreOpenBooksStore(StoreBase):
    name = 'Open Books'
    description = 'Comprehensive listing of DRM free e-books from a variety of sources provided by users of calibre.'
    actual_plugin = 'calibre.gui2.store.stores.open_books_plugin:OpenBooksStore'

    drm_free_only = True
    headquarters = 'US'


class StoreOzonRUStore(StoreBase):
    name = 'OZON.ru'
    description = 'e-books from OZON.ru'
@ -1910,7 +1901,6 @@ plugins += [
    StoreMillsBoonUKStore,
    StoreMobileReadStore,
    StoreNextoStore,
    StoreOpenBooksStore,
    StoreOzonRUStore,
    StorePragmaticBookshelfStore,
    StorePublioStore,
@ -26,7 +26,7 @@ from calibre.db.tables import VirtualTable
from calibre.db.write import get_series_values, uniq
from calibre.db.lazy import FormatMetadata, FormatsList, ProxyMetadata
from calibre.ebooks import check_ebook_format
from calibre.ebooks.metadata import string_to_authors, author_to_author_sort
from calibre.ebooks.metadata import string_to_authors, author_to_author_sort, authors_to_sort_string
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ptempfile import (base_dir, PersistentTemporaryFile,
@ -1297,6 +1297,7 @@ class Cache(object):
            if set_title and mi.title:
                path_changed = True
                set_field('title', mi.title)
            authors_changed = False
            if set_authors:
                path_changed = True
                if not mi.authors:
@ -1305,6 +1306,7 @@ class Cache(object):
                for a in mi.authors:
                    authors += string_to_authors(a)
                set_field('authors', authors)
                authors_changed = True

            if path_changed:
                self._update_path({book_id})
@ -1339,7 +1341,13 @@ class Cache(object):
                if val is not None:
                    protected_set_field(field, val)

            for field in ('author_sort', 'publisher', 'series', 'tags', 'comments',
            val = mi.get('author_sort', None)
            if authors_changed and (not val or mi.is_null('author_sort')):
                val = authors_to_sort_string(mi.authors)
            if authors_changed or (force_changes and val is not None) or not mi.is_null('author_sort'):
                protected_set_field('author_sort', val)

            for field in ('publisher', 'series', 'tags', 'comments',
                          'languages', 'pubdate'):
                val = mi.get(field, None)
                if (force_changes and val is not None) or not mi.is_null(field):
@ -13,7 +13,7 @@ from calibre import prints
from calibre.db.cli.utils import str_width
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.date import isoformat
from polyglot.builtins import iteritems, unicode_type, map
from polyglot.builtins import as_bytes, iteritems, map, unicode_type

readonly = True
version = 0  # change this if you change signature of implementation()
@ -203,6 +203,8 @@ def do_list(
    )
    with ColoredStream(sys.stdout, fg='green'):
        prints(''.join(titles))
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    linesep = as_bytes(os.linesep)

    wrappers = [TextWrapper(x - 1).wrap if x > 1 else lambda y: y for x in widths]

@ -213,12 +215,12 @@ def do_list(
        lines = max(map(len, text))
        for l in range(lines):
            for i, field in enumerate(text):
                ft = text[i][l] if l < len(text[i]) else u''
                sys.stdout.write(ft.encode('utf-8'))
                ft = text[i][l] if l < len(text[i]) else ''
                stdout.write(ft.encode('utf-8'))
                if i < len(text) - 1:
                    filler = (u'%*s' % (widths[i] - str_width(ft) - 1, u''))
                    sys.stdout.write((filler + separator).encode('utf-8'))
        print()
                    filler = ('%*s' % (widths[i] - str_width(ft) - 1, ''))
                    stdout.write((filler + separator).encode('utf-8'))
            stdout.write(linesep)


def option_parser(get_parser, args):
@ -49,8 +49,9 @@ def main(opts, args, dbctx):
    if mi is None:
        raise SystemExit('Id #%d is not present in database.' % id)
    if opts.as_opf:
        stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        mi = OPFCreator(getcwd(), mi)
        mi.render(sys.stdout)
        mi.render(stdout)
    else:
        prints(unicode_type(mi))
@ -11,6 +11,7 @@ from functools import partial
from io import BytesIO

from calibre.ebooks.metadata import author_to_author_sort, title_sort
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import UNDEFINED_DATE
from calibre.db.tests.base import BaseTest, IMG
from polyglot.builtins import iteritems, itervalues, unicode_type
@ -421,13 +422,13 @@ class WritingTest(BaseTest):
        cache.set_metadata(2, mi)
        nmi = cache.get_metadata(2, get_cover=True, cover_as_data=True)
        ae(oldmi.cover_data, nmi.cover_data)
        self.compare_metadata(nmi, oldmi, exclude={'last_modified', 'format_metadata'})
        self.compare_metadata(nmi, oldmi, exclude={'last_modified', 'format_metadata', 'formats'})
        cache.set_metadata(1, mi2, force_changes=True)
        nmi2 = cache.get_metadata(1, get_cover=True, cover_as_data=True)
        # The new code does not allow setting of #series_index to None, instead
        # it is reset to 1.0
        ae(nmi2.get_extra('#series'), 1.0)
        self.compare_metadata(nmi2, oldmi2, exclude={'last_modified', 'format_metadata', '#series_index'})
        self.compare_metadata(nmi2, oldmi2, exclude={'last_modified', 'format_metadata', '#series_index', 'formats'})

        cache = self.init_cache(self.cloned_library)
        mi = cache.get_metadata(1)
@ -436,6 +437,12 @@ class WritingTest(BaseTest):
        cache.set_metadata(3, mi)
        self.assertEqual(set(otags), set(cache.field_for('tags', 3)), 'case changes should not be allowed in set_metadata')

        # test that setting authors without author sort results in an
        # auto-generated authors sort
        mi = Metadata('empty', ['a1', 'a2'])
        cache.set_metadata(1, mi)
        self.assertEqual('a1 & a2', cache.field_for('author_sort', 1))

    # }}}

    def test_conversion_options(self):  # {{{
@ -7,16 +7,14 @@ import os, time, sys
from functools import cmp_to_key

from calibre.constants import preferred_encoding, DEBUG, ispy3
from calibre import isbytestring, force_unicode
from calibre.utils.icu import sort_key
from calibre import isbytestring

from calibre.ebooks.metadata.book.base import Metadata
from calibre.devices.usbms.books import Book as Book_
from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import Book as Book_, CollectionsBookList, none_cmp
from calibre.utils.config_base import prefs
from calibre.devices.usbms.driver import debug_print
from calibre.ebooks.metadata import author_to_author_sort
from polyglot.builtins import unicode_type, string_or_bytes, iteritems, itervalues, cmp
from polyglot.builtins import unicode_type, iteritems, itervalues


class Book(Book_):
@ -72,6 +70,7 @@ class Book(Book_):
        self.can_put_on_shelves = True
        self.kobo_series = None
        self.kobo_series_number = None  # Kobo stores the series number as a string. And it can have a leading "#".
        self.kobo_series_id = None
        self.kobo_subtitle = None

        if thumbnail_name is not None:
@ -86,6 +85,10 @@ class Book(Book_):
        # If we don't have a content Id, we don't know what type it is.
        return self.contentID and self.contentID.startswith("file")

    @property
    def has_kobo_series(self):
        return self.kobo_series is not None

    @property
    def is_purchased_kepub(self):
        return self.contentID and not self.contentID.startswith("file")
@ -104,6 +107,8 @@ class Book(Book_):
            fmt('Content ID', self.contentID)
            if self.kobo_series:
                fmt('Kobo Series', self.kobo_series + ' #%s' % self.kobo_series_number)
            if self.kobo_series_id:
                fmt('Kobo Series ID', self.kobo_series_id)
            if self.kobo_subtitle:
                fmt('Subtitle', self.kobo_subtitle)
            if self.mime:
@ -292,24 +297,6 @@ class KTCollectionsBookList(CollectionsBookList):
        # Sort collections
        result = {}

        def none_cmp(xx, yy):
            x = xx[1]
            y = yy[1]
            if x is None and y is None:
                # No sort_key needed here, because defaults are ascii
                return cmp(xx[2], yy[2])
            if x is None:
                return 1
            if y is None:
                return -1
            if isinstance(x, string_or_bytes) and isinstance(y, string_or_bytes):
                x, y = sort_key(force_unicode(x)), sort_key(force_unicode(y))
            c = cmp(x, y)
            if c != 0:
                return c
            # same as above -- no sort_key needed here
            return cmp(xx[2], yy[2])

        for category, lpaths in iteritems(collections):
            books = sorted(itervalues(lpaths), key=cmp_to_key(none_cmp))
            result[category] = [x[0] for x in books]
@ -83,7 +83,7 @@ class KOBO(USBMS):

    dbversion = 0
    fwversion = (0,0,0)
    supported_dbversion = 156
    supported_dbversion = 158
    has_kepubs = False

    supported_platforms = ['windows', 'osx', 'linux']
@ -1349,7 +1349,7 @@ class KOBOTOUCH(KOBO):
            ' Based on the existing Kobo driver by %s.') % KOBO.author
    # icon = I('devices/kobotouch.jpg')

    supported_dbversion = 157
    supported_dbversion = 158
    min_supported_dbversion = 53
    min_dbversion_series = 65
    min_dbversion_externalid = 65
@ -1357,11 +1357,12 @@ class KOBOTOUCH(KOBO):
    min_dbversion_images_on_sdcard = 77
    min_dbversion_activity = 77
    min_dbversion_keywords = 82
    min_dbversion_seriesid = 136

    # Starting with firmware version 3.19.x, the last number appears to be a
    # build number. A number will be recorded here but it can be safely ignored
    # when testing the firmware version.
    max_supported_fwversion = (4, 19, 14114)
    max_supported_fwversion = (4, 20, 14601)
    # The following document firmware versions where new functions or devices were added.
    # Not all are used, but this feels a good place to record it.
    min_fwversion_shelves = (2, 0, 0)
@ -1377,11 +1378,13 @@ class KOBOTOUCH(KOBO):
    min_librah20_fwversion = (4, 16, 13337)  # "Reviewers" release.
    min_fwversion_epub_location = (4, 17, 13651)  # ePub reading location without full contentid.
    min_fwversion_dropbox = (4, 18, 13737)  # The Forma only at this point.
    min_fwversion_serieslist = (4, 20, 14601)  # Series list needs the SeriesID to be set.

    has_kepubs = True

    booklist_class = KTCollectionsBookList
    book_class = Book
    kobo_series_dict = {}

    MAX_PATH_LEN = 185  # 250 - (len(" - N3_LIBRARY_SHELF.parsed") + len("F:\.kobo\images\"))
    KOBO_EXTRA_CSSFILE = 'kobo_extra.css'
@ -1610,7 +1613,8 @@ class KOBOTOUCH(KOBO):
                bl_cache[b.lpath] = idx

        def update_booklist(prefix, path, ContentID, ContentType, MimeType, ImageID,
                            title, authors, DateCreated, Description, Publisher, series, seriesnumber,
                            title, authors, DateCreated, Description, Publisher,
                            series, seriesnumber, SeriesID, SeriesNumberFloat,
                            ISBN, Language, Subtitle,
                            readstatus, expired, favouritesindex, accessibility, isdownloaded,
                            userid, bookshelves
@ -1747,10 +1751,16 @@ class KOBOTOUCH(KOBO):
                        bl[idx].kobo_metadata = kobo_metadata
                        bl[idx].kobo_series = series
                        bl[idx].kobo_series_number = seriesnumber
                        bl[idx].kobo_series_id = SeriesID
                        bl[idx].kobo_subtitle = Subtitle
                        bl[idx].can_put_on_shelves = allow_shelves
                        bl[idx].mime = MimeType

                        if not bl[idx].is_sideloaded and bl[idx].has_kobo_series and SeriesID is not None:
                            if show_debug:
                                debug_print('KoboTouch:update_booklist - Have purchased kepub with series, saving SeriesID=', SeriesID)
                            self.kobo_series_dict[series] = SeriesID

                        if lpath in playlist_map:
                            bl[idx].device_collections = playlist_map.get(lpath,[])
                        bl[idx].current_shelves = bookshelves
@ -1800,10 +1810,16 @@ class KOBOTOUCH(KOBO):
                        book.kobo_metadata = kobo_metadata
                        book.kobo_series = series
                        book.kobo_series_number = seriesnumber
                        book.kobo_series_id = SeriesID
                        book.kobo_subtitle = Subtitle
                        book.can_put_on_shelves = allow_shelves
                        # debug_print('KoboTouch:update_booklist - title=', title, 'book.device_collections', book.device_collections)

                        if not book.is_sideloaded and book.has_kobo_series and SeriesID is not None:
                            if show_debug:
                                debug_print('KoboTouch:update_booklist - Have purchased kepub with series, saving SeriesID=', SeriesID)
                            self.kobo_series_dict[series] = SeriesID

                        if bl.add_book(book, replace_metadata=False):
                            changed = True
                        if show_debug:
@ -1863,6 +1879,10 @@ class KOBOTOUCH(KOBO):
                columns += ", Series, SeriesNumber, ___UserID, ExternalId, Subtitle"
            else:
                columns += ', null as Series, null as SeriesNumber, ___UserID, null as ExternalId, null as Subtitle'
            if self.supports_series_list:
                columns += ", SeriesID, SeriesNumberFloat"
            else:
                columns += ', null as SeriesID, null as SeriesNumberFloat'

            where_clause = ''
            if self.supports_kobo_archive() or self.supports_overdrive():
@ -1957,7 +1977,8 @@ class KOBOTOUCH(KOBO):
                    prefix = self._card_a_prefix if oncard == 'carda' else self._main_prefix
                    changed = update_booklist(prefix, path, row['ContentID'], row['ContentType'], row['MimeType'], row['ImageId'],
                                              row['Title'], row['Attribution'], row['DateCreated'], row['Description'], row['Publisher'],
                                              row['Series'], row['SeriesNumber'], row['ISBN'], row['Language'], row['Subtitle'],
                                              row['Series'], row['SeriesNumber'], row['SeriesID'], row['SeriesNumberFloat'],
                                              row['ISBN'], row['Language'], row['Subtitle'],
                                              row['ReadStatus'], row['___ExpirationStatus'],
                                              int(row['FavouritesIndex']), row['Accessibility'], row['IsDownloaded'],
                                              row['___UserID'], bookshelves
@ -1972,6 +1993,7 @@ class KOBOTOUCH(KOBO):
                    self.dump_bookshelves(connection)
                else:
                    debug_print("KoboTouch:books - automatically managing metadata")
                debug_print("KoboTouch:books - self.kobo_series_dict=", self.kobo_series_dict)
            # Remove books that are no longer in the filesystem. Cache contains
            # indices into the booklist if book not in filesystem, None otherwise
            # Do the operation in reverse order so indices remain valid
@ -3127,7 +3149,6 @@ class KOBOTOUCH(KOBO):
                    kobo_series_number = None
                series_number_changed = not (kobo_series_number == newmi.series_index)

                if series_changed or series_number_changed:
                    if newmi.series is not None:
                        new_series = newmi.series
                        try:
@ -3138,10 +3159,19 @@ class KOBOTOUCH(KOBO):
                        new_series = None
                        new_series_number = None

                if series_changed or series_number_changed:
                    update_values.append(new_series)
                    set_clause += ', Series = ? '
                    update_values.append(new_series_number)
                    set_clause += ', SeriesNumber = ? '
                if self.supports_series_list and book.is_sideloaded:
                    series_id = self.kobo_series_dict.get(new_series, new_series)
                    if not book.kobo_series_id == series_id or series_changed or series_number_changed:
                        update_values.append(series_id)
                        set_clause += ', SeriesID = ? '
                        update_values.append(new_series_number)
                        set_clause += ', SeriesNumberFloat = ? '
                        debug_print("KoboTouch:set_core_metadata Setting SeriesID - new_series='%s', series_id='%s'" % (new_series, series_id))

                if not series_only:
                    if not (newmi.title == kobo_metadata.title):
@ -3537,6 +3567,10 @@ class KOBOTOUCH(KOBO):
    def supports_series(self):
        return self.dbversion >= self.min_dbversion_series

    @property
    def supports_series_list(self):
        return self.dbversion >= self.min_dbversion_seriesid and self.fwversion >= self.min_fwversion_serieslist

    def supports_kobo_archive(self):
        return self.dbversion >= self.min_dbversion_archive
@ -36,7 +36,7 @@ from calibre.utils.filenames import ascii_filename as sanitize, shorten_componen
from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as
                                unpublish_zeroconf, get_all_ips)
from calibre.utils.socket_inheritance import set_socket_inherit
from polyglot.builtins import unicode_type, iteritems, itervalues
from polyglot.builtins import as_bytes, unicode_type, iteritems, itervalues
from polyglot import queue


@ -100,7 +100,7 @@ class ConnectionListener(Thread):
                    s = self.driver._json_encode(
                        self.driver.opcodes['CALIBRE_BUSY'],
                        {'otherDevice': d.get_gui_name()})
                    self.driver._send_byte_string(device_socket, (b'%d' % len(s)) + s)
                    self.driver._send_byte_string(device_socket, (b'%d' % len(s)) + as_bytes(s))
                    sock.close()
            except queue.Empty:
                pass
@ -636,7 +636,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
            s = self._json_encode(self.opcodes[op], arg)
            if print_debug_info and extra_debug:
                self._debug('send string', s)
            self._send_byte_string(self.device_socket, (b'%d' % len(s)) + s)
            self._send_byte_string(self.device_socket, (b'%d' % len(s)) + as_bytes(s))
            if not wait_for_response:
                return None, None
            return self._receive_from_client(print_debug_info=print_debug_info)
@ -841,10 +841,10 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
                    json_metadata = defaultdict(dict)
                json_metadata[key]['book'] = self.json_codec.encode_book_metadata(book['book'])
                json_metadata[key]['last_used'] = book['last_used']
                result = json.dumps(json_metadata, indent=2, default=to_json)
                fd.write("%0.7d\n"%(len(result)+1))
                result = as_bytes(json.dumps(json_metadata, indent=2, default=to_json))
                fd.write(("%0.7d\n"%(len(result)+1)).encode('ascii'))
                fd.write(result)
                fd.write('\n')
                fd.write(b'\n')
                count += 1
            self._debug('wrote', count, 'entries, purged', purged, 'entries')
@ -20,6 +20,31 @@ from calibre.utils.icu import sort_key
from polyglot.builtins import string_or_bytes, iteritems, itervalues, cmp


def none_cmp(xx, yy):
    x = xx[1]
    y = yy[1]
    if x is None and y is None:
        # No sort_key needed here, because defaults are ascii
        return cmp(xx[2], yy[2])
    if x is None:
        return 1
    if y is None:
        return -1
    if isinstance(x, string_or_bytes) and isinstance(y, string_or_bytes):
        x, y = sort_key(force_unicode(x)), sort_key(force_unicode(y))
    try:
        c = cmp(x, y)
    except TypeError:
        c = 0
    if c != 0:
        return c
    # same as above -- no sort_key needed here
    try:
        return cmp(xx[2], yy[2])
    except TypeError:
        return 0


class Book(Metadata):

    def __init__(self, prefix, lpath, size=None, other=None):
@ -280,30 +305,6 @@ class CollectionsBookList(BookList):
        # Sort collections
        result = {}

        def none_cmp(xx, yy):
            x = xx[1]
            y = yy[1]
            if x is None and y is None:
                # No sort_key needed here, because defaults are ascii
                return cmp(xx[2], yy[2])
            if x is None:
                return 1
            if y is None:
                return -1
            if isinstance(x, string_or_bytes) and isinstance(y, string_or_bytes):
                x, y = sort_key(force_unicode(x)), sort_key(force_unicode(y))
            try:
                c = cmp(x, y)
            except TypeError:
                c = 0
            if c != 0:
                return c
            # same as above -- no sort_key needed here
            try:
                return cmp(xx[2], yy[2])
            except TypeError:
                return 0

        for category, lpaths in iteritems(collections):
            books = sorted(itervalues(lpaths), key=cmp_to_key(none_cmp))
            result[category] = [x[0] for x in books]
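For reference, none_cmp compares (item, sort_value, tiebreak) triples so that entries whose sort value is None always land at the end, with the third element breaking ties; via cmp_to_key it plugs directly into sorted(). A small usage sketch, with invented data:

    from functools import cmp_to_key

    # (lpath, series_index, title) triples; the values are illustrative only
    books = [('b.epub', None, 'Beta'), ('c.epub', 2, 'Gamma'), ('a.epub', 1, 'Alpha')]
    books.sort(key=cmp_to_key(none_cmp))
    # -> [('a.epub', 1, 'Alpha'), ('c.epub', 2, 'Gamma'), ('b.epub', None, 'Beta')]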
@ -38,7 +38,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
                   'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
                   'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
                   'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
                   'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx']
                   'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf']


def return_raster_image(path):
@ -28,8 +28,12 @@ class DJVUInput(InputFormatPlugin):
        from calibre.ebooks.djvu.djvu import DJVUFile
        x = DJVUFile(stream)
        x.get_text(stdout)
        raw_text = stdout.getvalue()
        if not raw_text:
            raise ValueError('The DJVU file contains no text, only images, probably page scans.'
                             ' calibre only supports conversion of DJVU files with actual text in them.')

        html = convert_basic(stdout.getvalue().replace(b"\n", b' ').replace(
        html = convert_basic(raw_text.replace(b"\n", b' ').replace(
            b'\037', b'\n\n'))
        # Run the HTMLized text through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
@ -40,6 +40,29 @@ def archive_type(stream):
    return ans


class KPFExtract(FileTypePlugin):

    name = 'KPF Extract'
    author = 'Kovid Goyal'
    description = _('Extract the source DOCX file from Amazon Kindle Create KPF files.'
                    ' Note this will not contain any edits made in the Kindle Create program itself.')
    file_types = {'kpf'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        from calibre.utils.zipfile import ZipFile
        with ZipFile(archive, 'r') as zf:
            fnames = zf.namelist()
            candidates = [x for x in fnames if x.lower().endswith('.docx')]
            if not candidates:
                return archive
            of = self.temporary_file('_kpf_extract.docx')
            with closing(of):
                of.write(zf.read(candidates[0]))
            return of.name


class ArchiveExtract(FileTypePlugin):
    name = 'Archive Extract'
    author = 'Kovid Goyal'
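A KPF container is an ordinary zip archive, which is why the plugin above can simply pull the first embedded .docx out of it. The same idea as a self-contained sketch using only the standard library (file names are illustrative, not calibre API):

    from zipfile import ZipFile

    def extract_docx(kpf_path, out_path='extracted.docx'):
        with ZipFile(kpf_path) as zf:
            docx_names = [n for n in zf.namelist() if n.lower().endswith('.docx')]
            if not docx_names:
                return None  # no embedded DOCX in this KPF
            with open(out_path, 'wb') as f:
                f.write(zf.read(docx_names[0]))
        return out_path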
@ -5,7 +5,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from polyglot.builtins import iteritems
from polyglot.urllib import quote_plus
from polyglot.urllib import quote, quote_plus

AUTHOR_SEARCHES = {
    'goodreads':
@ -48,17 +48,21 @@ all_book_searches = BOOK_SEARCHES.__iter__
all_author_searches = AUTHOR_SEARCHES.__iter__


def qquote(val):
def qquote(val, use_plus=True):
    if not isinstance(val, bytes):
        val = val.encode('utf-8')
    ans = quote_plus(val)
    ans = quote_plus(val) if use_plus else quote(val)
    if isinstance(ans, bytes):
        ans = ans.decode('utf-8')
    return ans


def specialised_quote(template, val):
    return qquote(val, 'goodreads.com' not in template)


def url_for(template, data):
    return template.format(**{k: qquote(v) for k, v in iteritems(data)})
    return template.format(**{k: specialised_quote(template, v) for k, v in iteritems(data)})


def url_for_author_search(key, **kw):
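The point of threading use_plus through qquote is that quote_plus() encodes spaces as '+' while quote() uses '%20'; the goodreads.com templates are special-cased to get the latter form. A quick illustration with the standard library:

    from urllib.parse import quote, quote_plus

    print(quote_plus('war and peace'))  # -> war+and+peace
    print(quote('war and peace'))       # -> war%20and%20peace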
@ -343,8 +343,8 @@ class Source(Plugin):

        if authors:
            # Leave ' in there for Irish names
            remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
            replace_pat = re.compile(r'[-+.:;,]')
            remove_pat = re.compile(r'[!@#$%^&*()()「」{}`~"\s\[\]/]')
            replace_pat = re.compile(r'[-+.:;,，。；：]')
            if only_first_author:
                authors = authors[:1]
            for au in authors:
@ -384,7 +384,7 @@ class Source(Plugin):
            # Remove hyphens only if they have whitespace before them
            (r'(\s-)', ' '),
            # Replace other special chars with a space
            (r'''[:,;!@$%^&*(){}.`~"\s\[\]/]''', ' '),
            (r'''[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”''', ' '),
        ]]

        for pat, repl in title_patterns:
@ -8,13 +8,11 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>; 2011, Li Fanxi <lifan
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import time
|
||||
from functools import partial
|
||||
try:
|
||||
from queue import Empty, Queue
|
||||
except ImportError:
|
||||
from Queue import Empty, Queue
|
||||
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Option, Source
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
@ -30,7 +28,7 @@ NAMESPACES = {
|
||||
|
||||
def get_details(browser, url, timeout):  # {{{
    try:
        if Douban.DOUBAN_API_KEY and Douban.DOUBAN_API_KEY != '':
        if Douban.DOUBAN_API_KEY:
            url = url + "?apikey=" + Douban.DOUBAN_API_KEY
        raw = browser.open_novisit(url, timeout=timeout).read()
    except Exception as e:

@ -42,97 +40,78 @@ def get_details(browser, url, timeout):  # {{{
        raw = browser.open_novisit(url, timeout=timeout).read()

    return raw


# }}}


class Douban(Source):

    name = 'Douban Books'
    author = 'Li Fanxi'
    version = (2, 1, 2)
    author = 'Li Fanxi, xcffl, jnozsc'
    version = (3, 1, 0)
    minimum_calibre_version = (2, 80, 0)

    description = _('Downloads metadata and covers from Douban.com. '
                    'Useful only for Chinese language books.')
    description = _(
        'Downloads metadata and covers from Douban.com. '
        'Useful only for Chinese language books.'
    )

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags',
        'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating',
        'identifier:douban'])  # language currently disabled
    touched_fields = frozenset([
        'title', 'authors', 'tags', 'pubdate', 'comments', 'publisher',
        'identifier:isbn', 'rating', 'identifier:douban'
    ])  # language currently disabled
    supports_gzip_transfer_encoding = True
    cached_cover_url_is_reliable = True

    DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
    DOUBAN_API_KEY = '0df993c66c0c636e29ecbb5344252a4a'
    DOUBAN_API_URL = 'https://api.douban.com/v2/book/search'
    DOUBAN_BOOK_URL = 'https://book.douban.com/subject/%s/'

    options = (
        Option('include_subtitle_in_title', 'bool', True, _('Include subtitle in book title:'),
               _('Whether to append subtitle in the book title.')),
        Option(
            'include_subtitle_in_title', 'bool', True,
            _('Include subtitle in book title:'),
            _('Whether to append subtitle in the book title.')
        ),
    )
    def to_metadata(self, browser, log, entry_, timeout):  # {{{
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.utils.date import parse_date, utcnow
        from calibre.utils.cleantext import clean_ascii_chars

        XPath = partial(etree.XPath, namespaces=NAMESPACES)
        entry = XPath('//atom:entry')
        entry_id = XPath('descendant::atom:id')
        title = XPath('descendant::atom:title')
        description = XPath('descendant::atom:summary')
        subtitle = XPath("descendant::db:attribute[@name='subtitle']")
        publisher = XPath("descendant::db:attribute[@name='publisher']")
        isbn = XPath("descendant::db:attribute[@name='isbn13']")
        date = XPath("descendant::db:attribute[@name='pubdate']")
        creator = XPath("descendant::db:attribute[@name='author']")
        booktag = XPath("descendant::db:tag/attribute::name")
        rating = XPath("descendant::gd:rating/attribute::average")
        cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
        douban_id = entry_.get('id')
        title = entry_.get('title')
        description = entry_.get('summary')
        # subtitle = entry_.get('subtitle')  # TODO: std metadata doesn't have this field
        publisher = entry_.get('publisher')
        isbn = entry_.get('isbn13')  # ISBN10 is obsolete, use ISBN13
        pubdate = entry_.get('pubdate')
        authors = entry_.get('author')
        book_tags = entry_.get('tags')
        rating = entry_.get('rating')
        cover_url = entry_.get('images', {}).get('large')
        series = entry_.get('series')
        def get_text(extra, x):
            try:
                ans = x(extra)
                if ans:
                    ans = ans[0].text
                    if ans and ans.strip():
                        return ans.strip()
            except:
                log.exception('Programming error:')
            return None

        id_url = entry_id(entry_)[0].text.replace('http://', 'https://')
        douban_id = id_url.split('/')[-1]
        title_ = ': '.join([x.text for x in title(entry_)]).strip()
        subtitle = ': '.join([x.text for x in subtitle(entry_)]).strip()
        if self.prefs['include_subtitle_in_title'] and len(subtitle) > 0:
            title_ = title_ + ' - ' + subtitle
        authors = [x.text.strip() for x in creator(entry_) if x.text]
        if not authors:
            authors = [_('Unknown')]
        if not id_url or not title:
        if not douban_id or not title:
            # Silently discard this entry
            return None

        mi = Metadata(title_, authors)
        mi = Metadata(title, authors)
        mi.identifiers = {'douban': douban_id}
        try:
            log.info(id_url)
            raw = get_details(browser, id_url, timeout)
            feed = etree.fromstring(
                xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0],
                parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
            )
            extra = entry(feed)[0]
        except:
            log.exception('Failed to get additional details for', mi.title)
            return mi
        mi.comments = get_text(extra, description)
        mi.publisher = get_text(extra, publisher)
        mi.publisher = publisher
        mi.comments = description
        # mi.subtitle = subtitle

        # ISBN
        isbns = []
        for x in [t.text for t in isbn(extra)]:
        if isinstance(isbn, (type(''), bytes)):
            if check_isbn(isbn):
                isbns.append(isbn)
        else:
            for x in isbn:
                if check_isbn(x):
                    isbns.append(x)
        if isbns:
@ -140,22 +119,9 @@ class Douban(Source):
            mi.all_isbns = isbns

        # Tags
        try:
            btags = [x for x in booktag(extra) if x]
            tags = []
            for t in btags:
                atags = [y.strip() for y in t.split('/')]
                for tag in atags:
                    if tag not in tags:
                        tags.append(tag)
        except:
            log.exception('Failed to parse tags:')
            tags = []
        if tags:
            mi.tags = [x.replace(',', ';') for x in tags]
        mi.tags = [tag['name'] for tag in book_tags]

        # pubdate
        pubdate = get_text(extra, date)
        if pubdate:
            try:
                default = utcnow().replace(day=15)

@ -164,28 +130,34 @@ class Douban(Source):
                log.error('Failed to parse pubdate %r' % pubdate)

        # Ratings
        if rating(extra):
        if rating:
            try:
                mi.rating = float(rating(extra)[0]) / 2.0
                mi.rating = float(rating['average']) / 2.0
            except:
                log.exception('Failed to parse rating')
                mi.rating = 0

        # Cover
        mi.has_douban_cover = None
        u = cover_url(extra)
        u = cover_url
        if u:
            u = u[0].replace('/spic/', '/lpic/')
            # If URL contains "book-default", the book doesn't have a cover
            if u.find('book-default') == -1:
                mi.has_douban_cover = u

        # Series
        if series:
            mi.series = series['title']

        return mi

    # }}}

    def get_book_url(self, identifiers):  # {{{
        db = identifiers.get('douban', None)
        if db is not None:
            return ('douban', db, self.DOUBAN_BOOK_URL % db)

    # }}}

    def create_query(self, log, title=None, authors=None, identifiers={}):  # {{{

@ -193,9 +165,9 @@ class Douban(Source):
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        SEARCH_URL = 'https://api.douban.com/book/subjects?'
        ISBN_URL = 'https://api.douban.com/book/subject/isbn/'
        SUBJECT_URL = 'https://api.douban.com/book/subject/'
        SEARCH_URL = 'https://api.douban.com/v2/book/search?count=10&'
        ISBN_URL = 'https://api.douban.com/v2/book/isbn/'
        SUBJECT_URL = 'https://api.douban.com/v2/book/'

        q = ''
        t = None

@ -208,16 +180,18 @@ class Douban(Source):
            q = subject
            t = 'subject'
        elif title or authors:

            def build_term(prefix, parts):
                return ' '.join(x for x in parts)

            title_tokens = list(self.get_title_tokens(title))
            if title_tokens:
                q += build_term('title', title_tokens)
            author_tokens = list(self.get_author_tokens(authors,
                    only_first_author=True))
            author_tokens = list(
                self.get_author_tokens(authors, only_first_author=True)
            )
            if author_tokens:
                q += ((' ' if q != '' else '') +
                    build_term('author', author_tokens))
                q += ((' ' if q != '' else '') + build_term('author', author_tokens))
            t = 'search'
        q = q.strip()
        if isinstance(q, type(u'')):
@ -239,16 +213,32 @@ class Douban(Source):
        else:
            url = url + "&apikey=" + self.DOUBAN_API_KEY
        return url

    # }}}

    def download_cover(self, log, result_queue, abort,  # {{{
            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
    def download_cover(
        self,
        log,
        result_queue,
        abort,  # {{{
        title=None,
        authors=None,
        identifiers={},
        timeout=30,
        get_best_cover=False
    ):
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers)
            self.identify(
                log,
                rq,
                abort,
                title=title,
                authors=authors,
                identifiers=identifiers
            )
            if abort.is_set():
                return
            results = []

@ -257,8 +247,11 @@ class Douban(Source):
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            results.sort(
                key=self.identify_results_keygen(
                    title=title, authors=authors, identifiers=identifiers
                )
            )
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:

@ -291,11 +284,18 @@ class Douban(Source):
            url = self.cached_identifier_to_cover_url(db)

        return url

    # }}}

    def get_all_details(self, br, log, entries, abort,  # {{{
            result_queue, timeout):
        from lxml import etree
    def get_all_details(
        self,
        br,
        log,
        entries,
        abort,  # {{{
        result_queue,
        timeout
    ):
        for relevance, i in enumerate(entries):
            try:
                ans = self.to_metadata(br, log, i, timeout)

@ -305,29 +305,31 @@ class Douban(Source):
                    for isbn in getattr(ans, 'all_isbns', []):
                        self.cache_isbn_to_identifier(isbn, db)
                    if ans.has_douban_cover:
                        self.cache_identifier_to_cover_url(db,
                                ans.has_douban_cover)
                        self.cache_identifier_to_cover_url(db, ans.has_douban_cover)
                    self.clean_downloaded_metadata(ans)
                    result_queue.put(ans)
            except:
                log.exception(
                    'Failed to get metadata for identify entry:',
                    etree.tostring(i))
                log.exception('Failed to get metadata for identify entry:', i)
            if abort.is_set():
                break

    # }}}

    def identify(self, log, result_queue, abort, title=None, authors=None,  # {{{
            identifiers={}, timeout=30):
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.utils.cleantext import clean_ascii_chars
    def identify(
        self,
        log,
        result_queue,
        abort,
        title=None,
        authors=None,  # {{{
        identifiers={},
        timeout=30
    ):
        import json

        XPath = partial(etree.XPath, namespaces=NAMESPACES)
        entry = XPath('//atom:entry')

        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        query = self.create_query(
            log, title=title, authors=authors, identifiers=identifiers
        )
        if not query:
            log.error('Insufficient metadata to construct query')
            return
@ -338,42 +340,53 @@ class Douban(Source):
            log.exception('Failed to make identify query: %r' % query)
            return as_unicode(e)
        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
                strip_encoding_pats=True)[0], parser=parser)
            entries = entry(feed)
            j = json.loads(raw)
        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)
        if 'books' in j:
            entries = j['books']
        else:
            entries = []
            entries.append(j)
        if not entries and identifiers and title and authors and \
                not abort.is_set():
            return self.identify(log, result_queue, abort, title=title,
                    authors=authors, timeout=timeout)

            return self.identify(
                log,
                result_queue,
                abort,
                title=title,
                authors=authors,
                timeout=timeout
            )
        # There is no point running these queries in threads as douban
        # throttles requests returning 403 Forbidden errors
        self.get_all_details(br, log, entries, abort, result_queue, timeout)

        return None

    # }}}


if __name__ == '__main__':  # tests {{{
    # To run these tests use: calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test)
    test_identify_plugin(Douban.name,
        [
            (
                {'identifiers':{'isbn': '9787536692930'}, 'title':'三体',
                    'authors':['刘慈欣']},
                [title_test('三体', exact=True),
                    authors_test(['刘慈欣'])]
            ),

            (
                {'title': 'Linux内核修炼之道', 'authors':['任桥伟']},
                [title_test('Linux内核修炼之道', exact=False)]
            ),
        ])
    from calibre.ebooks.metadata.sources.test import (
        test_identify_plugin, title_test, authors_test
    )
    test_identify_plugin(
        Douban.name, [
            ({
                'identifiers': {
                    'isbn': '9787536692930'
                },
                'title': '三体',
                'authors': ['刘慈欣']
            }, [title_test('三体', exact=True),
                authors_test(['刘慈欣'])]),
            ({
                'title': 'Linux内核修炼之道',
                'authors': ['任桥伟']
            }, [title_test('Linux内核修炼之道', exact=False)]),
        ]
    )
# }}}
@ -6,14 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import struct, datetime, os, numbers
import struct, datetime, os, numbers, binascii

from calibre.utils.date import utc_tz
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.utils import get_trailing_data
from polyglot.builtins import as_bytes, iteritems, range, unicode_type
from polyglot.builtins import iteritems, range, unicode_type

# PalmDB {{{

@ -210,7 +210,7 @@ class EXTHRecord(object):
            else:
                self.data, = struct.unpack(b'>L', self.data)
        elif self.type in {209, 300}:
            self.data = as_bytes(self.data.encode('hex'))
            self.data = binascii.hexlify(self.data)

    def __str__(self):
        return '%s (%d): %r'%(self.name, self.type, self.data)

@ -10,7 +10,7 @@ import shutil, os, re, struct, textwrap, io

from lxml import html, etree

from calibre import (xml_entity_to_unicode, entity_to_unicode)
from calibre import xml_entity_to_unicode, entity_to_unicode, guess_type
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.ebooks import DRMError, unit_convert
from calibre.ebooks.chardet import strip_encoding_declarations

@ -21,7 +21,7 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.mobi.reader.headers import BookHeader
from calibre.utils.img import save_cover_data_to
from calibre.utils.img import save_cover_data_to, gif_data_to_png_data, AnimatedGIF
from calibre.utils.imghdr import what
from polyglot.builtins import iteritems, unicode_type, range, map

@ -178,7 +178,7 @@ class MobiReader(object):
        self.processed_html = strip_encoding_declarations(self.processed_html)
        self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
            self.processed_html)
        self.extract_images(processed_records, output_dir)
        image_name_map = self.extract_images(processed_records, output_dir)
        self.replace_page_breaks()
        self.cleanup_html()

@ -272,7 +272,7 @@ class MobiReader(object):
            head.insert(0, title)
            head.text = '\n\t'

        self.upshift_markup(root)
        self.upshift_markup(root, image_name_map)
        guides = root.xpath('//guide')
        guide = guides[0] if guides else None
        metadata_elems = root.xpath('//metadata')

@ -389,8 +389,9 @@ class MobiReader(object):
            raw += unit
        return raw

    def upshift_markup(self, root):
    def upshift_markup(self, root, image_name_map=None):
        self.log.debug('Converting style information to CSS...')
        image_name_map = image_name_map or {}
        size_map = {
            'xx-small': '0.5',
            'x-small': '1',

@ -510,10 +511,11 @@ class MobiReader(object):
                recindex = attrib.pop(attr, None) or recindex
            if recindex is not None:
                try:
                    recindex = '%05d'%int(recindex)
                except:
                    recindex = int(recindex)
                except Exception:
                    pass
                attrib['src'] = 'images/%s.jpg' % recindex
                else:
                    attrib['src'] = 'images/' + image_name_map.get(recindex, '%05d.jpg' % recindex)
            for attr in ('width', 'height'):
                if attr in attrib:
                    val = attrib[attr]

@ -674,7 +676,7 @@ class MobiReader(object):
        for i in getattr(self, 'image_names', []):
            path = os.path.join(bp, 'images', i)
            added.add(path)
            manifest.append((path, 'image/jpeg'))
            manifest.append((path, guess_type(path)[0] or 'image/jpeg'))
        if cover_copied is not None:
            manifest.append((cover_copied, 'image/jpeg'))

@ -870,6 +872,7 @@ class MobiReader(object):
            os.makedirs(output_dir)
        image_index = 0
        self.image_names = []
        image_name_map = {}
        start = getattr(self.book_header, 'first_image_index', -1)
        if start > self.num_sections or start < 0:
            # BAEN PRC files have bad headers

@ -882,18 +885,36 @@ class MobiReader(object):
            image_index += 1
            if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                    b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
                # This record is a known non image type, not need to try to
                # This record is a known non image type, no need to try to
                # load the image
                continue

            path = os.path.join(output_dir, '%05d.jpg' % image_index)
            try:
                if what(None, data) not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
                imgfmt = what(None, data)
            except Exception:
                continue
            if imgfmt not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
                continue
            if imgfmt == 'jpeg':
                imgfmt = 'jpg'
            if imgfmt == 'gif':
                try:
                    data = gif_data_to_png_data(data)
                    imgfmt = 'png'
                except AnimatedGIF:
                    pass
            path = os.path.join(output_dir, '%05d.%s' % (image_index, imgfmt))
            image_name_map[image_index] = os.path.basename(path)
            if imgfmt == 'png':
                with open(path, 'wb') as f:
                    f.write(data)
            else:
                try:
                    save_cover_data_to(data, path, minify_to=(10000, 10000))
                except Exception:
                    continue
            self.image_names.append(os.path.basename(path))
        return image_name_map


def test_mbp_regex():
@ -10,7 +10,7 @@ import struct, string, zlib, os
from collections import OrderedDict
from io import BytesIO

from calibre.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image
from calibre.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
from calibre.utils.imghdr import what
from calibre.ebooks import normalize
from polyglot.builtins import unicode_type, range, as_bytes, map

@ -417,13 +417,8 @@ def to_base(num, base=32, min_num_digits=None):
def mobify_image(data):
    'Convert PNG images to GIF as the idiotic Kindle cannot display some PNG'
    fmt = what(None, data)

    if fmt == 'png':
        from PIL import Image
        im = Image.open(BytesIO(data))
        buf = BytesIO()
        im.save(buf, 'gif')
        data = buf.getvalue()
        data = png_data_to_gif_data(data)
    return data

# Font records {{{

@ -16,7 +16,7 @@ from lxml import etree
from calibre import my_unichr
from calibre.ebooks.oeb.base import XHTML_NS, extract
from calibre.ebooks.mobi.utils import to_base, PolyglotDict
from polyglot.builtins import iteritems, unicode_type
from polyglot.builtins import iteritems, unicode_type, as_bytes

CHUNK_SIZE = 8192

@ -397,7 +397,7 @@ class Chunker(object):
            pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
            return ':off:'.join((pos, fid)).encode('utf-8')

        placeholder_map = {k:to_placeholder(v) for k, v in
        placeholder_map = {as_bytes(k):to_placeholder(v) for k, v in
                iteritems(self.placeholder_map)}

        # Now update the links
@ -222,7 +222,12 @@ class Pool(object):
        self.working = False

    def shutdown(self):
        tuple(map(sip.delete, self.workers))

        def safe_delete(x):
            if not sip.isdeleted(x):
                sip.delete(x)

        tuple(map(safe_delete, self.workers))
        self.workers = []


@ -48,16 +48,18 @@ def run_checks(container):
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    for name, mt in iteritems(container.mime_map):
        items = None
        decode = False
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            decode = True
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        if items is not None:
            items.append((name, mt, container.open(name, 'rb').read()))
            items.append((name, mt, container.raw_data(name, decode=decode)))
    errors.extend(run_checkers(check_html_size, html_items))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))

@ -351,7 +351,7 @@ class CSSFlattener(object):
                value = 0.0
            cssdict[property] = "%0.5fem" % (value / fsize)

    def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
    def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id, recurse=True):
        if not isinstance(node.tag, string_or_bytes) \
           or namespace(node.tag) != XHTML_NS:
            return

@ -569,6 +569,7 @@ class CSSFlattener(object):
            del node.attrib['class']
        if 'style' in node.attrib:
            del node.attrib['style']
        if recurse:
            for child in node:
                self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)

@ -660,9 +661,9 @@ class CSSFlattener(object):
        stylizer = self.stylizers[item]
        if self.specializer is not None:
            self.specializer(item, stylizer)
        body = html.find(XHTML('body'))
        fsize = self.context.dest.fbase
        self.flatten_node(body, stylizer, names, styles, pseudo_styles, fsize, item.id)
        self.flatten_node(html, stylizer, names, styles, pseudo_styles, fsize, item.id, recurse=False)
        self.flatten_node(html.find(XHTML('body')), stylizer, names, styles, pseudo_styles, fsize, item.id)
        items = sorted(((key, val) for (val, key) in iteritems(styles)), key=lambda x:numeric_sort_key(x[0]))
        # :hover must come after link and :active must come after :hover
        psels = sorted(pseudo_styles, key=lambda x :
@ -18,7 +18,7 @@ from operator import attrgetter, itemgetter

from html5_parser import parse
from PyQt5.Qt import (
    QApplication, QMarginsF, QObject, QPageLayout, QTimer, QUrl, pyqtSignal
    QApplication, QMarginsF, QObject, QPageLayout, Qt, QTimer, QUrl, pyqtSignal
)
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWebEngineWidgets import QWebEnginePage, QWebEngineProfile

@ -39,6 +39,7 @@ from calibre.srv.render_book import check_for_maths
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
from calibre.utils.logging import default_log
from calibre.utils.monotonic import monotonic
from calibre.utils.podofo import (
    dedup_type3_fonts, get_podofo, remove_unused_fonts, set_metadata_implementation
)

@ -49,6 +50,7 @@ from polyglot.builtins import (
from polyglot.urllib import urlparse

OK, KILL_SIGNAL = range(0, 2)
HANG_TIME = 60  # seconds
# }}}


@ -172,10 +174,26 @@ class Renderer(QWebEnginePage):

        self.titleChanged.connect(self.title_changed)
        self.loadStarted.connect(self.load_started)
        self.loadProgress.connect(self.load_progress)
        self.loadFinished.connect(self.load_finished)
        self.load_hang_check_timer = t = QTimer(self)
        self.load_started_at = 0
        t.setTimerType(Qt.VeryCoarseTimer)
        t.setInterval(HANG_TIME * 1000)
        t.setSingleShot(True)
        t.timeout.connect(self.on_load_hang)

    def load_started(self):
        self.load_started_at = monotonic()
        self.load_complete = False
        self.load_hang_check_timer.start()

    def load_progress(self, amt):
        self.load_hang_check_timer.start()

    def on_load_hang(self):
        self.log(self.log_prefix, 'Loading not complete after {} seconds, aborting.'.format(int(monotonic() - self.load_started_at)))
        self.load_finished(False)

    def title_changed(self, title):
        if self.wait_for_title and title == self.wait_for_title and self.load_complete:

@ -187,6 +205,7 @@ class Renderer(QWebEnginePage):

    def load_finished(self, ok):
        self.load_complete = True
        self.load_hang_check_timer.stop()
        if not ok:
            self.working = False
            self.work_done.emit(self, 'Load of {} failed'.format(self.url().toString()))

@ -900,7 +919,7 @@ def fonts_are_identical(fonts):
    return True


def merge_font(fonts):
def merge_font(fonts, log):
    # choose the largest font as the base font
    fonts.sort(key=lambda f: len(f['Data'] or b''), reverse=True)
    base_font = fonts[0]

@ -913,7 +932,7 @@ def merge_font(fonts):
    cmaps = list(filter(None, (f['ToUnicode'] for f in t0_fonts)))
    if cmaps:
        t0_font['ToUnicode'] = as_bytes(merge_cmaps(cmaps))
    base_font['sfnt'], width_for_glyph_id, height_for_glyph_id = merge_truetype_fonts_for_pdf(*(f['sfnt'] for f in descendant_fonts))
    base_font['sfnt'], width_for_glyph_id, height_for_glyph_id = merge_truetype_fonts_for_pdf(tuple(f['sfnt'] for f in descendant_fonts), log)
    widths = []
    arrays = tuple(filter(None, (f['W'] for f in descendant_fonts)))
    if arrays:

@ -928,7 +947,7 @@ def merge_font(fonts):
    return t0_font, base_font, references_to_drop


def merge_fonts(pdf_doc):
def merge_fonts(pdf_doc, log):
    all_fonts = pdf_doc.list_fonts(True)
    base_font_map = {}

@ -957,7 +976,7 @@ def merge_fonts(pdf_doc):
    items = []
    for name, fonts in iteritems(base_font_map):
        if mergeable(fonts):
            t0_font, base_font, references_to_drop = merge_font(fonts)
            t0_font, base_font, references_to_drop = merge_font(fonts, log)
            for ref in references_to_drop:
                replacements[ref] = t0_font['Reference']
            data = base_font['sfnt']()[0]

@ -1227,7 +1246,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
        page_number_display_map, page_layout, page_margins_map,
        pdf_metadata, report_progress, toc if has_toc else None)

    merge_fonts(pdf_doc)
    merge_fonts(pdf_doc, log)
    num_removed = dedup_type3_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'duplicated Type3 glyphs')
@ -1,6 +1,5 @@
from __future__ import unicode_literals, absolute_import, print_function, division
import os, sys
from codecs import EncodedFile

from calibre.ebooks.rtf2xml import copy, check_encoding
from calibre.ptempfile import better_mktemp

@ -274,15 +273,10 @@ class ConvertToTags:
        if self.__convert_utf or self.__bad_encoding:
            copy_obj = copy.Copy(bug_handler=self.__bug_handler)
            copy_obj.rename(self.__write_to, self.__file)
            file_encoding = "utf-8"
            if self.__bad_encoding:
                file_encoding = "us-ascii"
            with open_for_read(self.__file) as read_obj:
                with open_for_write(self.__write_to) as write_obj:
                    write_objenc = EncodedFile(write_obj, self.__encoding,
                            file_encoding, 'replace')
                    for line in read_obj:
                        write_objenc.write(line)
                        write_obj.write(line)
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

@ -978,13 +978,16 @@ class Application(QApplication):
        if not geom:
            return
        restored = widget.restoreGeometry(geom)
        self.ensure_window_on_screen(widget)
        return restored

    def ensure_window_on_screen(self, widget):
        screen_rect = self.desktop().availableGeometry(widget)
        if not widget.geometry().intersects(screen_rect):
            w = min(widget.width(), screen_rect.width() - 10)
            h = min(widget.height(), screen_rect.height() - 10)
            widget.resize(w, h)
            widget.move((screen_rect.width() - w) // 2, (screen_rect.height() - h) // 2)
        return restored

    def setup_ui_font(self):
        f = QFont(QApplication.font())
@ -232,21 +232,25 @@ class AddAction(InterfaceAction):
            return

        for id_ in ids:
            from calibre.ebooks.oeb.polish.create import create_book
            pt = PersistentTemporaryFile(suffix='.' + format_)
            pt.close()
            try:
                mi = db.new_api.get_metadata(id_, get_cover=False,
                        get_user_categories=False, cover_as_data=False)
                create_book(mi, pt.name, fmt=format_)
                db.add_format_with_hooks(id_, format_, pt.name, index_is_id=True, notify=True)
            finally:
                os.remove(pt.name)
            self.add_empty_format_to_book(id_, format_)

        current_idx = self.gui.library_view.currentIndex()
        if current_idx.isValid():
            view.model().current_changed(current_idx, current_idx)

    def add_empty_format_to_book(self, book_id, fmt):
        from calibre.ebooks.oeb.polish.create import create_book
        db = self.gui.current_db
        pt = PersistentTemporaryFile(suffix='.' + fmt.lower())
        pt.close()
        try:
            mi = db.new_api.get_metadata(book_id, get_cover=False,
                    get_user_categories=False, cover_as_data=False)
            create_book(mi, pt.name, fmt=fmt.lower())
            db.add_format_with_hooks(book_id, fmt, pt.name, index_is_id=True, notify=True)
        finally:
            os.remove(pt.name)

    def add_archive(self, single):
        paths = choose_files(
            self.gui, 'recursive-archive-add', _('Choose archive file'),

@ -312,7 +312,7 @@ class EditMetadataAction(InterfaceAction):
            intro_msg=_('The downloaded metadata is on the left and the original metadata'
                        ' is on the right. If a downloaded value is blank or unknown,'
                        ' the original value is used.'),
            action_button=(_('&View Book'), I('view.png'), self.gui.iactions['View'].view_historical),
            action_button=(_('&View book'), I('view.png'), self.gui.iactions['View'].view_historical),
            db=db
        )
        if d.exec_() == d.Accepted:

@ -13,6 +13,7 @@ from PyQt5.Qt import QIcon, QSize
from calibre.gui2 import error_dialog
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.utils.localization import localize_user_manual_link


class StoreAction(InterfaceAction):

@ -146,8 +147,9 @@ class StoreAction(InterfaceAction):
            'buying from. Be sure to double check that any books you get '
            'will work with your e-book reader, especially if the book you '
            'are buying has '
            '<a href="https://drmfree.calibre-ebook.com/about#drm">DRM</a>.'
            )), 'about_get_books_msg',
            '<a href="{}">DRM</a>.'
            ).format(localize_user_manual_link(
                'https://manual.calibre-ebook.com/drm.html'))), 'about_get_books_msg',
            parent=self.gui, show_cancel_button=False,
            confirm_msg=_('Show this message again'),
            pixmap='dialog_information.png', title=_('About Get books'))

@ -10,7 +10,7 @@ import time

from PyQt5.Qt import QTimer, QDialog, QDialogButtonBox, QCheckBox, QVBoxLayout, QLabel, Qt

from calibre.gui2 import error_dialog
from calibre.gui2 import error_dialog, question_dialog
from calibre.gui2.actions import InterfaceAction


@ -105,13 +105,23 @@ class TweakEpubAction(InterfaceAction):
        from calibre.ebooks.oeb.polish.main import SUPPORTED
        db = self.gui.library_view.model().db
        fmts = db.formats(book_id, index_is_id=True) or ''
        fmts = [x.upper().strip() for x in fmts.split(',')]
        fmts = [x.upper().strip() for x in fmts.split(',') if x]
        tweakable_fmts = set(fmts).intersection(SUPPORTED)
        if not tweakable_fmts:
            return error_dialog(self.gui, _('Cannot edit book'),
                    _('The book must be in the %s formats to edit.'
                        '\n\nFirst convert the book to one of these formats.') % (_(' or ').join(SUPPORTED)),
                    show=True)
            if not fmts:
                if not question_dialog(self.gui, _('No editable formats'),
                        _('Do you want to create an empty EPUB file to edit?')):
                    return
                tweakable_fmts = {'EPUB'}
                self.gui.iactions['Add Books'].add_empty_format_to_book(book_id, 'EPUB')
                current_idx = self.gui.library_view.currentIndex()
                if current_idx.isValid():
                    self.gui.library_view.model().current_changed(current_idx, current_idx)
            else:
                return error_dialog(self.gui, _('Cannot edit book'), _(
                    'The book must be in the %s formats to edit.'
                    '\n\nFirst convert the book to one of these formats.'
                ) % (_(' or ').join(SUPPORTED)), show=True)
        from calibre.gui2.tweak_book import tprefs
        tprefs.refresh()  # In case they were changed in a Tweak Book process
        if len(tweakable_fmts) > 1:
@ -4,7 +4,7 @@

from __future__ import absolute_import, division, print_function, unicode_literals

from PyQt5.Qt import QToolButton
from PyQt5.Qt import QToolButton, QAction

from calibre.gui2.actions import InterfaceAction

@ -24,6 +24,13 @@ class VirtualLibraryAction(InterfaceAction):
    def genesis(self):
        self.menu = m = self.qaction.menu()
        m.aboutToShow.connect(self.about_to_show_menu)
        self.qs_action = QAction(self.gui)
        self.gui.addAction(self.qs_action)
        self.qs_action.triggered.connect(self.gui.choose_vl_triggerred)
        self.gui.keyboard.register_shortcut(self.unique_name + ' - ' + 'quick-select-vl',
            _('Quick select Virtual library'), default_keys=('Ctrl+T',),
            action=self.qs_action, description=_('Quick select a Virtual library'),
            group=self.action_spec[0])

    def about_to_show_menu(self):
        self.gui.build_virtual_library_menu(self.menu, add_tabs_action=False)

@ -413,6 +413,7 @@ if isosx:
            ia = iactions[what]
            ac = ia.qaction
            if not ac.menu() and hasattr(ia, 'shortcut_action_for_context_menu'):
                ia.shortcut_action_for_context_menu.setIcon(ac.icon())
                ac = ia.shortcut_action_for_context_menu
            m.addAction(CloneAction(ac, m))

@ -506,6 +507,7 @@ else:
            ia = iactions[what]
            ac = ia.qaction
            if not ac.menu() and hasattr(ia, 'shortcut_action_for_context_menu'):
                ia.shortcut_action_for_context_menu.setIcon(ac.icon())
                ac = ia.shortcut_action_for_context_menu
            m.addAction(ac)

@ -212,7 +212,7 @@ def add_format_entries(menu, data, book_info):
        else:
            m.addSeparator()
            m.addAction(_('Add other application for %s files...') % fmt.upper(), partial(book_info.choose_open_with, book_id, fmt))
            m.addAction(_('Edit Open With applications...'), partial(edit_programs, fmt, book_info))
            m.addAction(_('Edit Open with applications...'), partial(edit_programs, fmt, book_info))
            menu.addMenu(m)
            menu.ow = m
        if fmt.upper() in SUPPORTED:

@ -279,7 +279,7 @@ def add_item_specific_entries(menu, data, book_info):
def details_context_menu_event(view, ev, book_info):
    url = view.anchorAt(ev.pos())
    menu = view.createStandardContextMenu()
    menu.addAction(QIcon(I('edit-copy.png')), _('Copy &all'), partial(copy_all, book_info))
    menu.addAction(QIcon(I('edit-copy.png')), _('Copy &all'), partial(copy_all, view))
    search_internet_added = False
    if url and url.startswith('action:'):
        data = json_loads(from_hex_bytes(url.split(':', 1)[1]))

@ -96,7 +96,10 @@
      <property name="title">
       <string>Margins</string>
      </property>
      <layout class="QGridLayout" name="gridLayout">
      <layout class="QFormLayout" name="formLayout">
       <property name="fieldGrowthPolicy">
        <enum>QFormLayout::FieldsStayAtSizeHint</enum>
       </property>
       <item row="0" column="0">
        <widget class="QLabel" name="label_3">
         <property name="text">
@ -167,7 +167,7 @@ class DBusMenu(QObject):
    def eventFilter(self, obj, ev):
        ac = getattr(obj, 'menuAction', lambda : None)()
        ac_id = self.action_to_id(ac)
        if ac_id is not None:
        if ac_id is not None and hasattr(ev, 'action'):
            etype = ev.type()
            if etype == QEvent.ActionChanged:
                ac_id = self.action_to_id(ev.action())

@ -41,6 +41,7 @@ class ChooseFormatDialog(QDialog):
        bb.accepted.connect(self.accept), bb.rejected.connect(self.reject)
        h.addStretch(10), h.addWidget(self.buttonBox)

        formats = list(formats)
        for format in formats:
            self.formats.addItem(QListWidgetItem(file_icon_provider().icon_from_ext(format.lower()),
                format.upper()))

@ -44,8 +44,7 @@
   <widget class="QLabel" name="msg">
    <property name="text">
     <string><p>This book is locked by <b>DRM</b>. To learn more about DRM and why you cannot read or convert this book in calibre,
<a href="https://drmfree.calibre-ebook.com/about#drm">click here</a>.<p>A large number of recent, DRM free releases are
available at <a href="https://drmfree.calibre-ebook.com">Open Books</a>.</string>
<a href="https://manual.calibre-ebook.com/drm.html">click here</a>.<p></string>
    </property>
    <property name="wordWrap">
     <bool>true</bool>

@ -369,6 +369,7 @@ class MyBlockingBusy(QDialog):  # {{{
            if args.clear_series:
                self.progress_next_step_range.emit(0)
                cache.set_field('series', {bid: '' for bid in self.ids})
                cache.set_field('series_index', {bid:1.0 for bid in self.ids})
                self.progress_finished_cur_step.emit()

            if args.pubdate is not None:

@ -94,7 +94,7 @@ class SavedSearchEditor(Dialog):
    def __init__(self, parent, initial_search=None):
        self.initial_search = initial_search
        Dialog.__init__(
            self, _('Manage saved searches'), 'manage-saved-searches', parent)
            self, _('Manage Saved searches'), 'manage-saved-searches', parent)

    def setup_ui(self):
        from calibre.gui2.ui import get_gui

@ -10,7 +10,7 @@ from calibre.gui2.dialogs.tag_categories_ui import Ui_TagCategories
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2 import error_dialog
from calibre.constants import islinux
from calibre.utils.icu import sort_key, strcmp
from calibre.utils.icu import sort_key, strcmp, primary_contains
from polyglot.builtins import iteritems, unicode_type

@ -72,9 +72,11 @@ class TagCategories(QDialog, Ui_TagCategories):
            lambda: [t.original_name.replace('|', ',') for t in self.db_categories['authors']],
            lambda: [t.original_name for t in self.db_categories['series']],
            lambda: [t.original_name for t in self.db_categories['publisher']],
            lambda: [t.original_name for t in self.db_categories['tags']]
            lambda: [t.original_name for t in self.db_categories['tags']],
            lambda: [t.original_name for t in self.db_categories['languages']]
        ]
        category_names = ['', _('Authors'), ngettext('Series', 'Series', 2), _('Publishers'), _('Tags')]
        category_names = ['', _('Authors'), ngettext('Series', 'Series', 2),
                          _('Publishers'), _('Tags'), _('Languages')]

        for key,cc in iteritems(self.db.custom_field_metadata()):
            if cc['datatype'] in ['text', 'series', 'enumeration']:

@ -106,6 +108,7 @@ class TagCategories(QDialog, Ui_TagCategories):
        self.category_box.currentIndexChanged[int].connect(self.select_category)
        self.category_filter_box.currentIndexChanged[int].connect(
            self.display_filtered_categories)
        self.item_filter_box.textEdited.connect(self.display_filtered_items)
        self.delete_category_button.clicked.connect(self.del_category)
        if islinux:
            self.available_items_box.itemDoubleClicked.connect(self.apply_tags)

@ -168,13 +171,18 @@ class TagCategories(QDialog, Ui_TagCategories):
        w.setToolTip(_('Category lookup name: ') + item.label)
        return w

    def display_filtered_items(self, text):
        self.display_filtered_categories(None)

    def display_filtered_categories(self, idx):
        idx = idx if idx is not None else self.category_filter_box.currentIndex()
        self.available_items_box.clear()
        self.applied_items_box.clear()
        item_filter = self.item_filter_box.text()
        for item in self.all_items_sorted:
            if idx == 0 or item.label == self.category_labels[idx]:
                if item.index not in self.applied_items and item.exists:
                    if primary_contains(item_filter, item.name):
                        self.available_items_box.addItem(self.make_list_widget(item))
        for index in self.applied_items:
            self.applied_items_box.addItem(self.make_list_widget(self.all_items[index]))

@ -64,6 +64,26 @@
     </property>
    </widget>
   </item>
   <item row="2" column="0">
    <widget class="QLabel" name="label_filt">
     <property name="text">
      <string>Item &filter: </string>
     </property>
     <property name="alignment">
      <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
     </property>
     <property name="buddy">
      <cstring>item_filter_box</cstring>
     </property>
    </widget>
   </item>
   <item row="2" column="1">
    <widget class="QLineEdit" name="item_filter_box">
     <property name="toolTip">
      <string>Enter text to filter the available items. Case and accents are ignored.</string>
     </property>
    </widget>
   </item>
  </layout>
 </item>
 <item row="0" column="1" colspan="3">

@ -136,6 +156,13 @@
     </property>
    </widget>
   </item>
   <item row="2" column="0">
    <widget class="QLabel" name="blank">
     <property name="text">
      <string> </string>
     </property>
    </widget>
   </item>
  </layout>
 </item>
</layout>

@ -152,7 +179,7 @@
   </property>
  </widget>
 </item>
 <item row="2" column="0">
 <item row="3" column="0">
  <widget class="QListWidget" name="available_items_box">
   <property name="alternatingRowColors">
    <bool>true</bool>

@ -165,7 +192,7 @@
   </property>
  </widget>
 </item>
 <item row="2" column="1">
 <item row="3" column="1">
  <widget class="QToolButton" name="apply_button">
   <property name="toolTip">
    <string>Apply tags to current tag category</string>

@ -189,7 +216,7 @@
   </property>
  </widget>
 </item>
 <item row="2" column="2">
 <item row="3" column="2">
  <widget class="QListWidget" name="applied_items_box">
   <property name="alternatingRowColors">
    <bool>true</bool>

@ -199,7 +226,7 @@
   </property>
  </widget>
 </item>
 <item row="2" column="3">
 <item row="3" column="3">
  <widget class="QToolButton" name="unapply_button">
   <property name="toolTip">
    <string>Unapply (remove) tag from current tag category</string>

@ -213,7 +240,7 @@
   </property>
  </widget>
 </item>
 <item row="3" column="0" colspan="4">
 <item row="4" column="0" colspan="4">
  <widget class="QDialogButtonBox" name="buttonBox">
   <property name="orientation">
    <enum>Qt::Horizontal</enum>
@ -198,14 +198,7 @@ def dnd_has_extension(md, extensions, allow_all_extensions=False):
    return bool(exts.intersection(frozenset(extensions)))


def dnd_get_image(md, image_exts=None):
    '''
    Get the image in the QMimeData object md.

    :return: None, None if no image is found
             QPixmap, None if an image is found, the pixmap is guaranteed not null
             url, filename if a URL that points to an image is found
    '''
def dnd_get_local_image_and_pixmap(md, image_exts=None):
    if md.hasImage():
        for x in md.formats():
            x = unicode_type(x)

@ -214,14 +207,13 @@ def dnd_get_image(md, image_exts=None):
                pmap = QPixmap()
                pmap.loadFromData(cdata)
                if not pmap.isNull():
                    return pmap, None
                break
                    return pmap, cdata
    if md.hasFormat('application/octet-stream'):
        cdata = bytes(md.data('application/octet-stream'))
        pmap = QPixmap()
        pmap.loadFromData(cdata)
        if not pmap.isNull():
            return pmap, None
            return pmap, cdata

    if image_exts is None:
        image_exts = image_extensions()

@ -229,23 +221,40 @@ def dnd_get_image(md, image_exts=None):
    # No image, look for an URL pointing to an image
    urls = urls_from_md(md)
    paths = [path_from_qurl(u) for u in urls]
    # First look for a local file
    # Look for a local file
    images = [xi for xi in paths if
            posixpath.splitext(unquote(xi))[1][1:].lower() in
            image_exts]
    images = [xi for xi in images if os.path.exists(xi)]
    p = QPixmap()
    for path in images:
        try:
            with open(path, 'rb') as f:
                p.loadFromData(f.read())
                cdata = f.read()
        except Exception:
            continue
        p = QPixmap()
        p.loadFromData(cdata)
        if not p.isNull():
            return p, None
            return p, cdata

    # No local images, look for remote ones
    return None, None


def dnd_get_image(md, image_exts=None):
    '''
    Get the image in the QMimeData object md.

    :return: None, None if no image is found
             QPixmap, None if an image is found, the pixmap is guaranteed not null
             url, filename if a URL that points to an image is found
    '''
    if image_exts is None:
        image_exts = image_extensions()
    pmap, data = dnd_get_local_image_and_pixmap(md, image_exts)
    if pmap is not None:
        return pmap, None
    # Look for a remote image
    urls = urls_from_md(md)
    # First, see if this is from Firefox
    rurl, fname = get_firefox_rurl(md, image_exts)
@ -531,7 +531,7 @@ class VLTabs(QTabBar):  # {{{
            s = m._s = m.addMenu(_('Restore hidden tabs'))
            for x in hidden:
                s.addAction(x, partial(self.restore, x))
        m.addAction(_('Hide virtual library tabs'), self.disable_bar)
        m.addAction(_('Hide Virtual library tabs'), self.disable_bar)
        if gprefs['vl_tabs_closable']:
            m.addAction(_('Lock virtual library tabs'), self.lock_tab)
        else:

@ -1049,12 +1049,15 @@ class GridView(QListView):
    def number_of_columns(self):
        # Number of columns currently visible in the grid
        if self._ncols is None:
            dpr = self.device_pixel_ratio
            width = int(dpr * self.delegate.cover_size.width())
            height = int(dpr * self.delegate.cover_size.height())
            step = max(10, self.spacing())
            for y in range(step, 500, step):
                for x in range(step, 500, step):
            for y in range(step, 2 * height, step):
                for x in range(step, 2 * width, step):
                    i = self.indexAt(QPoint(x, y))
                    if i.isValid():
                        for x in range(self.viewport().width() - step, self.viewport().width() - 300, -step):
                        for x in range(self.viewport().width() - step, self.viewport().width() - width, -step):
                            j = self.indexAt(QPoint(x, y))
                            if j.isValid():
                                self._ncols = j.row() - i.row() + 1

@ -1070,7 +1073,8 @@ class GridView(QListView):
        if not ci.isValid():
            return
        c = ci.row()
        delta = {Qt.Key_Left: -1, Qt.Key_Right: 1, Qt.Key_Up: -self.number_of_columns(), Qt.Key_Down: self.number_of_columns()}[k]
        ncols = self.number_of_columns() or 1
        delta = {Qt.Key_Left: -1, Qt.Key_Right: 1, Qt.Key_Up: -ncols, Qt.Key_Down: ncols}[k]
        n = max(0, min(c + delta, self.model().rowCount(None) - 1))
        if n == c:
            return

@ -30,6 +30,7 @@ from calibre.utils.date import (
    local_tz, qt_to_dt, as_local_time, UNDEFINED_DATE, is_date_undefined,
    utcfromtimestamp, parse_only_date, internal_iso_format_string)
from calibre import strftime
from calibre.constants import ispy3
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.customize.ui import run_plugins_on_import
from calibre.gui2.comments_editor import Editor

@ -52,7 +53,7 @@ def save_dialog(parent, title, msg, det_msg=''):


def clean_text(x):
    return re.sub(r'\s', ' ', x.strip())
    return re.sub(r'\s', ' ', x.strip(), flags=re.ASCII if ispy3 else 0)


'''

@ -221,7 +222,6 @@ class TitleEdit(EnLineEdit, ToMetadataMixin):

    @property
    def current_val(self):

        title = clean_text(unicode_type(self.text()))
        if not title:
            title = self.get_default()

@ -20,7 +20,7 @@ from PyQt5.Qt import (
    QWidget, QTableView, QGridLayout, QPalette, QTimer, pyqtSignal,
    QAbstractTableModel, QSize, QListView, QPixmap, QModelIndex,
    QAbstractListModel, QRect, QTextBrowser, QStringListModel, QMenu,
    QCursor, QHBoxLayout, QPushButton, QSizePolicy)
    QCursor, QHBoxLayout, QPushButton, QSizePolicy, QSplitter)

from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata import authors_to_string, rating_to_stars

@ -317,8 +317,6 @@ class Comments(HTMLDisplay):  # {{{
    def __init__(self, parent=None):
        HTMLDisplay.__init__(self, parent)
        self.setAcceptDrops(False)
        self.setMaximumWidth(300)
        self.setMinimumWidth(300)
        self.wait_timer = QTimer(self)
        self.wait_timer.timeout.connect(self.update_wait)
        self.wait_timer.setInterval(800)

@ -374,13 +372,6 @@ class Comments(HTMLDisplay):  # {{{
        <html>
        '''%(c,)
        self.setHtml(templ%html)

    def sizeHint(self):
        # This is needed, because on windows the dialog cannot be resized to
        # so that this widgets height become < sizeHint().height(). Qt sets the
        # sizeHint to (800, 600), which makes the dialog unusable on smaller
        # screens.
        return QSize(800, 300)
    # }}}


@ -454,31 +445,41 @@ class IdentifyWidget(QWidget):  # {{{
        self.abort = Event()
        self.caches = {}

        self.l = l = QGridLayout()
        self.setLayout(l)
        self.l = l = QVBoxLayout(self)

        names = ['<b>'+p.name+'</b>' for p in metadata_plugins(['identify']) if
                p.is_configured()]
        self.top = QLabel('<p>'+_('calibre is downloading metadata from: ') +
                ', '.join(names))
        self.top.setWordWrap(True)
        l.addWidget(self.top, 0, 0)
        l.addWidget(self.top)

        self.splitter = s = QSplitter(self)
        s.setChildrenCollapsible(False)
        l.addWidget(s, 100)
        self.results_view = ResultsView(self)
        self.results_view.book_selected.connect(self.emit_book_selected)
        self.get_result = self.results_view.get_result
        l.addWidget(self.results_view, 1, 0)
        s.addWidget(self.results_view)

        self.comments_view = Comments(self)
        l.addWidget(self.comments_view, 1, 1)
        s.addWidget(self.comments_view)
        s.setStretchFactor(0, 2)
        s.setStretchFactor(1, 1)

        self.results_view.show_details_signal.connect(self.comments_view.show_data)

        self.query = QLabel('download starting...')
        self.query.setWordWrap(True)
        l.addWidget(self.query, 2, 0, 1, 2)
        l.addWidget(self.query)

        self.comments_view.show_wait()
        state = gprefs.get('metadata-download-identify-widget-splitter-state')
        if state is not None:
            s.restoreState(state)

    def save_state(self):
        gprefs['metadata-download-identify-widget-splitter-state'] = bytearray(self.splitter.saveState())

    def emit_book_selected(self, book):
        self.book_selected.emit(book, self.caches)

@ -1091,6 +1092,7 @@ class FullFetch(QDialog):  # {{{
    def accept(self):
        # Prevent the usual dialog accept mechanisms from working
        gprefs['metadata_single_gui_geom'] = bytearray(self.saveGeometry())
        self.identify_widget.save_state()
        if DEBUG_DIALOG:
            if self.stack.currentIndex() == 2:
                return QDialog.accept(self)