Merge pull request #2 from kovidgoyal/master

Update fork
This commit is contained in:
Jony 2020-03-15 14:04:48 +01:00 committed by GitHub
commit c8bbfc4598
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
146 changed files with 2208 additions and 1559 deletions

View File

@ -20,6 +20,100 @@
# new recipes:
# - title:
- version: 4.12.0
date: 2020-03-06
new features:
- title: "Kobo driver: Add support for new firmware with the series list on the device"
- title: "Automatically extract the source DOCX file from Kindle Create KPF files when adding them to calibre. If you prefer to preserve the KPF file you can disable the KPF Extract plugin in Preferences->Plugins"
- title: "Content server: Add a button to delete all locally cached books."
tickets: [1864305]
- title: "Edit Book: Allow selecting the contents of a tag with Ctrl+Alt+t"
- title: "Viewer: Save current position after 3 seconds of last position change. Useful if the viewer crashes on resume from sleep."
- title: "Viewer: Add a keyboard shortcut (Ctrl+w) to toggle the scrollbar."
tickets: [1864356]
- title: "Viewer: Keyboard shortcuts to change number of columns (Ctrl+[ and Ctrl+])"
bug fixes:
- title: "Fix the Douban metadata download plugin"
tickets: [1853091]
- title: "Viewer: Fix searching in Regex and Whole words mode not working well."
- title: "Viewer: Fix searching for multiple words in fixed layout books not working."
tickets: [1863464]
- title: "RTF Input: Fix handling of RTF files with invalid encoded text."
tickets: [1864719]
- title: "PDF Output: Add a hangcheck for loading HTML if there is no progress for sixty seconds abort"
tickets: [1865380]
- title: 'Viewer: When starting without a book allowing quitting the viewer by clicking the close button on the "Open book" page'
tickets: [1864343]
improved recipes:
- Wired
- ABC News Australia
new recipes:
- title: Spectator Australia
author: James Cridland
- version: 4.11.2
date: 2020-02-21
new features:
- title: "Viewer: Allow right clicking on the scrollbar to easily access commonly used scrolling shortcuts"
- title: "Edit book: Preview panel: Allow right clicking on images to edit them"
- title: "Add a new Quick select action to quickly select a virtual library with a few keystrokes. Activated by Ctrl+t or the Virtual library menu"
- title: "Viewer: Calculate default column widths based on current font size"
- title: "Viewer: Add a control to quit the viewer useful on touchscreens."
tickets: [1862441]
- title: "Viewer: Add shortcut for showing metadata (Ctrl+n)"
tickets: [1862432]
bug fixes:
- title: "4.11.2 fixes a couple of regressions that broke the New bookmark button in the viewer and scrolling in the content server library view. Also fixes calculation of default column widths in viewer not changing when font size is changed."
- title: "Viewer: Fix a regression that broke detection of pop-up footnotes using EPUB 3 markup"
- title: "Viewer: Fix current reading position not preserved when changing preferences and auto scroll is active."
tickets: [1863438]
- title: "Viewer: Fix stopping autoscroll at end of chapter not stopping next chapter jump."
tickets: [1863487]
- title: "Fix for viewer window going off screen even when not restoring window geometry"
- title: "Edit book: Fix syntax highlighting for break-(before|after)"
tickets: [1863020]
- title: "Fix drag and drop of some image files onto edit metadata dialog not working"
tickets: [1862440]
- title: "Conversion pipeline: Fix styles applied via selectors to the <html> element being ignored"
tickets: [1862401]
- title: "Bulk metadata edit: Fix clear series not resetting series index"
- title: "Fix clicking on author name in book details panel to search in Goodreads not working if author has more than two parts in his name"
improved recipes:
- New York Times
- version: 4.10.0
date: 2020-02-07
@ -955,7 +1049,7 @@
- title: "Allow adding files to selected book records from the clipboard. To use copy a file from windows explorer, right click the Add books button and choose: Add files to selected books from clipboard"
tickets: [1815419]
- title: "Tag browser: When right clicking on a saved search add a menu option to search using the raw search expression."
- title: "Tag browser: When right clicking on a Saved search add a menu option to search using the raw search expression."
tickets: [1816274]
- title: "Tag browser: Have pressing the Enter key find the next match."
@ -1029,7 +1123,7 @@
- title: "Content server: Fix editing metadata that affects multiple books causing all the metadata for all the books to become the same."
tickets: [1812781]
- title: "Open With: Fix using .bat files as the program not working."
- title: "Open with: Fix using .bat files as the program not working."
tickets: [1811045]
- title: "ZIP Output: Fix an error when building the ToC on macOS for some books with non-ASCII ToC entries"
@ -2083,7 +2177,7 @@ to appear as Unknown if metadata management was set to manual in calibre."
- title: "Content server: Open links in the comments section from the book details page in new windows."
tickets: [1737644]
- title: "Choose English as the User interface language when a locale related environment variable is set to the C locale"
- title: "Choose English as the user interface language when a locale related environment variable is set to the C locale"
- title: "Linux installer: A nicer error message if the user tries to run the installer on an ARM machine"
@ -2115,7 +2209,7 @@ to appear as Unknown if metadata management was set to manual in calibre."
- title: "Edit book: Pre-select existing cover image (if any) in add cover dialog"
- title: "Make the Manage saved searches dialog a little easier for new users."
- title: "Make the Manage Saved searches dialog a little easier for new users."
tickets: [1733163]
- title: "Add a tweak to control behavior of Enter on the book list"
@ -2132,7 +2226,7 @@ to appear as Unknown if metadata management was set to manual in calibre."
- title: "Content server: Improve rendering of tags/categories with long words on small screens."
tickets: [1734119]
- title: "Fix first added saved search not appearing in Tag browser until calibre restart."
- title: "Fix first added Saved search not appearing in Tag browser until calibre restart."
tickets: [1733151]
- title: "When checking added books for duplicates, also check on the language field. So books with the same title/authors but different languages are not considered duplicates."

View File

@ -10,7 +10,7 @@ reading. It is cross platform, running on Linux, Windows and macOS.
For more information, see the [calibre About page](https://calibre-ebook.com/about)
[![Build Status](https://github.com/kovidgoyal/calibre/workflows/Continuous%20Integration/badge.svg)](https://github.com/kovidgoyal/calibre/actions?workflow=Continuous+Integration)
[![Build Status](https://github.com/kovidgoyal/calibre/workflows/Continuous%20Integration/badge.svg)](https://github.com/kovidgoyal/calibre/actions?query=workflow%3ACI)
## Screenshots

View File

@ -163,7 +163,6 @@ run(const char **ENV_VARS, const char **ENV_VAR_VALS, char *PROGRAM,
char *t = NULL;
int ret = 0, i;
PyObject *site, *mainf, *res;
uint32_t buf_size = PATH_MAX+1;
for (i = 0; i < 3; i++) {
t = rindex(full_exe_path, '/');

View File

@ -0,0 +1 @@
<svg width="512" height="512" viewBox="0 0 512 512" xmlns="http://www.w3.org/2000/svg"><path d="M464 0H144c-26.5 0-48 21.5-48 48v48H48c-26.5 0-48 21.5-48 48v320c0 26.5 21.5 48 48 48h320c26.5 0 48-21.5 48-48v-48h48c26.5 0 48-21.5 48-48V48c0-26.5-21.5-48-48-48zm-96 464H48V256h320v208zm96-96h-48V144c0-26.5-21.5-48-48-48H144V48h320v320z"/></svg>

After

Width:  |  Height:  |  Size: 344 B

View File

@ -1,6 +1,6 @@
.. _catalog_tut:
Creating AZW3 • EPUB • MOBI catalogs
Creating AZW3 • EPUB • MOBI catalogs
=====================================
calibre's Create catalog feature enables you to create a catalog of your library in a variety of formats. This help file describes cataloging options when generating a catalog in AZW3, EPUB and MOBI formats.
@ -19,7 +19,7 @@ If you want only *some* of your library cataloged, you have two options:
* Create a multiple selection of the books you want cataloged. With more than one book selected in calibre's main window, only the selected books will be cataloged.
* Use the Search field or the Tag browser to filter the displayed books. Only the displayed books will be cataloged.
To begin catalog generation, select the menu item :guilabel:`Convert books > Create a catalog of the books in your calibre library`. You may also add a :guilabel:`Create Catalog` button to a toolbar in :guilabel:`Preferences > Interface > Toolbars` for easier access to the Generate catalog dialog.
To begin catalog generation, select the menu item :guilabel:`Convert books > Create a catalog of the books in your calibre library`. You may also add a :guilabel:`Create catalog` button to a toolbar in :guilabel:`Preferences > Interface > Toolbars & menus` for easier access to the Generate catalog dialog.
.. image:: images/catalog_options.png
:alt: Catalog options
@ -125,7 +125,7 @@ Custom catalog covers
.. |cc| image:: images/custom_cover.png
|cc| With the `Generate Cover plugin <https://www.mobileread.com/forums/showthread.php?t=124219>`_ installed, you can create custom covers for your catalog.
|cc| With the `Generate Cover plugin <https://www.mobileread.com/forums/showthread.php?t=124219>`_ installed, you can create custom covers for your catalog.
To install the plugin, go to :guilabel:`Preferences > Advanced > Plugins > Get new plugins`.
Additional help resources
@ -134,4 +134,3 @@ Additional help resources
For more information on calibre's Catalog feature, see the MobileRead forum sticky `Creating Catalogs - Start here <https://www.mobileread.com/forums/showthread.php?t=118556>`_, where you can find information on how to customize the catalog templates, and how to submit a bug report.
To ask questions or discuss calibre's Catalog feature with other users, visit the MobileRead forum `Calibre Catalogs <https://www.mobileread.com/forums/forumdisplay.php?f=236>`_.

View File

@ -136,7 +136,7 @@ for inclusion into the main calibre repository:
git clone git@github.com:<username>/calibre.git
git remote add upstream https://github.com/kovidgoyal/calibre.git
Replace <username> above with your github username. That will get your fork checked out locally.
Replace <username> above with your GitHub username. That will get your fork checked out locally.
* You can make changes and commit them whenever you like. When you are ready to have your work merged, do a::
git push

107
manual/drm.rst Normal file
View File

@ -0,0 +1,107 @@
.. _drm:
Digital Rights Management (DRM)
===============================================
Digital rights management (DRM) is a generic term for access control
technologies that can be used by hardware manufacturers, publishers, copyright
holders and individuals to try to impose limitations on the usage of digital
content and devices. It is also, sometimes, disparagingly described as Digital
Restrictions Management. The term is used to describe any technology which
inhibits uses (legitimate or otherwise) of digital content that were not
desired or foreseen by the content provider. The term generally doesn't refer
to other forms of copy protection which can be circumvented without modifying
the file or device, such as serial numbers or key-files. It can also refer to
restrictions associated with specific instances of digital works or devices.
DRM technologies attempt to control use of digital media by preventing access,
copying or conversion to other formats by end users. See `wikipedia
<https://en.wikipedia.org/wiki/Digital_rights_management>`_.
What does DRM imply for me personally?
------------------------------------------
When you buy an e-book with DRM you don't really own it but have purchased the
permission to use it in a manner dictated to you by the seller. DRM limits what
you can do with e-books you have "bought". Often people who buy books with DRM
are unaware of the extent of these restrictions. These restrictions prevent you
from reformatting the e-book to your liking, including making stylistic changes
like adjusting the font sizes, although there is software that empowers you to
do such things for non DRM books. People are often surprised that an e-book
they have bought in a particular format cannot be converted to another format
if the e-book has DRM. So if you have an Amazon Kindle and buy a book sold by
Barnes and Noble, you should know that if that e-book has DRM you will not be
able to read it on your Kindle. Notice that I am talking about a book you buy,
not steal or pirate but BUY.
What does DRM do for authors?
----------------------------------
Publishers of DRMed e-books argue that the DRM is all for the sake of authors
and to protect their artistic integrity and prevent piracy. But DRM does NOT
prevent piracy. People who want to pirate content or use pirated content still
do it and succeed. The three major DRM schemes for e-books today are run by
Amazon, Adobe and Barnes and Noble and all three DRM schemes have been cracked.
All DRM does is inconvenience legitimate users. It can be argued that it
actually harms authors as people who would have bought the book choose to find
a pirated version as they are not willing to put up with DRM. Those that would
pirate in the absence of DRM do so in its presence as well. To reiterate, the
key point is that DRM *does not prevent piracy*. So DRM is not only pointless
and harmful to buyers of e-books but also a waste of money.
DRM and freedom
-------------------
Although digital content can be used to make information as well as creative
works easily available to everyone and empower humanity, this is not in the
interests of some publishers who want to steer people away from this
possibility of freedom simply to maintain their relevance in a world developing
so fast that they can't keep up.
Why does calibre not support DRM?
-------------------------------------
calibre is open source software while DRM by its very nature is closed. If
calibre were to support opening or viewing DRM files it could be trivially
modified to be used as a tool for DRM removal which is illegal under today's
laws. Open source software and DRM are a clash of principles. While DRM is all
about controlling the user open source software is about empowering the user.
The two simply can not coexist.
What is calibre's view on content providers?
------------------------------------------------
We firmly believe that authors and other content providers should be
compensated for their efforts, but DRM is not the way to go about it. We are
developing this database of DRM-free e-books from various sources to help you
find DRM-free alternatives and to help independent authors and publishers of
DRM-free e-books publicize their content. We hope you will find this useful and
we request that you do not pirate the content made available to you here.
How can I help fight DRM?
-----------------------------
As somebody who reads and buys e-books you can help fight DRM. Do not buy
e-books with DRM. There are some publishers who publish DRM-free e-books. Make
an effort to see if they carry the e-book you are looking for. If you like
books by certain independent authors that sell DRM-free e-books and you can
afford it make donations to them. This is money well spent as their e-books
tend to be cheaper (there may be exceptions) than the ones you would buy from
publishers of DRMed books and would probably work on all devices you own in the
future saving you the cost of buying the e-book again. Do not discourage
publishers and authors of DRM-free e-books by pirating their content. Content
providers deserve compensation for their efforts. Do not punish them for trying
to make your reading experience better by making available DRM-free e-books. In
the long run this is detrimental to you. If you have bought books from sellers
that carry both DRMed as well as DRM-free books, not knowing if they carry DRM
or not make it a point to leave a comment or review on the website informing
future buyers of its DRM status. Many sellers do not think it important to
clearly indicate to their buyers if an e-book carries DRM or not. `Here
<https://www.defectivebydesign.org/guide/ebooks>`_ you will find a Guide to
DRM-free living.

View File

@ -802,7 +802,8 @@ The HTML editor has very sophisticated syntax highlighting. Features include:
* The text inside bold, italic and heading tags is made bold/italic
* As you move your cursor through the HTML, the matching HTML tags are
highlighted, and you can jump to the opening or closing tag with the
keyboard shortcuts :kbd:`Ctrl+{` and :kbd:`Ctrl+}`
keyboard shortcuts :kbd:`Ctrl+{` and :kbd:`Ctrl+}`. Similarly, you
can select the contents of a tag with :kbd:`Ctrl+Alt+T`.
* Invalid HTML is highlighted with a red underline
* Spelling errors in the text inside HTML tags and attributes such as title
are highlighted. The spell checking is language aware, based on the value

View File

@ -983,7 +983,7 @@ If you want to backup the calibre configuration/plugins, you have to backup the
How do I use purchased EPUB books with calibre (or what do I do with .acsm files)?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Most purchased EPUB books have `DRM <https://drmfree.calibre-ebook.com/about#drm>`_. This prevents calibre from opening them. You can still use calibre to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with calibre will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to calibre.
Most purchased EPUB books have :doc:`DRM <drm>`. This prevents calibre from opening them. You can still use calibre to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with calibre will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to calibre.
I am getting a "Permission Denied" error?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -94,7 +94,7 @@ Convert books
:class: float-right-img
|cei| E-books can be converted from a number of formats into whatever format your e-book reader prefers.
Many e-books available for purchase will be protected by `Digital Rights Management <https://drmfree.calibre-ebook.com/about#drm>`_ *(DRM)* technology.
Many e-books available for purchase will be protected by :doc:`Digital Rights Management <drm>` *(DRM)* technology.
calibre will not convert these e-books. It is easy to remove the DRM from many formats, but as this may be illegal,
you will have to find tools to liberate your books yourself and then use calibre to convert them.

View File

@ -40,7 +40,7 @@ and then the :guilabel:`Add a custom news source` menu item and then the
.. image:: images/custom_news.png
:align: center
First enter ``calibre Blog`` into the :guilabel:`Recipe title` field. This will be the title of the e-book that will be created from the articles in the above feeds.
First enter ``Calibre Blog`` into the :guilabel:`Recipe title` field. This will be the title of the e-book that will be created from the articles in the above feeds.
The next two fields (:guilabel:`Oldest article` and :guilabel:`Max. number of articles`) allow you some control over how many articles should be downloaded from each feed, and they are pretty self explanatory.

View File

@ -21,7 +21,7 @@ available <https://calibre-ebook.com/help>`_.
.. only:: online
**An e-book version of this user manual is available in** `EPUB format <calibre.epub>`_, `AZW3 (Kindle Fire) format <calibre.azw3>`_ and `PDF format <calibre.pdf>`_.
**An e-book version of this User Manual is available in** `EPUB format <calibre.epub>`_, `AZW3 (Kindle Fire) format <calibre.azw3>`_ and `PDF format <calibre.pdf>`_.
.. rubric:: Sections
@ -41,4 +41,5 @@ available <https://calibre-ebook.com/help>`_.
customize
generated/en/cli-index
develop
drm
glossary

View File

@ -55,7 +55,7 @@ Setup
By now, your question might be "How was all of this setup?" There are three steps: 1) create the custom column, 2) tell calibre that the new column is to be treated as a hierarchy, and 3) add genres.
You create the custom column in the usual way, using Preferences -> Add your own columns. This example uses "#genre" as the lookup name and "Genre" as the column heading. The column type is "Comma-separated text, like tags, shown in the Tag browser."
You create the custom column in the usual way, using Preferences -> Add your own columns. This example uses "#genre" as the lookup name and "Genre" as the column heading. The column type is "Comma-separated text, like tags, shown in the Tag browser."
.. image:: images/sg_cc.jpg
:align: center
@ -98,7 +98,7 @@ The Tag browser search mechanism knows if an item has children. If it does, clic
Restrictions
---------------
If you search for a genre then create a saved search for it, you can use the 'restrict to' box to create a virtual library of books with that genre. This is useful if you want to do other searches within the genre or to manage/update metadata for books in the genre. Continuing our example, you can create a saved search named 'History.Japanese' by first clicking on the genre Japanese in the Tag browser to get a search into the search box, entering History.Japanese into the saved search box, then pushing the "save search" button (the green box with the white plus, on the right-hand side).
If you search for a genre then create a saved search for it, you can use the 'restrict to' box to create a Virtual library of books with that genre. This is useful if you want to do other searches within the genre or to manage/update metadata for books in the genre. Continuing our example, you can create a Saved search named 'History.Japanese' by first clicking on the genre Japanese in the Tag browser to get a search into the search box, entering History.Japanese into the saved search box, then pushing the "save search" button (the green box with the white plus, on the right-hand side).
.. image:: images/sg_restrict.jpg
:align: center
@ -110,11 +110,11 @@ After creating the saved search, you can use it as a restriction.
Useful template functions
-------------------------
You might want to use the genre information in a template, such as with save to disk or send to device. The question might then be "How do I get the outermost genre name or names?" A calibre template function, subitems, is provided to make doing this easier.
For example, assume you want to add the outermost genre level to the save-to-disk template to make genre folders, as in "History/The Gathering Storm - Churchill, Winston". To do this, you must extract the first level of the hierarchy and add it to the front along with a slash to indicate that it should make a folder. The template below accomplishes this::
{#genre:subitems(0,1)||/}{title} - {authors}
See :ref:`The template language <templatelangcalibre>` for more information about templates and the :func:`subitems` function.

View File

@ -125,15 +125,15 @@ Dictionary lookup
-------------------
You can look up the meaning of words in the current book by opening the
:guilabel:`Lookup/search panel` via the viewer controls. Then simply double
click on any word and its definition will be displayed in the lookup panel.
:guilabel:`Lookup/search word panel` via the viewer controls. Then simply double
click on any word and its definition will be displayed in the Lookup panel.
Copying text and images
-------------------------
You can select text and images by dragging the content with your mouse and then
right clicking and selecting "Copy" to copy to the clipboard. The copied
right clicking and selecting :guilabel:`Copy` to copy to the clipboard. The copied
material can be pasted into another application as plain text and images.
@ -143,7 +143,7 @@ Non re-flowable content
Some books have very wide content that cannot be broken up at page boundaries.
For example tables or :code:`<pre>` tags. In such cases, you should switch the
viewer to *flow mode* by pressing :kbd:`Ctrl+m` to read this content.
Alternately, you can also add the following CSS to the Styling section of the
Alternately, you can also add the following CSS to the :guilabel:`Styles` section of the
viewer preferences to force the viewer to break up lines of text in
:code:`<pre>` tags::

View File

@ -44,15 +44,15 @@ selected author.
You can switch back to the full library at any time by once again clicking the
:guilabel:`Virtual library` and selecting the entry named :guilabel:`<None>`.
Virtual libraries are based on *searches*. You can use any search as the
basis of a virtual library. The virtual library will contain only the
books matched by that search. First, type in the search you want to use
in the Search bar or build a search using the :guilabel:`Tag browser`.
When you are happy with the returned results, click the Virtual library
button, choose :guilabel:`Create library` and enter a name for the new virtual
library. The virtual library will then be created based on the search
you just typed in. Searches are very powerful, for examples of the kinds
of things you can do with them, see :ref:`search_interface`.
Virtual libraries are based on *searches*. You can use any search as the
basis of a Virtual library. The Virtual library will contain only the
books matched by that search. First, type in the search you want to use
in the Search bar or build a search using the :guilabel:`Tag browser`.
When you are happy with the returned results, click the :guilabel:`Virtual library`
button, choose :guilabel:`Create library` and enter a name for the new Virtual
library. The Virtual library will then be created based on the search
you just typed in. Searches are very powerful, for examples of the kinds
of things you can do with them, see :ref:`search_interface`.
Examples of useful Virtual libraries
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -82,7 +82,7 @@ You can edit a previously created virtual library or remove it, by clicking the
You can tell calibre that you always want to apply a particular virtual library
when the current library is opened, by going to
:guilabel:`Preferences->Interface->Behavior`.
:guilabel:`Preferences->Interface->Behavior`.
You can quickly use the current search as a temporary virtual library by
clicking the :guilabel:`Virtual library` button and choosing the
@ -103,7 +103,7 @@ example, ``vl:Read`` will find all the books in the *Read* virtual library. The
``vl:Read and vl:"Science Fiction"`` will find all the books that are in both the *Read* and
*Science Fiction* virtual libraries.
The value following ``vl:`` must be the name of a virtual library. If the virtual library name
The value following ``vl:`` must be the name of a virtual library. If the virtual library name
contains spaces then surround it with quotes.
One use for a virtual library search is in the content server. In
@ -124,4 +124,3 @@ saved search that shows you unread books, you can click the :guilabel:`Virtual
Library` button and choose the :guilabel:`Additional restriction` option to
show only unread Historical Fiction books. To learn about saved searches, see
:ref:`saved_searches`.

View File

@ -15,7 +15,8 @@ def classes(classes):
class E1843(BasicNewsRecipe):
title = '1843'
__author__ = 'Kovid Goyal'
language = 'en'
description = 'The ideas, culture and lifestyle magazine from The Economist'
language = 'en_GB'
no_stylesheets = True
remove_javascript = True
oldest_article = 365

View File

@ -13,7 +13,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
title = u'20 Minutos new'
title = u'20 Minutos'
publisher = u'Grupo 20 Minutos'
__author__ = 'Luis Hernandez'

View File

@ -1,65 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.20minutos.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class t20Minutos(BasicNewsRecipe):
    """Recipe for www.20minutos.es, a Spanish general-interest news daily."""
    title = '20 Minutos'
    __author__ = 'Darko Miletic'
    description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas'  # noqa
    publisher = '20 Minutos Online SL'
    category = 'news, politics, Spain'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = True
    language = 'es'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://estaticos.20minutos.es/css4/img/ui/logo-301x54.png'
    extra_css = """
    body{font-family: Arial,Helvetica,sans-serif }
    img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [dict(attrs={'class': 'mf-viral'})]
    remove_attributes = ['border']

    feeds = [
        (u'Principal', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss'),
        (u'Cine', u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss'),
        (u'Internacional', u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss'),
        (u'Deportes', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss'),
        (u'Nacional', u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss'),
        (u'Economia', u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss'),
        (u'Tecnologia', u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss')
    ]

    def preprocess_html(self, soup):
        """Clean up downloaded article HTML before conversion.

        Strips inline ``style`` attributes, unwraps anchor tags (text-only
        links become plain text, image-only links become bare ``<div>``
        wrappers), and gives every image an ``alt`` attribute so the
        conversion pipeline does not complain about missing alt text.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                # Text-only link: replace the <a> with its own text.
                # NOTE: the original code bound this to a local named `str`,
                # shadowing the builtin; use a descriptive name instead.
                link_text = item.string
                item.replaceWith(link_text)
            else:
                if limg:
                    # Image link: keep the image, drop the link semantics.
                    item.name = 'div'
                    item.attrs = []
                else:
                    # Mixed/markup content: flatten to its visible text.
                    link_text = self.tag_to_string(item)
                    item.replaceWith(link_text)
        for item in soup.findAll('img', alt=False):
            item['alt'] = 'image'
        return soup

View File

@ -1,43 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class ZiveRecipe(BasicNewsRecipe):
    """Download recipe for ZIVE.sk, a Slovak computing/IT/internet daily."""
    __license__ = 'GPL v3'
    __author__ = 'Abelturd'
    language = 'sk'
    version = 1

    title = u'ZIVE.sk'
    publisher = u''
    category = u'News, Newspaper'
    description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. '
    encoding = 'UTF-8'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'

    # Single combined feed covering all articles.
    feeds = [
        (u'V\u0161etky \u010dl\xe1nky',
         u'http://www.zive.sk/rss/sc-47/default.aspx'),
    ]

    # Drop the "continue reading" teaser paragraph from article bodies.
    preprocess_regexps = [
        (re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL | re.IGNORECASE),
         lambda match: ''),
    ]

    remove_tags = []
    keep_only_tags = [
        dict(name='h1'),
        dict(name='span', attrs={'class': 'arlist-data-info-author'}),
        dict(name='div', attrs={'class': 'bbtext font-resizer-area'}),
    ]

    extra_css = '''
    h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
    h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
    '''

View File

@ -9,47 +9,53 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class ABCNews(BasicNewsRecipe):
title = 'ABC News'
__author__ = 'Pat Stapleton, Dean Cording'
description = 'News from Australia'
masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
__author__ = 'Pat Stapleton, Dean Cording, James Cridland'
description = 'From the Australian Broadcasting Corporation. The ABC is owned and funded by the Australian Government, but is editorially independent.'
masthead_url = 'https://www.abc.net.au/cm/lb/8212706/data/news-logo-2017---desktop-print-data.png'
cover_url = 'https://www.abc.net.au/news/linkableblob/8413676/data/abc-news-og-data.jpg'
cover_margins = (0,20,'#000000')
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = False
handle_gzip = True
no_stylesheets = True
use_embedded_content = False
scale_news_images_to_device = True
encoding = 'utf8'
publisher = 'ABC News'
category = 'News, Australia, World'
category = 'Australia,News'
language = 'en_AU'
publication_type = 'newsportal'
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
# Remove annoying map links (inline-caption class is also used for some
# image captions! hence regex to match maps.google)
publication_type = 'newspaper'
extra_css = '.byline{font-size:smaller;margin-bottom:10px;}.inline-caption{display:block;font-size:smaller;text-decoration: none;}'
preprocess_regexps = [(re.compile(
r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')] # Remove map links
conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': False
}
keep_only_tags = [dict(attrs={'class': ['article section']})]
remove_tags = [dict(attrs={'class': ['related', 'tags', 'tools', 'attached-content ready',
'inline-content story left', 'inline-content map left contracted', 'published',
remove_tags = [dict(attrs={'class': ['inner', 'attached-content', 'authorpromo', 'related', 'tags', 'tools', 'attached-content ready',
'inline-content story left', 'inline-content map left contracted',
'inline-content full embedYouTube embedded', 'published',
'story-map', 'statepromo', 'topics', ]})]
# inner = key points
# attached-content = related stories
# authorpromo = "Contact NameOfJournalist"
remove_attributes = ['width', 'height']
feeds = [
('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
('Science and Technology',
'http://www.abc.net.au/news/feed/2298/rss.xml'),
]
('Top Stories', 'https://www.abc.net.au/news/feed/45910/rss.xml'),
('Politics', 'https://www.abc.net.au/news/feed/51120/rss.xml'),
('World', 'https://www.abc.net.au/news/feed/6497190/rss.xml'),
('Business', 'https://www.abc.net.au/news/feed/51892/rss.xml'),
('Analysis', 'https://www.abc.net.au/news/feed/7571224/rss.xml'),
('Sport', 'https://www.abc.net.au/news/feed/2942460/rss.xml'),
('Adelaide', 'https://www.abc.net.au/news/feed/8057540/rss.xml'),
('Brisbane', 'https://www.abc.net.au/news/feed/12858/rss.xml'),
('Canberra', 'https://www.abc.net.au/news/feed/6910/rss.xml'),
('Darwin', 'https://www.abc.net.au/news/feed/8057648/rss.xml'),
('Hobart', 'https://www.abc.net.au/news/feed/8054562/rss.xml'),
('Melbourne', 'https://www.abc.net.au/news/feed/21708/rss.xml'),
('Sydney', 'https://www.abc.net.au/news/feed/10232/rss.xml'),
('Perth', 'https://www.abc.net.au/news/feed/24886/rss.xml'),
]

View File

@ -1,26 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
title = u'Ads of the World'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = False
description = 'The best international advertising campaigns'
language = 'en'
__author__ = 'faber1971'
no_stylesheets = True
keep_only_tags = [
dict(name='div', attrs={'id': 'primary'})
]
remove_tags = [
dict(name='ul', attrs={'class': 'links inline'}), dict(name='div', attrs={'class': 'form-item'}), dict(
name='div', attrs={'id': ['options', 'comments']}), dict(name='ul', attrs={'id': 'nodePager'})
]
reverse_article_order = True
masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
feeds = [
(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]

View File

@ -1,40 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Dean Cording'
'''
abc.net.au/news
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class BusinessSpectator(BasicNewsRecipe):
title = 'Business Spectator'
__author__ = 'Dean Cording'
description = 'Australian Business News & commentary delivered the way you want it.'
masthead_url = 'http://www.businessspectator.com.au/bs.nsf/logo-business-spectator.gif'
cover_url = masthead_url
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
auto_cleanup = True
use_embedded_content = False
encoding = 'utf8'
publisher = 'Business Spectator'
category = 'News, Australia, Business'
language = 'en_AU'
publication_type = 'newsportal'
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': False
}
feeds = [
('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
('Alan Kohler', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Alan%20Kohler'),
('Robert Gottliebsen', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Robert%20Gottliebsen'),
('Stephen Bartholomeusz',
'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Stephen%20Bartholomeusz'),
('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
]

View File

@ -1,30 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe
import datetime
class Politics(BasicNewsRecipe):
title = u'Courier Mail'
title = u'The Courier-Mail'
description = 'Breaking news headlines for Brisbane and Queensland, Australia. The Courier-Mail is owned by News Corp Australia.'
language = 'en_AU'
__author__ = 'Krittika Goyal'
__author__ = 'Krittika Goyal, James Cridland'
oldest_article = 3 # days
max_articles_per_feed = 20
use_embedded_content = False
d = datetime.datetime.today()
cover_url='http://mfeeds.news.com.au/smedia/NCCOURIER/NCCM_1_' + d.strftime('%Y_%m_%d') + '_thumb_big.jpg'
masthead_url='https://couriermail.digitaleditions.com.au/images/couriermail-logo.jpg'
no_stylesheets = True
auto_cleanup = True
handle_gzip = True
feeds = [
('Top Stories',
'http://feeds.news.com.au/public/rss/2.0/bcm_top_stories_257.xml'),
('Breaking News',
'http://feeds.news.com.au/public/rss/2.0/bcm_breaking_news_67.xml'),
('Queensland News',
'http://feeds.news.com.au/public/rss/2.0/bcm_queensland_news_70.xml'),
('Technology News',
'http://feeds.news.com.au/public/rss/2.0/bcm_technology_news_66.xml'),
('Entertainment News',
'http://feeds.news.com.au/public/rss/2.0/bcm_entertainment_news_256.xml'),
('Business News',
'http://feeds.news.com.au/public/rss/2.0/bcm_business_news_64.xml'),
('Sport News',
'http://feeds.news.com.au/public/rss/2.0/bcm_sports_news_65.xml'),
('Top Stories', 'http://www.couriermail.com.au/rss'),
('Breaking', 'https://www.couriermail.com.au/news/breaking-news/rss'),
('Queensland', 'https://www.couriermail.com.au/news/queensland/rss'),
('Technology', 'https://www.couriermail.com.au/technology/rss'),
('Entertainment', 'https://www.couriermail.com.au/entertainment/rss'),
('Finance','https://www.couriermail.com.au/business/rss'),
('Sport', 'https://www.couriermail.com.au/sport/rss'),
]
# This isn't perfect, but works rather better than it once did. To do - remove links to subscription content.

View File

@ -1,3 +1,9 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
@ -15,10 +21,12 @@ class GlasgowHerald(BasicNewsRecipe):
auto_cleanup = True
feeds = [
(u'News', u'http://www.heraldscotland.com/cmlink/1.758'),
(u'Sport', u'http://www.heraldscotland.com/cmlink/1.761'),
(u'Business', u'http://www.heraldscotland.com/cmlink/1.763'),
(u'Life & Style', u'http://www.heraldscotland.com/cmlink/1.770'),
(u'Arts & Entertainment',
u'http://www.heraldscotland.com/cmlink/1.768',),
(u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]
(u'News', u'https://www.heraldscotland.com/news/rss/'),
(u'Sport', u'https://www.heraldscotland.com/sport/rss/'),
(u'Business', u'https://www.heraldscotland.com/business_hq/rss/'),
(u'Lifestyle', u'https://www.heraldscotland.com/life_style/rss/'),
(u'Arts & Entertainment', u'https://www.heraldscotland.com/arts_ents/rss/',),
(u'Politics', u'https://www.heraldscotland.com/politics/rss/'),
(u'Columnists', u'https://www.heraldscotland.com/opinion/columnists/rss/')
]

BIN
recipes/icons/1843.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 661 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 332 B

After

Width:  |  Height:  |  Size: 717 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 512 B

After

Width:  |  Height:  |  Size: 835 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 222 B

After

Width:  |  Height:  |  Size: 479 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 921 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 331 B

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 620 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

BIN
recipes/icons/the_age.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 788 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 430 B

View File

@ -2,13 +2,9 @@
import re
from collections import defaultdict
from pprint import pformat
from calibre.utils.date import strptime, utcnow
from calibre.web.feeds.news import BasicNewsRecipe
DT_EPOCH = strptime('1970-01-01', '%Y-%m-%d', assume_utc=True)
DIR_COLLECTIONS = [['world'],
['nation'],
['politics'],
@ -29,84 +25,22 @@ DIR_COLLECTIONS = [['world'],
['travel'],
['fashion']]
SECTIONS=['THE WORLD',
'THE NATION',
'POLITICS',
'OPINION',
'CALIFORNIA',
'OBITUARIES',
'BUSINESS',
'HOLLYWOOD',
'SPORTS',
'ENTERTAINMENT',
'MOVIES',
'TELEVISION',
'BOOKS',
'FOOD',
'HEALTH',
'SCIENCE AND TECHNOLOGY',
'HOME',
'TRAVEL',
'FASHION',
'NEWSLETTERS'
'OTHER']
def classes(classes):
q = frozenset(classes.split(' '))
return dict(attrs={
'class': lambda x: x and frozenset(x.split()).intersection(q)})
def absurl(url):
if url.startswith('/'):
url = 'http://www.latimes.com' + url
url = 'https://www.latimes.com' + url
return url
def check_words(words):
return lambda x: x and frozenset(words.split()).intersection(x.split())
def what_section(url):
if re.compile(r'^https?://www[.]latimes[.]com/local/obituaries').search(url):
return 'OBITUARIES'
elif re.compile(r'^https?://www[.]latimes[.]com/business/hollywood').search(url):
return 'HOLLYWOOD'
elif re.compile(r'^https?://www[.]latimes[.]com/entertainment/movies').search(url):
return 'MOVIES'
elif re.compile(r'^https?://www[.]latimes[.]com/entertainment/tv').search(url):
return 'TELEVISION'
elif re.compile(r'^https?://www[.]latimes[.]com/business/technology').search(url):
return 'SCIENCE AND TECHNOLOGY'
elif re.compile(r'^https?://www[.]latimes[.]com/world').search(url):
return 'THE WORLD'
elif re.compile(r'^https?://www[.]latimes[.]com/nation').search(url):
return 'THE NATION'
elif re.compile(r'^https?://www[.]latimes[.]com/politics').search(url):
return 'POLITICS'
elif re.compile(r'^https?://www[.]latimes[.]com/opinion').search(url):
return 'OPINION'
elif re.compile(r'^https?://www[.]latimes[.]com/(?:local|style)').search(url):
return 'CALIFORNIA'
elif re.compile(r'^https?://www[.]latimes[.]com/business').search(url):
return 'BUSINESS'
elif re.compile(r'^https?://www[.]latimes[.]com/sports').search(url):
return 'SPORTS'
elif re.compile(r'^https?://www[.]latimes[.]com/entertainment').search(url):
return 'ENTERTAINMENT'
elif re.compile(r'^https?://www[.]latimes[.]com/books').search(url):
return 'BOOKS'
elif re.compile(r'^https?://www[.]latimes[.]com/food').search(url):
return 'FOOD'
elif re.compile(r'^https?://www[.]latimes[.]com/health').search(url):
return 'HEALTH'
elif re.compile(r'^https?://www[.]latimes[.]com/science').search(url):
return 'SCIENCE AND TECHNOLOGY'
elif re.compile(r'^https?://www[.]latimes[.]com/home').search(url):
return 'HOME'
elif re.compile(r'^https?://www[.]latimes[.]com/travel').search(url):
return 'TRAVEL'
elif re.compile(r'^https?://www[.]latimes[.]com/fashion').search(url):
return 'FASHION'
elif re.compile(r'^https?://www[.]latimes[.]com/newsletter').search(url):
return 'NEWSLETTERS'
else:
return 'OTHER'
parts = url.split('/')
return parts[-4].capitalize()
class LATimes(BasicNewsRecipe):
@ -126,32 +60,25 @@ class LATimes(BasicNewsRecipe):
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
keep_only_tags = [
dict(name='header', attrs={'id': 'top'}),
dict(name='article'),
dict(name='div', attrs={'id': 'liveblog-story-wrapper'})
classes('ArticlePage-breadcrumbs ArticlePage-headline ArticlePage-mainContent'),
]
remove_tags= [
dict(name='div', attrs={'class': check_words(
'hidden-tablet hidden-mobile hidden-desktop pb-f-ads-dfp')})
]
remove_tags_after = [
dict(name='div', attrs={'class': check_words('pb-f-article-body')})
classes('ArticlePage-actions Enhancement hidden-tablet hidden-mobile hidden-desktop pb-f-ads-dfp')
]
def parse_index(self):
index = 'http://www.latimes.com/'
pat = r'^(?:https?://www[.]latimes[.]com)?/[^#]+20[0-9]{6}-(?:html)?story[.]html'
index = 'https://www.latimes.com/'
pat = r'^https://www\.latimes\.com/[^/]+?/story/20\d{2}-\d{2}-\d{2}/\S+'
articles = self.find_articles(index, pat)
for collection in DIR_COLLECTIONS:
if self.test:
continue
topdir = collection.pop(0)
index = 'http://www.latimes.com/' + topdir + '/'
pat = r'^(?:https?://www[.]latimes[.]com)?/' + \
topdir + '/[^#]+20[0-9]{6}-(?:html)?story[.]html'
articles += self.find_articles(index, pat)
collection_index = index + topdir + '/'
articles += self.find_articles(collection_index, pat)
for subdir in collection:
sub_index = index + subdir + '/'
sub_index = collection_index + subdir + '/'
articles += self.find_articles(sub_index, pat)
feeds = defaultdict(list)
@ -159,12 +86,7 @@ class LATimes(BasicNewsRecipe):
section = what_section(article['url'])
feeds[section].append(article)
keys = []
for key in SECTIONS:
if key in feeds.keys():
keys.append(key)
self.log(pformat(dict(feeds)))
return [(k, feeds[k]) for k in keys]
return [(k, feeds[k]) for k in sorted(feeds)]
def preprocess_html(self, soup):
for img in soup.findAll('img', attrs={'data-src': True}):
@ -190,16 +112,6 @@ class LATimes(BasicNewsRecipe):
alinks = [a for a in alinks if len(
a.contents) == 1 and a.find(text=True, recursive=False)]
articles = [
{'title': a.find(text=True), 'url': absurl(a['href'])} for a in alinks]
date_rx = re.compile(
r'^https?://www[.]latimes[.]com/[^#]+-(?P<date>20[0-9]{6})-(?:html)?story[.]html')
for article in articles:
mdate = date_rx.match(article['url'])
if mdate is not None:
try:
article['timestamp'] = (strptime(mdate.group('date'),'%Y%m%d') - DT_EPOCH).total_seconds()
except Exception:
article['timestamp'] = (utcnow() - DT_EPOCH).total_seconds()
article['url'] = mdate.group(0)
{'title': self.tag_to_string(a), 'url': absurl(a['href'])} for a in alinks]
self.log('Found: ', len(articles), ' articles.\n')
return articles

View File

@ -17,7 +17,7 @@ class AListApart (BasicNewsRecipe):
oldest_article = 120
remove_empty_feeds = True
encoding = 'utf8'
cover_url = u'http://alistapart.com/pix/alalogo.gif'
cover_url = u'https://alistapart.com/wp-content/uploads/2019/03/cropped-icon_navigation-laurel-512.jpg'
def get_extra_css(self):
if not self.extra_css:

View File

@ -1,3 +1,8 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
import json
from calibre.web.feeds.news import BasicNewsRecipe
from collections import defaultdict
@ -49,28 +54,23 @@ class Newsweek(BasicNewsRecipe):
a = li.xpath('descendant::a[@href]')[0]
url = href_to_url(a, add_piano=True)
self.timefmt = self.tag_to_string(a)
img = li.xpath('descendant::a[@href]//img[@data-src]')[0]
self.cover_url = img.get('data-src').partition('?')[0]
img = li.xpath('descendant::a[@href]//source[@type="image/jpeg"]/@srcset')[0]
self.cover_url = img.partition('?')[0]
self.log('Found cover url:', self.cover_url)
root = self.index_to_soup(url, as_tree=True)
features = []
try:
div = root.xpath('//div[@class="magazine-features"]')[0]
except IndexError:
pass
else:
for a in div.xpath('descendant::div[@class="h1"]//a[@href]'):
title = self.tag_to_string(a)
article = a.xpath('ancestor::article')[0]
desc = ''
s = article.xpath('descendant::div[@class="summary"]')
if s:
desc = self.tag_to_string(s[0])
features.append({'title': title, 'url': href_to_url(a), 'description': desc})
self.log(title, href_to_url(a))
for article in root.xpath('//div[@class="magazine-features"]//article'):
a = article.xpath('descendant::a[@class="article-link"]')[0]
title = self.tag_to_string(a)
url = href_to_url(a)
desc = ''
s = article.xpath('descendant::div[@class="summary"]')
if s:
desc = self.tag_to_string(s[0])
features.append({'title': title, 'url': href_to_url(a), 'description': desc})
self.log(title, url)
index = []
if features:
index.append(('Features', features))
index = [('Features', features)]
sections = defaultdict(list)
for widget in ('editor-pick',):
self.parse_widget(widget, sections)
@ -79,30 +79,18 @@ class Newsweek(BasicNewsRecipe):
return index
def parse_widget(self, widget, sections):
root = self.index_to_soup('https://d.newsweek.com/widget/' + widget, as_tree=True)
div = root.xpath('//div')[0]
href_xpath = 'descendant::*[local-name()="h1" or local-name()="h2" or local-name()="h3" or local-name()="h4"]/a[@href]'
for a in div.xpath(href_xpath):
title = self.tag_to_string(a)
article = a.xpath('ancestor::article')[0]
desc = ''
s = article.xpath('descendant::div[@class="summary"]')
if s:
desc = self.tag_to_string(s[0])
sec = article.xpath('descendant::div[@class="category"]')
if sec:
sec = self.tag_to_string(sec[0])
else:
sec = 'Articles'
sections[sec].append(
{'title': title, 'url': href_to_url(a), 'description': desc})
self.log(title, href_to_url(a))
if desc:
self.log('\t' + desc)
self.log('')
def print_version(self, url):
return url + '?piano_d=1'
raw = self.index_to_soup('https://d.newsweek.com/json/' + widget, raw=True)
data = json.loads(raw)['items']
for item in data:
title = item['title']
url = BASE + item['link']
self.log(title, url)
sections[item['label']].append(
{
'title': title,
'url': url,
'description': item['description'],
})
def preprocess_html(self, soup):
# Parallax images in the articles are loaded as background images

View File

@ -90,6 +90,7 @@ class NewYorkTimes(BasicNewsRecipe):
compress_news_images = True
compress_news_images_auto_size = 5
remove_attributes = ['style']
conversion_options = {'flow_size': 0}
remove_tags = [
dict(attrs={'aria-label':'tools'.split()}),
@ -266,14 +267,19 @@ class NewYorkTimes(BasicNewsRecipe):
if article.get('description'):
self.log('\t\t', article['description'])
container = soup.find(itemtype='http://schema.org/CollectionPage')
container.find('header').extract()
div = container.find('div')
for section in div.findAll('section'):
for ol in section.findAll('ol'):
for article in self.parse_article_group(ol):
log(article)
yield article
cid = slug.split('/')[-1]
if cid == 'dining':
cid = 'food'
try:
container = soup.find(id='collection-{}'.format(cid)).find('section')
except AttributeError:
container = None
if container is None:
raise ValueError('Failed to find articles container for slug: {}'.format(slug))
for ol in container.findAll('ol'):
for article in self.parse_article_group(ol):
log(article)
yield article
def parse_web_sections(self):
self.read_nyt_metadata()

View File

@ -90,6 +90,7 @@ class NewYorkTimes(BasicNewsRecipe):
compress_news_images = True
compress_news_images_auto_size = 5
remove_attributes = ['style']
conversion_options = {'flow_size': 0}
remove_tags = [
dict(attrs={'aria-label':'tools'.split()}),
@ -266,14 +267,19 @@ class NewYorkTimes(BasicNewsRecipe):
if article.get('description'):
self.log('\t\t', article['description'])
container = soup.find(itemtype='http://schema.org/CollectionPage')
container.find('header').extract()
div = container.find('div')
for section in div.findAll('section'):
for ol in section.findAll('ol'):
for article in self.parse_article_group(ol):
log(article)
yield article
cid = slug.split('/')[-1]
if cid == 'dining':
cid = 'food'
try:
container = soup.find(id='collection-{}'.format(cid)).find('section')
except AttributeError:
container = None
if container is None:
raise ValueError('Failed to find articles container for slug: {}'.format(slug))
for ol in container.findAll('ol'):
for article in self.parse_article_group(ol):
log(article)
yield article
def parse_web_sections(self):
self.read_nyt_metadata()

View File

@ -1,72 +0,0 @@
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class AdjectiveSpecies(BasicNewsRecipe):
title = u'Российская Газета'
__author__ = 'bug_me_not'
cover_url = u'http://img.rg.ru/img/d/logo2012.png'
description = 'Российская Газета'
publisher = 'Правительство Российской Федерации'
category = 'news'
language = 'ru'
no_stylesheets = True
remove_javascript = True
oldest_article = 300
max_articles_per_feed = 100
remove_tags_before = dict(name='h1')
remove_tags_after = dict(name='div', attrs={'class': 'ar-citate'})
remove_tags = [dict(name='div', attrs={'class': 'insert_left'}),
dict(name='a', attrs={'href': '#comments'}),
dict(name='div', attrs={'class': 'clear'}),
dict(name='div', attrs={'class': 'ar-citate'}),
dict(name='div', attrs={'class': 'ar-social red'}),
dict(name='div', attrs={'class': 'clear clear-head'}), ]
feeds = [
(u'Все материалы', u'http://www.rg.ru/tema/rss.xml'),
(u'Еженедельный выпуск',
u'http://www.rg.ru/tema/izd-subbota/rss.xml'),
(u'Государство',
u'http://www.rg.ru/tema/gos/rss.xml'),
(u'Экономика',
u'http://www.rg.ru/tema/ekonomika/rss.xml'),
(u'Бизнес',
u'http://www.rg.ru/tema/izd-biznes/rss.xml'),
(u'В мире', u'http://www.rg.ru/tema/mir/rss.xml'),
(u'Происшествия',
u'http://www.rg.ru/tema/bezopasnost/rss.xml'),
(u'Общество',
u'http://www.rg.ru/tema/obshestvo/rss.xml'),
(u'Культура',
u'http://www.rg.ru/tema/kultura/rss.xml'),
(u'Спорт', u'http://www.rg.ru/tema/sport/rss.xml'),
(u'Документы', u'http://rg.ru/tema/doc-any/rss.xml'),
(u'РГ: Башкортостан',
u'http://www.rg.ru/org/filial/bashkortostan/rss.xml'),
(u'РГ: Волга-Кама',
u'http://www.rg.ru/org/filial/volga-kama/rss.xml'),
(u'РГ: Восточная Сибирь',
u'http://www.rg.ru/org/filial/enisey/rss.xml'),
(u'РГ: Дальний Восток',
u'http://www.rg.ru/org/filial/dvostok/rss.xml'),
(u'РГ: Кубань. Северный Кавказ',
u'http://www.rg.ru/org/filial/kuban/rss.xml'),
(u'РГ: Пермский край',
u'http://www.rg.ru/org/filial/permkray/rss.xml'),
(u'РГ: Приволжье',
u'http://www.rg.ru/org/filial/privolzhe/rss.xml'),
(u'РГ: Северо-Запад',
u'http://www.rg.ru/org/filial/szapad/rss.xml'),
(u'РГ: Сибирь',
u'http://www.rg.ru/org/filial/sibir/rss.xml'),
(u'РГ: Средняя Волга',
u'http://www.rg.ru/org/filial/svolga/rss.xml'),
(u'РГ: Урал и Западная Сибирь',
u'http://www.rg.ru/org/filial/ural/rss.xml'),
(u'РГ: Центральная Россия',
u'http://www.rg.ru/org/filial/roscentr/rss.xml'),
(u'РГ: Юг России',
u'http://www.rg.ru/org/filial/jugrossii/rss.xml'),
]

View File

@ -0,0 +1,51 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
https://www.spectator.com.au/
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class SpectatorAU(BasicNewsRecipe):
title = 'Spectator Australia'
__author__ = 'Pat Stapleton, Dean Cording, James Cridland'
description = 'Spectator Australia is an Australian edition of The Spectator, first published in the UK in July 1828.'
masthead_url = 'https://www.spectator.com.au/content/themes/spectator-australia/assets/images/spec-aus-logo.png'
cover_url = 'https://spectator.imgix.net/content/uploads/2015/10/Spectator-Australia-Logo.jpg'
oldest_article = 7
handle_gzip = True
no_stylesheets = True
use_embedded_content = False
scale_news_images_to_device = True
encoding = 'utf8'
publisher = 'Spectator Australia'
category = 'Australia,News'
language = 'en_AU'
publication_type = 'newspaper'
extra_css = '.article-header__author{margin-bottom:20px;}'
conversion_options = {
'comments': description,
'tags': category,
'language': language,
'publisher': publisher,
'linearize_tables': False
}
keep_only_tags = [dict(attrs={'class': ['article']})]
remove_tags = [
dict(
attrs={
'class': [
'big-author', 'article-header__category', 'margin-menu',
'related-stories', 'disqus_thread', 'middle-promo',
'show-comments', 'article-tags'
]
}
),
dict(name=['h4', 'hr'])
]
remove_attributes = ['width', 'height']
feeds = [
('Spectator Australia', 'https://www.spectator.com.au/feed/'),
]

View File

@ -1,10 +1,19 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import re
from mechanize import Request
from calibre.web.feeds.recipes import BasicNewsRecipe
def class_sel(cls):
def f(x):
return x and cls in x.split()
return f
def absolutize(url):
return 'https://spectator.co.uk' + url
class Spectator(BasicNewsRecipe):
@ -15,52 +24,100 @@ class Spectator(BasicNewsRecipe):
language = 'en'
no_stylesheets = True
keep_only_tags = dict(name='div', attrs={
'class': ['article-header__text', 'featured-image', 'article-content']})
remove_tags = [
dict(name='div', attrs={'id': ['disqus_thread']}),
dict(attrs={'class': ['middle-promo',
'sharing', 'mejs-player-holder']}),
dict(name='a', onclick=lambda x: x and '__gaTracker' in x and 'outbound-article' in x),
]
remove_tags_after = [
dict(name='hr', attrs={'class': 'sticky-clear'}),
]
def parse_spec_section(self, div):
h2 = div.find('h2')
sectitle = self.tag_to_string(h2)
self.log('Section:', sectitle)
articles = []
for div in div.findAll('div', id=lambda x: x and x.startswith('post-')):
h2 = div.find('h2', attrs={'class': class_sel('term-item__title')})
if h2 is None:
h2 = div.find(attrs={'class': class_sel('news-listing__title')})
title = self.tag_to_string(h2)
a = h2.find('a')
url = a['href']
desc = ''
self.log('\tArticle:', title)
p = div.find(attrs={'class': class_sel('term-item__excerpt')})
if p is not None:
desc = self.tag_to_string(p)
articles.append({'title': title, 'url': url, 'description': desc})
return sectitle, articles
use_embedded_content = True
def parse_index(self):
soup = self.index_to_soup('https://www.spectator.co.uk/magazine/')
a = soup.find('a', attrs={'class': 'issue-details__cover-link'})
self.timefmt = ' [%s]' % a['title']
self.cover_url = a['href']
if self.cover_url.startswith('//'):
self.cover_url = 'http:' + self.cover_url
br = self.get_browser()
main_js = br.open_novisit('https://spectator.co.uk/main.js').read().decode('utf-8')
data = {}
fields = ('apiKey', 'apiSecret', 'contentEnvironment', 'siteUrl', 'magazineIssueContentUrl', 'contentUrl')
pat = r'this.({})\s*=\s*"(.+?)"'.format('|'.join(fields))
for m in re.finditer(pat, main_js):
data[m.group(1)] = m.group(2)
self.log('Got Spectator data:', data)
headers = {
'api_key': data['apiKey'],
'origin': data['siteUrl'],
'access_token': data['apiSecret'],
'Accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
'Accept-encoding': 'gzip, deflate',
'Accept': '*/*',
}
feeds = []
def make_url(utype, query, includes=(), limit=None):
ans = data[utype] + '/entries?environment=' + data['contentEnvironment']
if limit is not None:
ans += '&limit={}'.format(limit)
for inc in includes:
ans += '&include[]=' + inc
ans += '&query=' + json.dumps(query)
return ans
div = soup.find(attrs={'class': class_sel('content-area')})
for x in div.findAll(attrs={'class': class_sel('magazine-section-holder')}):
title, articles = self.parse_spec_section(x)
if articles:
feeds.append((title, articles))
return feeds
def get_result(url):
self.log('Fetching:', url)
req = Request(url, headers=headers)
raw = br.open_novisit(req).read().decode('utf-8')
return json.loads(raw)['entries']
# Get current issue
url = data['magazineIssueContentUrl'] + '/entries?environment=' + data['contentEnvironment'] + "&desc=issue_date&limit=1&only[BASE][]=url"
result = get_result(url)
slug = result[0]['url']
uid = result[0]['uid'] # noqa
date = slug.split('/')[-1]
self.log('Downloading issue:', date)
# Cover information
url = make_url(
'magazineIssueContentUrl',
{'url': slug},
limit=1
)
self.cover_url = get_result(url)[0]['magazine_cover']['url']
self.log('Found cover:', self.cover_url)
# List of articles
url = make_url(
'contentUrl',
{
"magazine_content_production_only.magazine_issue": {
"$in_query": {"url": slug},
"_content_type_uid": "magazine_issue"
},
"_content_type_uid": "article"
},
includes=(
'topic', 'magazine_content_production_only.magazine_issue',
'magazine_content_production_only.magazine_subsection', 'author'
)
)
result = get_result(url)
articles = {}
for entry in result:
title = entry['title']
url = absolutize(entry['url'])
blocks = []
a = blocks.append
byline = entry.get('byline') or ''
if byline:
a('<h3>{}</h3>'.format(byline))
if entry.get('author'):
for au in reversed(entry['author']):
au = entry['author'][0]
cac = ''
if au.get('caricature'):
cac = '<img src="{}">'.format(au['caricature']['url'])
a('<div>{} <a href="{}>{}</a></div>'.format(cac, absolutize(au['url']), au['title']))
if entry.get('hero_image'):
hi = entry['hero_image'][0]
a('<div style="text-align: center"><img src="{}"></div>'.format(hi['url']))
if hi.get('description'):
a('<div style="text-align: center; font-size: smaller">{}</div>'.format(hi['description']))
a(entry['text_body'])
section = 'Unknown'
if entry.get('topic'):
topic = entry['topic'][0]
section = topic['title']
articles.setdefault(section, []).append({
'title': title, 'url': url, 'description': byline, 'content': '\n\n'.join(blocks)})
return [(sec, articles[sec]) for sec in sorted(articles)]

View File

@ -15,7 +15,7 @@ class TheBaffler(BasicNewsRecipe):
__author__ = 'Jose Ortiz'
description = ('This magazine contains left-wing criticism, cultural analysis, shorts'
' stories, poems and art. They publish six print issues annually.')
language = 'en_US'
language = 'en'
encoding = 'UTF-8'
no_javascript = True
no_stylesheets = True

View File

@ -1,207 +0,0 @@
#!/usr/bin/env python2
u'''
Ведомости
'''
from calibre.web.feeds.feedparser import parse
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe
def new_tag(soup, name, attrs=()):
impl = getattr(soup, 'new_tag', None)
if impl is not None:
return impl(name, attrs=dict(attrs))
return Tag(soup, name, attrs=attrs or None)
class VedomostiRecipe(BasicNewsRecipe):

    """Recipe for the Russian daily business newspaper Vedomosti.

    Articles are fetched from the newspaper RSS feed and distributed
    into virtual feeds based on the tag terms of each entry; untagged
    entries go into the '_default' feed.  NOTE(review): file shebang is
    python2; python-3 incompatibilities fixed where obvious.
    """

    title = u'Ведомости'
    __author__ = 'Nikolai Kotchetkov'
    publisher = 'vedomosti.ru'
    category = 'press, Russia'
    description = u'Ежедневная деловая газета'
    oldest_article = 3
    max_articles_per_feed = 100

    masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
    cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'

    # Add feed names if you want them to be sorted (feeds of this list appear
    # first)
    sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги']

    encoding = 'cp1251'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    recursions = 0

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    keep_only_tags = [dict(name='td', attrs={'class': ['second_content']})]
    remove_tags_after = [dict(name='div', attrs={'class': 'article_text'})]
    remove_tags = [
        dict(name='div', attrs={'class': ['sep', 'choice', 'articleRightTbl']})]

    feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml']

    # base URL for relative links
    base_url = u'http://www.vedomosti.ru'

    extra_css = 'h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'\
        'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'\
        'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'\
        '.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
        '.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
        '.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'\
        '.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'\
        '.article_desc {font-size: 1em; font-style:italic;}'

    def parse_index(self):
        """Fetch the RSS feed and distribute articles into virtual feeds.

        Returns a list of (feed_title, articles) tuples; feeds named in
        sortOrder come first, the rest follow in insertion order.
        Raises NotImplementedError on any failure so calibre falls back
        to its default feed handling.
        """
        try:
            feedData = parse(self.feeds[0])
            if not feedData:
                raise NotImplementedError
            self.log("parse_index: Feed loaded successfully.")
            # Prefer the feed's own title/description when available.
            try:
                if feedData.feed.title:
                    self.title = feedData.feed.title
                    self.log("parse_index: Title updated to: ", self.title)
            except Exception:
                pass
            try:
                if feedData.feed.description:
                    self.description = feedData.feed.description
                    self.log("parse_index: Description updated to: ",
                             self.description)
            except Exception:
                pass

            def get_virtual_feed_articles(feed):
                # Return the article list for a virtual feed, creating
                # the feed on first use.
                if feed in feeds:
                    return feeds[feed][1]
                self.log("Adding new feed: ", feed)
                articles = []
                feeds[feed] = (feed, articles)
                return articles

            feeds = {}

            # Iterate feed items and distribute articles using tags
            for item in feedData.entries:
                link = item.get('link', '')
                title = item.get('title', '')
                if '' == link or '' == title:
                    continue
                article = {'title': title, 'url': link, 'description': item.get(
                    'description', ''), 'date': item.get('date', ''), 'content': ''}
                if not item.get('tags'):  # noqa
                    get_virtual_feed_articles('_default').append(article)
                    continue
                # Add the article to the default feed at most once even
                # when several tags have an empty term.  The original
                # code reset this flag on every tag iteration, so such
                # articles were appended to '_default' repeatedly.
                addedToDefault = False
                for tag in item.tags:
                    term = tag.get('term', '')
                    if '' == term:
                        if not addedToDefault:
                            addedToDefault = True
                            get_virtual_feed_articles('_default').append(article)
                        continue
                    get_virtual_feed_articles(term).append(article)

            # Get feed list
            # Select sorted feeds first of all
            result = []
            for feedName in self.sortOrder:
                if not feeds.get(feedName):
                    continue
                result.append(feeds[feedName])
                del feeds[feedName]
            # extend() accepts dict views on python 3 as well, unlike the
            # old ``result + feeds.values()`` which raises TypeError there.
            result.extend(feeds.values())
            return result

        except Exception as err:
            self.log(err)
            raise NotImplementedError

    def preprocess_html(self, soup):
        """Make images e-ink friendly via the stock adeify helper."""
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        """Rebuild the article page: title, hero image, abstract, authors.

        Extracts the article_text div and re-inserts the pieces in a
        fixed order, fixing relative links against base_url.
        """
        # Find article
        contents = soup.find('div', {'class': ['article_text']})
        if not contents:
            self.log('postprocess_html: article div not found!')
            return soup
        contents.extract()

        # Find title
        title = soup.find('h1')
        if title:
            contents.insert(0, title)

        # Find article image
        newstop = soup.find('div', {'class': ['newstop']})
        if newstop:
            img = newstop.find('img')
            if img:
                imgDiv = new_tag(soup, 'div')
                imgDiv['class'] = 'article_img'
                if img.get('width'):
                    del(img['width'])
                if img.get('height'):
                    del(img['height'])

                # find description
                element = img.parent.nextSibling
                img.extract()
                imgDiv.insert(0, img)
                while element:
                    # Advance before any skip: the original code did
                    # ``continue`` without advancing for non-Tag nodes
                    # (e.g. NavigableStrings), which hung in an
                    # infinite loop.
                    nextElement = element.nextSibling
                    if isinstance(element, Tag) and 'p' == element.name:
                        element.extract()
                        element['class'] = 'article_img_desc'
                        imgDiv.insert(len(imgDiv.contents), element)
                    element = nextElement
                contents.insert(1, imgDiv)

        # find article abstract
        abstract = soup.find('p', {'class': ['subhead']})
        if abstract:
            abstract['class'] = 'article_desc'
            contents.insert(2, abstract)

        # Find article authors
        authorsDiv = soup.find('div', {'class': ['autors']})
        if authorsDiv:
            authorsP = authorsDiv.find('p')
            if authorsP:
                authorsP['class'] = 'article_authors'
                contents.insert(len(contents.contents), authorsP)

        # Fix urls that use relative path
        urls = contents.findAll('a', href=True)
        if urls:
            for url in urls:
                if '/' == url['href'][0]:
                    url['href'] = self.base_url + url['href']

        body = soup.find('td', {'class': ['second_content']})
        if body:
            body.replaceWith(contents)

        self.log('Result: ', soup.prettify())

        return soup

View File

@ -4,6 +4,7 @@ __copyright__ = '2014, Darko Miletic <darko.miletic at gmail.com>'
www.wired.com
'''
from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe
@ -80,3 +81,17 @@ class WiredDailyNews(BasicNewsRecipe):
articles.extend(self.parse_wired_index_page(baseurl.format(pagenum), seen))
return [('Magazine Articles', articles)]
# Wired changes the content it delivers based on cookies, so the
# following ensures that we send no cookies
def get_browser(self, *args, **kwargs):
return self
def clone_browser(self, *args, **kwargs):
return self.get_browser()
def open_novisit(self, *args, **kwargs):
br = browser()
return br.open_novisit(*args, **kwargs)
open = open_novisit

View File

@ -4,6 +4,7 @@ __copyright__ = '2014, Darko Miletic <darko.miletic at gmail.com>'
www.wired.com
'''
from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe
@ -66,3 +67,17 @@ class WiredDailyNews(BasicNewsRecipe):
def get_article_url(self, article):
return article.get('link', None)
# Wired changes the content it delivers based on cookies, so the
# following ensures that we send no cookies
def get_browser(self, *args, **kwargs):
return self
def clone_browser(self, *args, **kwargs):
return self.get_browser()
def open_novisit(self, *args, **kwargs):
br = browser()
return br.open_novisit(*args, **kwargs)
open = open_novisit

View File

@ -40,7 +40,7 @@
/* blocks */
html, div, map, dt, isindex, form {
div, map, dt, isindex, form {
display: block;
}

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, json, subprocess, errno, hashlib
import os, json, subprocess, errno, hashlib
from setup import Command, build_cache_dir, edit_file, dump_json
@ -82,10 +82,8 @@ class Check(Command):
p = subprocess.Popen(['rapydscript', 'lint', f])
return p.wait() != 0
if ext == '.yaml':
sys.path.insert(0, self.wn_path)
import whats_new
whats_new.render_changelog(self.j(self.d(self.SRC), 'Changelog.yaml'))
sys.path.remove(self.wn_path)
p = subprocess.Popen(['python', self.j(self.wn_path, 'whats_new.py'), f])
return p.wait() != 0
def run(self, opts):
self.fhash_cache = {}

View File

@ -102,7 +102,7 @@ class SourceForge(Base): # {{{
for i in range(5):
try:
check_call([
'rsync', '-h', '-z', '--progress', '-e', 'ssh -x', x,
'rsync', '-h', '-zz', '--progress', '-e', 'ssh -x', x,
'%s,%s@frs.sourceforge.net:%s' %
(self.username, self.project, self.rdir + '/')
])

View File

@ -10,7 +10,6 @@ import bz2
import errno
import glob
import gzip
import HTMLParser
import io
import json
import os
@ -22,8 +21,6 @@ import subprocess
import sys
import tempfile
import time
import urllib2
import urlparse
import zipfile
import zlib
from collections import namedtuple
@ -33,6 +30,24 @@ from email.utils import parsedate
from functools import partial
from multiprocessing.pool import ThreadPool
from xml.sax.saxutils import escape, quoteattr
try:
from html import unescape as u
except ImportError:
from HTMLParser import HTMLParser
u = HTMLParser().unescape
try:
from urllib.parse import parse_qs, urlparse
except ImportError:
from urlparse import parse_qs, urlparse
try:
from urllib.error import URLError
from urllib.request import urlopen, Request, build_opener
except Exception:
from urllib2 import urlopen, Request, build_opener, URLError
# }}}
USER_AGENT = 'calibre mirror'
@ -44,15 +59,13 @@ INDEX = MR_URL + 'showpost.php?p=1362767&postcount=1'
# INDEX = 'file:///t/raw.html'
IndexEntry = namedtuple('IndexEntry', 'name url donate history uninstall deprecated thread_id')
u = HTMLParser.HTMLParser().unescape
socket.setdefaulttimeout(30)
def read(url, get_info=False): # {{{
if url.startswith("file://"):
return urllib2.urlopen(url).read()
opener = urllib2.build_opener()
return urlopen(url).read()
opener = build_opener()
opener.addheaders = [
('User-Agent', USER_AGENT),
('Accept-Encoding', 'gzip,deflate'),
@ -62,7 +75,7 @@ def read(url, get_info=False): # {{{
try:
res = opener.open(url)
break
except urllib2.URLError as e:
except URLError as e:
if not isinstance(e.reason, socket.timeout) or i == 9:
raise
time.sleep(random.randint(10, 45))
@ -82,7 +95,7 @@ def read(url, get_info=False): # {{{
def url_to_plugin_id(url, deprecated):
query = urlparse.parse_qs(urlparse.urlparse(url).query)
query = parse_qs(urlparse(url).query)
ans = (query['t'] if 't' in query else query['p'])[0]
if deprecated:
ans += '-deprecated'
@ -149,11 +162,13 @@ def convert_node(fields, x, names={}, import_data=None):
return x.s.decode('utf-8') if isinstance(x.s, bytes) else x.s
elif name == 'Num':
return x.n
elif name == 'Constant':
return x.value
elif name in {'Set', 'List', 'Tuple'}:
func = {'Set':set, 'List':list, 'Tuple':tuple}[name]
return func(map(conv, x.elts))
return func(list(map(conv, x.elts)))
elif name == 'Dict':
keys, values = map(conv, x.keys), map(conv, x.values)
keys, values = list(map(conv, x.keys)), list(map(conv, x.values))
return dict(zip(keys, values))
elif name == 'Call':
if len(x.args) != 1 and len(x.keywords) != 0:
@ -182,7 +197,7 @@ def get_import_data(name, mod, zf, names):
if mod in names:
raw = zf.open(names[mod]).read()
module = ast.parse(raw, filename='__init__.py')
top_level_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(module))
top_level_assigments = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'Assign']
for node in top_level_assigments:
targets = {getattr(t, 'id', None) for t in node.targets}
targets.discard(None)
@ -196,9 +211,9 @@ def get_import_data(name, mod, zf, names):
def parse_metadata(raw, namelist, zf):
module = ast.parse(raw, filename='__init__.py')
top_level_imports = filter(lambda x:x.__class__.__name__ == 'ImportFrom', ast.iter_child_nodes(module))
top_level_classes = tuple(filter(lambda x:x.__class__.__name__ == 'ClassDef', ast.iter_child_nodes(module)))
top_level_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(module))
top_level_imports = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'ImportFrom']
top_level_classes = tuple(x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'ClassDef')
top_level_assigments = [x for x in ast.iter_child_nodes(module) if x.__class__.__name__ == 'Assign']
defaults = {
'name':'', 'description':'',
'supported_platforms':['windows', 'osx', 'linux'],
@ -226,7 +241,7 @@ def parse_metadata(raw, namelist, zf):
plugin_import_found |= inames
else:
all_imports.append((mod, [n.name for n in names]))
imported_names[n.asname or n.name] = mod
imported_names[names[-1].asname or names[-1].name] = mod
if not plugin_import_found:
return all_imports
@ -245,7 +260,7 @@ def parse_metadata(raw, namelist, zf):
names[x] = val
def parse_class(node):
class_assigments = filter(lambda x:x.__class__.__name__ == 'Assign', ast.iter_child_nodes(node))
class_assigments = [x for x in ast.iter_child_nodes(node) if x.__class__.__name__ == 'Assign']
found = {}
for node in class_assigments:
targets = {getattr(t, 'id', None) for t in node.targets}
@ -337,7 +352,7 @@ def update_plugin_from_entry(plugin, entry):
def fetch_plugin(old_index, entry):
lm_map = {plugin['thread_id']:plugin for plugin in old_index.values()}
raw = read(entry.url)
raw = read(entry.url).decode('utf-8', 'replace')
url, name = parse_plugin_zip_url(raw)
if url is None:
raise ValueError('Failed to find zip file URL for entry: %s' % repr(entry))
@ -346,9 +361,9 @@ def fetch_plugin(old_index, entry):
if plugin is not None:
# Previously downloaded plugin
lm = datetime(*tuple(map(int, re.split(r'\D', plugin['last_modified'])))[:6])
request = urllib2.Request(url)
request = Request(url)
request.get_method = lambda : 'HEAD'
with closing(urllib2.urlopen(request)) as response:
with closing(urlopen(request)) as response:
info = response.info()
slm = datetime(*parsedate(info.get('Last-Modified'))[:6])
if lm >= slm:
@ -413,7 +428,7 @@ def fetch_plugins(old_index):
src = plugin['file']
plugin['file'] = src.partition('_')[-1]
os.rename(src, plugin['file'])
raw = bz2.compress(json.dumps(ans, sort_keys=True, indent=4, separators=(',', ': ')))
raw = bz2.compress(json.dumps(ans, sort_keys=True, indent=4, separators=(',', ': ')).encode('utf-8'))
atomic_write(raw, PLUGINS)
# Cleanup any extra .zip files
all_plugin_files = {p['file'] for p in ans.values()}
@ -503,7 +518,7 @@ h1 { text-align: center }
name, count = x
return '<tr><td>%s</td><td>%s</td></tr>\n' % (escape(name), count)
pstats = map(plugin_stats, sorted(stats.items(), reverse=True, key=lambda x:x[1]))
pstats = list(map(plugin_stats, sorted(stats.items(), reverse=True, key=lambda x:x[1])))
stats = '''\
<!DOCTYPE html>
<html>

View File

@ -100,8 +100,12 @@ class POT(Command): # {{{
root = json.load(f)
entries = root['639-3']
ans = []
for x in sorted(entries, key=lambda x:(x.get('name') or '').lower()):
name = x.get('name')
def name_getter(x):
return x.get('inverted_name') or x.get('name')
for x in sorted(entries, key=lambda x:name_getter(x).lower()):
name = name_getter(x)
if name:
ans.append(u'msgid "{}"'.format(name))
ans.append('msgstr ""')
@ -849,7 +853,7 @@ class ISO639(Command): # {{{
threeb = unicode_type(threeb)
if threeb is None:
continue
name = x.get('name')
name = x.get('inverted_name') or x.get('name')
if name:
name = unicode_type(name)
if not name or name[0] in '!~=/\'"':

View File

@ -123,7 +123,7 @@ def get_fosshub_data():
def send_data(loc):
subprocess.check_call([
'rsync', '--inplace', '--delete', '-r', '-z', '-h', '--progress', '-e',
'rsync', '--inplace', '--delete', '-r', '-zz', '-h', '--progress', '-e',
'ssh -x', loc + '/', '%s@%s:%s' % (STAGING_USER, STAGING_HOST, STAGING_DIR)
])

View File

@ -6,7 +6,7 @@ from polyglot.builtins import map, unicode_type, environ_item, hasenv, getenv, a
import sys, locale, codecs, os, importlib, collections
__appname__ = 'calibre'
numeric_version = (4, 10, 1)
numeric_version = (4, 12, 0)
__version__ = '.'.join(map(unicode_type, numeric_version))
git_version = None
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -9,7 +9,7 @@ from calibre import guess_type
from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase)
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_comic_metadata
from calibre.ebooks.metadata.archive import ArchiveExtract, KPFExtract, get_comic_metadata
from calibre.ebooks.html.to_zip import HTML2ZIP
plugins = []
@ -124,7 +124,7 @@ class TXT2TXTZ(FileTypePlugin):
return path_to_ebook
plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract,]
plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, KPFExtract]
# }}}
# Metadata reader plugins {{{
@ -1738,15 +1738,6 @@ class StoreNextoStore(StoreBase):
affiliate = True
class StoreOpenBooksStore(StoreBase):
name = 'Open Books'
description = 'Comprehensive listing of DRM free e-books from a variety of sources provided by users of calibre.'
actual_plugin = 'calibre.gui2.store.stores.open_books_plugin:OpenBooksStore'
drm_free_only = True
headquarters = 'US'
class StoreOzonRUStore(StoreBase):
name = 'OZON.ru'
description = 'e-books from OZON.ru'
@ -1910,7 +1901,6 @@ plugins += [
StoreMillsBoonUKStore,
StoreMobileReadStore,
StoreNextoStore,
StoreOpenBooksStore,
StoreOzonRUStore,
StorePragmaticBookshelfStore,
StorePublioStore,

View File

@ -26,7 +26,7 @@ from calibre.db.tables import VirtualTable
from calibre.db.write import get_series_values, uniq
from calibre.db.lazy import FormatMetadata, FormatsList, ProxyMetadata
from calibre.ebooks import check_ebook_format
from calibre.ebooks.metadata import string_to_authors, author_to_author_sort
from calibre.ebooks.metadata import string_to_authors, author_to_author_sort, authors_to_sort_string
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ptempfile import (base_dir, PersistentTemporaryFile,
@ -1297,6 +1297,7 @@ class Cache(object):
if set_title and mi.title:
path_changed = True
set_field('title', mi.title)
authors_changed = False
if set_authors:
path_changed = True
if not mi.authors:
@ -1305,6 +1306,7 @@ class Cache(object):
for a in mi.authors:
authors += string_to_authors(a)
set_field('authors', authors)
authors_changed = True
if path_changed:
self._update_path({book_id})
@ -1339,7 +1341,13 @@ class Cache(object):
if val is not None:
protected_set_field(field, val)
for field in ('author_sort', 'publisher', 'series', 'tags', 'comments',
val = mi.get('author_sort', None)
if authors_changed and (not val or mi.is_null('author_sort')):
val = authors_to_sort_string(mi.authors)
if authors_changed or (force_changes and val is not None) or not mi.is_null('author_sort'):
protected_set_field('author_sort', val)
for field in ('publisher', 'series', 'tags', 'comments',
'languages', 'pubdate'):
val = mi.get(field, None)
if (force_changes and val is not None) or not mi.is_null(field):

View File

@ -13,7 +13,7 @@ from calibre import prints
from calibre.db.cli.utils import str_width
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.date import isoformat
from polyglot.builtins import iteritems, unicode_type, map
from polyglot.builtins import as_bytes, iteritems, map, unicode_type
readonly = True
version = 0 # change this if you change signature of implementation()
@ -203,6 +203,8 @@ def do_list(
)
with ColoredStream(sys.stdout, fg='green'):
prints(''.join(titles))
stdout = getattr(sys.stdout, 'buffer', sys.stdout)
linesep = as_bytes(os.linesep)
wrappers = [TextWrapper(x - 1).wrap if x > 1 else lambda y: y for x in widths]
@ -213,12 +215,12 @@ def do_list(
lines = max(map(len, text))
for l in range(lines):
for i, field in enumerate(text):
ft = text[i][l] if l < len(text[i]) else u''
sys.stdout.write(ft.encode('utf-8'))
ft = text[i][l] if l < len(text[i]) else ''
stdout.write(ft.encode('utf-8'))
if i < len(text) - 1:
filler = (u'%*s' % (widths[i] - str_width(ft) - 1, u''))
sys.stdout.write((filler + separator).encode('utf-8'))
print()
filler = ('%*s' % (widths[i] - str_width(ft) - 1, ''))
stdout.write((filler + separator).encode('utf-8'))
stdout.write(linesep)
def option_parser(get_parser, args):

View File

@ -49,8 +49,9 @@ def main(opts, args, dbctx):
if mi is None:
raise SystemExit('Id #%d is not present in database.' % id)
if opts.as_opf:
stdout = getattr(sys.stdout, 'buffer', sys.stdout)
mi = OPFCreator(getcwd(), mi)
mi.render(sys.stdout)
mi.render(stdout)
else:
prints(unicode_type(mi))

View File

@ -11,6 +11,7 @@ from functools import partial
from io import BytesIO
from calibre.ebooks.metadata import author_to_author_sort, title_sort
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import UNDEFINED_DATE
from calibre.db.tests.base import BaseTest, IMG
from polyglot.builtins import iteritems, itervalues, unicode_type
@ -421,13 +422,13 @@ class WritingTest(BaseTest):
cache.set_metadata(2, mi)
nmi = cache.get_metadata(2, get_cover=True, cover_as_data=True)
ae(oldmi.cover_data, nmi.cover_data)
self.compare_metadata(nmi, oldmi, exclude={'last_modified', 'format_metadata'})
self.compare_metadata(nmi, oldmi, exclude={'last_modified', 'format_metadata', 'formats'})
cache.set_metadata(1, mi2, force_changes=True)
nmi2 = cache.get_metadata(1, get_cover=True, cover_as_data=True)
# The new code does not allow setting of #series_index to None, instead
# it is reset to 1.0
ae(nmi2.get_extra('#series'), 1.0)
self.compare_metadata(nmi2, oldmi2, exclude={'last_modified', 'format_metadata', '#series_index'})
self.compare_metadata(nmi2, oldmi2, exclude={'last_modified', 'format_metadata', '#series_index', 'formats'})
cache = self.init_cache(self.cloned_library)
mi = cache.get_metadata(1)
@ -436,6 +437,12 @@ class WritingTest(BaseTest):
cache.set_metadata(3, mi)
self.assertEqual(set(otags), set(cache.field_for('tags', 3)), 'case changes should not be allowed in set_metadata')
# test that setting authors without author sort results in an
# auto-generated authors sort
mi = Metadata('empty', ['a1', 'a2'])
cache.set_metadata(1, mi)
self.assertEqual('a1 & a2', cache.field_for('author_sort', 1))
# }}}
def test_conversion_options(self): # {{{

View File

@ -7,16 +7,14 @@ import os, time, sys
from functools import cmp_to_key
from calibre.constants import preferred_encoding, DEBUG, ispy3
from calibre import isbytestring, force_unicode
from calibre.utils.icu import sort_key
from calibre import isbytestring
from calibre.ebooks.metadata.book.base import Metadata
from calibre.devices.usbms.books import Book as Book_
from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import Book as Book_, CollectionsBookList, none_cmp
from calibre.utils.config_base import prefs
from calibre.devices.usbms.driver import debug_print
from calibre.ebooks.metadata import author_to_author_sort
from polyglot.builtins import unicode_type, string_or_bytes, iteritems, itervalues, cmp
from polyglot.builtins import unicode_type, iteritems, itervalues
class Book(Book_):
@ -72,6 +70,7 @@ class Book(Book_):
self.can_put_on_shelves = True
self.kobo_series = None
self.kobo_series_number = None # Kobo stores the series number as string. And it can have a leading "#".
self.kobo_series_id = None
self.kobo_subtitle = None
if thumbnail_name is not None:
@ -86,6 +85,10 @@ class Book(Book_):
# If we don't have a content Id, we don't know what type it is.
return self.contentID and self.contentID.startswith("file")
@property
def has_kobo_series(self):
return self.kobo_series is not None
@property
def is_purchased_kepub(self):
return self.contentID and not self.contentID.startswith("file")
@ -104,6 +107,8 @@ class Book(Book_):
fmt('Content ID', self.contentID)
if self.kobo_series:
fmt('Kobo Series', self.kobo_series + ' #%s'%self.kobo_series_number)
if self.kobo_series_id:
fmt('Kobo Series ID', self.kobo_series_id)
if self.kobo_subtitle:
fmt('Subtitle', self.kobo_subtitle)
if self.mime:
@ -292,24 +297,6 @@ class KTCollectionsBookList(CollectionsBookList):
# Sort collections
result = {}
def none_cmp(xx, yy):
x = xx[1]
y = yy[1]
if x is None and y is None:
# No sort_key needed here, because defaults are ascii
return cmp(xx[2], yy[2])
if x is None:
return 1
if y is None:
return -1
if isinstance(x, string_or_bytes) and isinstance(y, string_or_bytes):
x, y = sort_key(force_unicode(x)), sort_key(force_unicode(y))
c = cmp(x, y)
if c != 0:
return c
# same as above -- no sort_key needed here
return cmp(xx[2], yy[2])
for category, lpaths in iteritems(collections):
books = sorted(itervalues(lpaths), key=cmp_to_key(none_cmp))
result[category] = [x[0] for x in books]

View File

@ -83,7 +83,7 @@ class KOBO(USBMS):
dbversion = 0
fwversion = (0,0,0)
supported_dbversion = 156
supported_dbversion = 158
has_kepubs = False
supported_platforms = ['windows', 'osx', 'linux']
@ -1349,7 +1349,7 @@ class KOBOTOUCH(KOBO):
' Based on the existing Kobo driver by %s.') % KOBO.author
# icon = I('devices/kobotouch.jpg')
supported_dbversion = 157
supported_dbversion = 158
min_supported_dbversion = 53
min_dbversion_series = 65
min_dbversion_externalid = 65
@ -1357,11 +1357,12 @@ class KOBOTOUCH(KOBO):
min_dbversion_images_on_sdcard = 77
min_dbversion_activity = 77
min_dbversion_keywords = 82
min_dbversion_seriesid = 136
# Starting with firmware version 3.19.x, the last number appears to be is a
# build number. A number will be recorded here but it can be safely ignored
# when testing the firmware version.
max_supported_fwversion = (4, 19, 14114)
max_supported_fwversion = (4, 20, 14601)
# The following document firwmare versions where new function or devices were added.
# Not all are used, but this feels a good place to record it.
min_fwversion_shelves = (2, 0, 0)
@ -1377,11 +1378,13 @@ class KOBOTOUCH(KOBO):
min_librah20_fwversion = (4, 16, 13337) # "Reviewers" release.
min_fwversion_epub_location = (4, 17, 13651) # ePub reading location without full contentid.
min_fwversion_dropbox = (4, 18, 13737) # The Forma only at this point.
min_fwversion_serieslist = (4, 20, 14601) # Series list needs the SeriesID to be set.
has_kepubs = True
booklist_class = KTCollectionsBookList
book_class = Book
kobo_series_dict = {}
MAX_PATH_LEN = 185 # 250 - (len(" - N3_LIBRARY_SHELF.parsed") + len("F:\.kobo\images\"))
KOBO_EXTRA_CSSFILE = 'kobo_extra.css'
@ -1610,7 +1613,8 @@ class KOBOTOUCH(KOBO):
bl_cache[b.lpath] = idx
def update_booklist(prefix, path, ContentID, ContentType, MimeType, ImageID,
title, authors, DateCreated, Description, Publisher, series, seriesnumber,
title, authors, DateCreated, Description, Publisher,
series, seriesnumber, SeriesID, SeriesNumberFloat,
ISBN, Language, Subtitle,
readstatus, expired, favouritesindex, accessibility, isdownloaded,
userid, bookshelves
@ -1747,10 +1751,16 @@ class KOBOTOUCH(KOBO):
bl[idx].kobo_metadata = kobo_metadata
bl[idx].kobo_series = series
bl[idx].kobo_series_number = seriesnumber
bl[idx].kobo_series_id = SeriesID
bl[idx].kobo_subtitle = Subtitle
bl[idx].can_put_on_shelves = allow_shelves
bl[idx].mime = MimeType
if not bl[idx].is_sideloaded and bl[idx].has_kobo_series and SeriesID is not None:
if show_debug:
debug_print('KoboTouch:update_booklist - Have purchased kepub with series, saving SeriesID=', SeriesID)
self.kobo_series_dict[series] = SeriesID
if lpath in playlist_map:
bl[idx].device_collections = playlist_map.get(lpath,[])
bl[idx].current_shelves = bookshelves
@ -1800,10 +1810,16 @@ class KOBOTOUCH(KOBO):
book.kobo_metadata = kobo_metadata
book.kobo_series = series
book.kobo_series_number = seriesnumber
book.kobo_series_id = SeriesID
book.kobo_subtitle = Subtitle
book.can_put_on_shelves = allow_shelves
# debug_print('KoboTouch:update_booklist - title=', title, 'book.device_collections', book.device_collections)
if not book.is_sideloaded and book.has_kobo_series and SeriesID is not None:
if show_debug:
debug_print('KoboTouch:update_booklist - Have purchased kepub with series, saving SeriesID=', SeriesID)
self.kobo_series_dict[series] = SeriesID
if bl.add_book(book, replace_metadata=False):
changed = True
if show_debug:
@ -1863,6 +1879,10 @@ class KOBOTOUCH(KOBO):
columns += ", Series, SeriesNumber, ___UserID, ExternalId, Subtitle"
else:
columns += ', null as Series, null as SeriesNumber, ___UserID, null as ExternalId, null as Subtitle'
if self.supports_series_list:
columns += ", SeriesID, SeriesNumberFloat"
else:
columns += ', null as SeriesID, null as SeriesNumberFloat'
where_clause = ''
if self.supports_kobo_archive() or self.supports_overdrive():
@ -1957,7 +1977,8 @@ class KOBOTOUCH(KOBO):
prefix = self._card_a_prefix if oncard == 'carda' else self._main_prefix
changed = update_booklist(prefix, path, row['ContentID'], row['ContentType'], row['MimeType'], row['ImageId'],
row['Title'], row['Attribution'], row['DateCreated'], row['Description'], row['Publisher'],
row['Series'], row['SeriesNumber'], row['ISBN'], row['Language'], row['Subtitle'],
row['Series'], row['SeriesNumber'], row['SeriesID'], row['SeriesNumberFloat'],
row['ISBN'], row['Language'], row['Subtitle'],
row['ReadStatus'], row['___ExpirationStatus'],
int(row['FavouritesIndex']), row['Accessibility'], row['IsDownloaded'],
row['___UserID'], bookshelves
@ -1972,6 +1993,7 @@ class KOBOTOUCH(KOBO):
self.dump_bookshelves(connection)
else:
debug_print("KoboTouch:books - automatically managing metadata")
debug_print("KoboTouch:books - self.kobo_series_dict=", self.kobo_series_dict)
# Remove books that are no longer in the filesystem. Cache contains
# indices into the booklist if book not in filesystem, None otherwise
# Do the operation in reverse order so indices remain valid
@ -3127,21 +3149,29 @@ class KOBOTOUCH(KOBO):
kobo_series_number = None
series_number_changed = not (kobo_series_number == newmi.series_index)
if series_changed or series_number_changed:
if newmi.series is not None:
new_series = newmi.series
try:
new_series_number = "%g" % newmi.series_index
except:
new_series_number = None
else:
new_series = None
if newmi.series is not None:
new_series = newmi.series
try:
new_series_number = "%g" % newmi.series_index
except:
new_series_number = None
else:
new_series = None
new_series_number = None
if series_changed or series_number_changed:
update_values.append(new_series)
set_clause += ', Series = ? '
update_values.append(new_series_number)
set_clause += ', SeriesNumber = ? '
if self.supports_series_list and book.is_sideloaded:
series_id = self.kobo_series_dict.get(new_series, new_series)
if not book.kobo_series_id == series_id or series_changed or series_number_changed:
update_values.append(series_id)
set_clause += ', SeriesID = ? '
update_values.append(new_series_number)
set_clause += ', SeriesNumberFloat = ? '
debug_print("KoboTouch:set_core_metadata Setting SeriesID - new_series='%s', series_id='%s'" % (new_series, series_id))
if not series_only:
if not (newmi.title == kobo_metadata.title):
@ -3537,6 +3567,10 @@ class KOBOTOUCH(KOBO):
def supports_series(self):
return self.dbversion >= self.min_dbversion_series
@property
def supports_series_list(self):
return self.dbversion >= self.min_dbversion_seriesid and self.fwversion >= self.min_fwversion_serieslist
def supports_kobo_archive(self):
return self.dbversion >= self.min_dbversion_archive

View File

@ -36,7 +36,7 @@ from calibre.utils.filenames import ascii_filename as sanitize, shorten_componen
from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as
unpublish_zeroconf, get_all_ips)
from calibre.utils.socket_inheritance import set_socket_inherit
from polyglot.builtins import unicode_type, iteritems, itervalues
from polyglot.builtins import as_bytes, unicode_type, iteritems, itervalues
from polyglot import queue
@ -100,7 +100,7 @@ class ConnectionListener(Thread):
s = self.driver._json_encode(
self.driver.opcodes['CALIBRE_BUSY'],
{'otherDevice': d.get_gui_name()})
self.driver._send_byte_string(device_socket, (b'%d' % len(s)) + s)
self.driver._send_byte_string(device_socket, (b'%d' % len(s)) + as_bytes(s))
sock.close()
except queue.Empty:
pass
@ -636,7 +636,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
s = self._json_encode(self.opcodes[op], arg)
if print_debug_info and extra_debug:
self._debug('send string', s)
self._send_byte_string(self.device_socket, (b'%d' % len(s)) + s)
self._send_byte_string(self.device_socket, (b'%d' % len(s)) + as_bytes(s))
if not wait_for_response:
return None, None
return self._receive_from_client(print_debug_info=print_debug_info)
@ -841,10 +841,10 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
json_metadata = defaultdict(dict)
json_metadata[key]['book'] = self.json_codec.encode_book_metadata(book['book'])
json_metadata[key]['last_used'] = book['last_used']
result = json.dumps(json_metadata, indent=2, default=to_json)
fd.write("%0.7d\n"%(len(result)+1))
result = as_bytes(json.dumps(json_metadata, indent=2, default=to_json))
fd.write(("%0.7d\n"%(len(result)+1)).encode('ascii'))
fd.write(result)
fd.write('\n')
fd.write(b'\n')
count += 1
self._debug('wrote', count, 'entries, purged', purged, 'entries')

View File

@ -20,6 +20,31 @@ from calibre.utils.icu import sort_key
from polyglot.builtins import string_or_bytes, iteritems, itervalues, cmp
def none_cmp(xx, yy):
    """Comparator for collection entries.

    Element [1] of each tuple is the primary sort value and element [2]
    is the tie-breaker. ``None`` sorts after every real value; string
    values are compared via ICU collation keys.
    """
    left = xx[1]
    right = yy[1]
    if left is None and right is None:
        # Tie-break values are plain ascii defaults, so no sort_key needed
        return cmp(xx[2], yy[2])
    if left is None:
        return 1
    if right is None:
        return -1
    if isinstance(left, string_or_bytes) and isinstance(right, string_or_bytes):
        # Compare strings with proper collation keys
        left = sort_key(force_unicode(left))
        right = sort_key(force_unicode(right))
    try:
        outcome = cmp(left, right)
    except TypeError:
        # Incomparable types: treat the primary keys as equal
        outcome = 0
    if outcome:
        return outcome
    # Equal primary keys: fall back to the ascii tie-breaker, as above
    try:
        return cmp(xx[2], yy[2])
    except TypeError:
        return 0
class Book(Metadata):
def __init__(self, prefix, lpath, size=None, other=None):
@ -280,30 +305,6 @@ class CollectionsBookList(BookList):
# Sort collections
result = {}
def none_cmp(xx, yy):
    # Comparator for collection entries: item [1] is the primary sort value,
    # item [2] is the tie-breaker. None sorts after every real value.
    x = xx[1]
    y = yy[1]
    if x is None and y is None:
        # No sort_key needed here, because defaults are ascii
        return cmp(xx[2], yy[2])
    if x is None:
        return 1
    if y is None:
        return -1
    if isinstance(x, string_or_bytes) and isinstance(y, string_or_bytes):
        # Strings are compared via ICU collation keys after forcing to unicode
        x, y = sort_key(force_unicode(x)), sort_key(force_unicode(y))
    try:
        c = cmp(x, y)
    except TypeError:
        # Incomparable types: fall through to the tie-breaker
        c = 0
    if c != 0:
        return c
    # same as above -- no sort_key needed here
    try:
        return cmp(xx[2], yy[2])
    except TypeError:
        return 0
for category, lpaths in iteritems(collections):
books = sorted(itervalues(lpaths), key=cmp_to_key(none_cmp))
result[category] = [x[0] for x in books]

View File

@ -38,7 +38,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx']
'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf']
def return_raster_image(path):

View File

@ -28,8 +28,12 @@ class DJVUInput(InputFormatPlugin):
from calibre.ebooks.djvu.djvu import DJVUFile
x = DJVUFile(stream)
x.get_text(stdout)
raw_text = stdout.getvalue()
if not raw_text:
raise ValueError('The DJVU file contains no text, only images, probably page scans.'
' calibre only supports conversion of DJVU files with actual text in them.')
html = convert_basic(stdout.getvalue().replace(b"\n", b' ').replace(
html = convert_basic(raw_text.replace(b"\n", b' ').replace(
b'\037', b'\n\n'))
# Run the HTMLized text through the html processing plugin.
from calibre.customize.ui import plugin_for_input_format

View File

@ -40,6 +40,29 @@ def archive_type(stream):
return ans
class KPFExtract(FileTypePlugin):
    """On-import plugin that pulls the embedded source DOCX out of a KPF file."""

    name = 'KPF Extract'
    author = 'Kovid Goyal'
    description = _('Extract the source DOCX file from Amazon Kindle Create KPF files.'
            ' Note this will not contain any edits made in the Kindle Create program itself.')
    file_types = {'kpf'}
    supported_platforms = ['windows', 'osx', 'linux']
    # Run automatically whenever a KPF file is added
    on_import = True

    def run(self, archive):
        from calibre.utils.zipfile import ZipFile
        with ZipFile(archive, 'r') as zf:
            docx_members = [n for n in zf.namelist() if n.lower().endswith('.docx')]
            if not docx_members:
                # No embedded DOCX: hand the original archive back untouched
                return archive
            extracted = self.temporary_file('_kpf_extract.docx')
            with closing(extracted):
                extracted.write(zf.read(docx_members[0]))
        return extracted.name
class ArchiveExtract(FileTypePlugin):
name = 'Archive Extract'
author = 'Kovid Goyal'

View File

@ -5,7 +5,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from polyglot.builtins import iteritems
from polyglot.urllib import quote_plus
from polyglot.urllib import quote, quote_plus
AUTHOR_SEARCHES = {
'goodreads':
@ -48,17 +48,21 @@ all_book_searches = BOOK_SEARCHES.__iter__
all_author_searches = AUTHOR_SEARCHES.__iter__
def qquote(val, use_plus=True):
    """URL-quote *val* (str or bytes, treated as UTF-8) and return a str.

    :param val: the value to quote; str values are encoded as UTF-8 first
    :param use_plus: when True spaces become ``+`` (quote_plus), otherwise
        they are percent-encoded (quote)
    """
    # NOTE(review): this span contained both the pre- and post-change diff
    # lines; the generalized use_plus signature is kept (backward-compatible,
    # default preserves the old behavior).
    if not isinstance(val, bytes):
        val = val.encode('utf-8')
    ans = quote_plus(val) if use_plus else quote(val)
    if isinstance(ans, bytes):
        # Defensive: some quote implementations may return bytes
        ans = ans.decode('utf-8')
    return ans
def specialised_quote(template, val):
    # goodreads.com URL templates get percent-encoded spaces (qquote with
    # use_plus=False); every other site uses the quote_plus form.
    return qquote(val, 'goodreads.com' not in template)
def url_for(template, data):
    """Fill *template*'s format placeholders with quoted values from *data*.

    Each value is quoted via specialised_quote so the quoting style can
    depend on which site the template targets.
    """
    # NOTE(review): this span contained both diff sides; the version routing
    # through specialised_quote (the post-change line) is kept.
    return template.format(**{k: specialised_quote(template, v) for k, v in iteritems(data)})
def url_for_author_search(key, **kw):

View File

@ -343,8 +343,8 @@ class Source(Plugin):
if authors:
# Leave ' in there for Irish names
remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
replace_pat = re.compile(r'[-+.:;,]')
remove_pat = re.compile(r'[!@#$%^&*()()「」{}`~"\s\[\]/]')
replace_pat = re.compile(r'[-+.:;,,。;:]')
if only_first_author:
authors = authors[:1]
for au in authors:
@ -384,7 +384,7 @@ class Source(Plugin):
# Remove hyphens only if they have whitespace before them
(r'(\s-)', ' '),
# Replace other special chars with a space
(r'''[:,;!@$%^&*(){}.`~"\s\[\]/]''', ' '),
(r'''[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”''', ' '),
]]
for pat, repl in title_patterns:

View File

@ -3,38 +3,36 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>; 2011, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import time
from functools import partial
try:
from queue import Empty, Queue
except ImportError:
from Queue import Empty, Queue
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Option, Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre import as_unicode
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'db': 'https://www.douban.com/xmlns/',
'gd': 'http://schemas.google.com/g/2005'
}
'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
'atom': 'http://www.w3.org/2005/Atom',
'db': 'https://www.douban.com/xmlns/',
'gd': 'http://schemas.google.com/g/2005'
}
def get_details(browser, url, timeout): # {{{
try:
if Douban.DOUBAN_API_KEY and Douban.DOUBAN_API_KEY != '':
if Douban.DOUBAN_API_KEY:
url = url + "?apikey=" + Douban.DOUBAN_API_KEY
raw = browser.open_novisit(url, timeout=timeout).read()
except Exception as e:
gc = getattr(e, 'getcode', lambda : -1)
gc = getattr(e, 'getcode', lambda: -1)
if gc() != 403:
raise
# Douban is throttling us, wait a little
@ -42,150 +40,124 @@ def get_details(browser, url, timeout): # {{{
raw = browser.open_novisit(url, timeout=timeout).read()
return raw
# }}}
class Douban(Source):
name = 'Douban Books'
author = 'Li Fanxi'
version = (2, 1, 2)
author = 'Li Fanxi, xcffl, jnozsc'
version = (3, 1, 0)
minimum_calibre_version = (2, 80, 0)
description = _('Downloads metadata and covers from Douban.com. '
'Useful only for Chinese language books.')
description = _(
'Downloads metadata and covers from Douban.com. '
'Useful only for Chinese language books.'
)
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags',
'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating',
'identifier:douban']) # language currently disabled
touched_fields = frozenset([
'title', 'authors', 'tags', 'pubdate', 'comments', 'publisher',
'identifier:isbn', 'rating', 'identifier:douban'
]) # language currently disabled
supports_gzip_transfer_encoding = True
cached_cover_url_is_reliable = True
DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
DOUBAN_API_KEY = '0df993c66c0c636e29ecbb5344252a4a'
DOUBAN_API_URL = 'https://api.douban.com/v2/book/search'
DOUBAN_BOOK_URL = 'https://book.douban.com/subject/%s/'
options = (
Option('include_subtitle_in_title', 'bool', True, _('Include subtitle in book title:'),
_('Whether to append subtitle in the book title.')),
Option(
'include_subtitle_in_title', 'bool', True,
_('Include subtitle in book title:'),
_('Whether to append subtitle in the book title.')
),
)
def to_metadata(self, browser, log, entry_, timeout): # {{{
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
subtitle = XPath("descendant::db:attribute[@name='subtitle']")
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
booktag = XPath("descendant::db:tag/attribute::name")
rating = XPath("descendant::gd:rating/attribute::average")
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
douban_id = entry_.get('id')
title = entry_.get('title')
description = entry_.get('summary')
# subtitle = entry_.get('subtitle') # TODO: std metadata doesn't have this field
publisher = entry_.get('publisher')
isbn = entry_.get('isbn13') # ISBN11 is obsolute, use ISBN13
pubdate = entry_.get('pubdate')
authors = entry_.get('author')
book_tags = entry_.get('tags')
rating = entry_.get('rating')
cover_url = entry_.get('images', {}).get('large')
series = entry_.get('series')
def get_text(extra, x):
    """Apply the XPath callable *x* to *extra* and return the stripped text
    of the first match, or None when there is no usable text.

    :param extra: element/tree passed to the XPath callable
    :param x: a callable (compiled XPath) returning a list of elements
    """
    try:
        matches = x(extra)
        if matches:
            text = matches[0].text
            if text and text.strip():
                return text.strip()
    except Exception:
        # Narrowed from a bare except: still log, but let KeyboardInterrupt
        # and SystemExit propagate.
        log.exception('Programming error:')
    return None
id_url = entry_id(entry_)[0].text.replace('http://', 'https://')
douban_id = id_url.split('/')[-1]
title_ = ': '.join([x.text for x in title(entry_)]).strip()
subtitle = ': '.join([x.text for x in subtitle(entry_)]).strip()
if self.prefs['include_subtitle_in_title'] and len(subtitle) > 0:
title_ = title_ + ' - ' + subtitle
authors = [x.text.strip() for x in creator(entry_) if x.text]
if not authors:
authors = [_('Unknown')]
if not id_url or not title:
if not douban_id or not title:
# Silently discard this entry
return None
mi = Metadata(title_, authors)
mi.identifiers = {'douban':douban_id}
try:
log.info(id_url)
raw = get_details(browser, id_url, timeout)
feed = etree.fromstring(
xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0],
parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
)
extra = entry(feed)[0]
except:
log.exception('Failed to get additional details for', mi.title)
return mi
mi.comments = get_text(extra, description)
mi.publisher = get_text(extra, publisher)
mi = Metadata(title, authors)
mi.identifiers = {'douban': douban_id}
mi.publisher = publisher
mi.comments = description
# mi.subtitle = subtitle
# ISBN
isbns = []
for x in [t.text for t in isbn(extra)]:
if check_isbn(x):
isbns.append(x)
if isinstance(isbn, (type(''), bytes)):
if check_isbn(isbn):
isbns.append(isbn)
else:
for x in isbn:
if check_isbn(x):
isbns.append(x)
if isbns:
mi.isbn = sorted(isbns, key=len)[-1]
mi.all_isbns = isbns
# Tags
try:
btags = [x for x in booktag(extra) if x]
tags = []
for t in btags:
atags = [y.strip() for y in t.split('/')]
for tag in atags:
if tag not in tags:
tags.append(tag)
except:
log.exception('Failed to parse tags:')
tags = []
if tags:
mi.tags = [x.replace(',', ';') for x in tags]
mi.tags = [tag['name'] for tag in book_tags]
# pubdate
pubdate = get_text(extra, date)
if pubdate:
try:
default = utcnow().replace(day=15)
mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
except:
log.error('Failed to parse pubdate %r'%pubdate)
log.error('Failed to parse pubdate %r' % pubdate)
# Ratings
if rating(extra):
if rating:
try:
mi.rating = float(rating(extra)[0]) / 2.0
mi.rating = float(rating['average']) / 2.0
except:
log.exception('Failed to parse rating')
mi.rating = 0
# Cover
mi.has_douban_cover = None
u = cover_url(extra)
u = cover_url
if u:
u = u[0].replace('/spic/', '/lpic/')
# If URL contains "book-default", the book doesn't have a cover
if u.find('book-default') == -1:
mi.has_douban_cover = u
# Series
if series:
mi.series = series['title']
return mi
# }}}
def get_book_url(self, identifiers): # {{{
db = identifiers.get('douban', None)
if db is not None:
return ('douban', db, self.DOUBAN_BOOK_URL%db)
return ('douban', db, self.DOUBAN_BOOK_URL % db)
# }}}
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
@ -193,9 +165,9 @@ class Douban(Source):
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
SEARCH_URL = 'https://api.douban.com/book/subjects?'
ISBN_URL = 'https://api.douban.com/book/subject/isbn/'
SUBJECT_URL = 'https://api.douban.com/book/subject/'
SEARCH_URL = 'https://api.douban.com/v2/book/search?count=10&'
ISBN_URL = 'https://api.douban.com/v2/book/isbn/'
SUBJECT_URL = 'https://api.douban.com/v2/book/'
q = ''
t = None
@ -208,16 +180,18 @@ class Douban(Source):
q = subject
t = 'subject'
elif title or authors:
def build_term(prefix, parts):
    # `prefix` is accepted for call-site symmetry but not used in the
    # generated query term; the tokens are simply space-joined.
    return ' '.join(parts)
title_tokens = list(self.get_title_tokens(title))
if title_tokens:
q += build_term('title', title_tokens)
author_tokens = list(self.get_author_tokens(authors,
only_first_author=True))
author_tokens = list(
self.get_author_tokens(authors, only_first_author=True)
)
if author_tokens:
q += ((' ' if q != '' else '') +
build_term('author', author_tokens))
q += ((' ' if q != '' else '') + build_term('author', author_tokens))
t = 'search'
q = q.strip()
if isinstance(q, type(u'')):
@ -231,24 +205,40 @@ class Douban(Source):
url = SUBJECT_URL + q
else:
url = SEARCH_URL + urlencode({
'q': q,
})
'q': q,
})
if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '':
if t == "isbn" or t == "subject":
url = url + "?apikey=" + self.DOUBAN_API_KEY
else:
url = url + "&apikey=" + self.DOUBAN_API_KEY
return url
# }}}
def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
def download_cover(
self,
log,
result_queue,
abort, # {{{
title=None,
authors=None,
identifiers={},
timeout=30,
get_best_cover=False
):
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
log.info('No cached cover found, running identify')
rq = Queue()
self.identify(log, rq, abort, title=title, authors=authors,
identifiers=identifiers)
self.identify(
log,
rq,
abort,
title=title,
authors=authors,
identifiers=identifiers
)
if abort.is_set():
return
results = []
@ -257,8 +247,11 @@ class Douban(Source):
results.append(rq.get_nowait())
except Empty:
break
results.sort(key=self.identify_results_keygen(
title=title, authors=authors, identifiers=identifiers))
results.sort(
key=self.identify_results_keygen(
title=title, authors=authors, identifiers=identifiers
)
)
for mi in results:
cached_url = self.get_cached_cover_url(mi.identifiers)
if cached_url is not None:
@ -291,11 +284,18 @@ class Douban(Source):
url = self.cached_identifier_to_cover_url(db)
return url
# }}}
def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout):
from lxml import etree
def get_all_details(
self,
br,
log,
entries,
abort, # {{{
result_queue,
timeout
):
for relevance, i in enumerate(entries):
try:
ans = self.to_metadata(br, log, i, timeout)
@ -305,29 +305,31 @@ class Douban(Source):
for isbn in getattr(ans, 'all_isbns', []):
self.cache_isbn_to_identifier(isbn, db)
if ans.has_douban_cover:
self.cache_identifier_to_cover_url(db,
ans.has_douban_cover)
self.cache_identifier_to_cover_url(db, ans.has_douban_cover)
self.clean_downloaded_metadata(ans)
result_queue.put(ans)
except:
log.exception(
'Failed to get metadata for identify entry:',
etree.tostring(i))
log.exception('Failed to get metadata for identify entry:', i)
if abort.is_set():
break
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_ascii_chars
def identify(
self,
log,
result_queue,
abort,
title=None,
authors=None, # {{{
identifiers={},
timeout=30
):
import json
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entry = XPath('//atom:entry')
query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)
query = self.create_query(
log, title=title, authors=authors, identifiers=identifiers
)
if not query:
log.error('Insufficient metadata to construct query')
return
@ -335,45 +337,56 @@ class Douban(Source):
try:
raw = br.open_novisit(query, timeout=timeout).read()
except Exception as e:
log.exception('Failed to make identify query: %r'%query)
log.exception('Failed to make identify query: %r' % query)
return as_unicode(e)
try:
parser = etree.XMLParser(recover=True, no_network=True)
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
strip_encoding_pats=True)[0], parser=parser)
entries = entry(feed)
j = json.loads(raw)
except Exception as e:
log.exception('Failed to parse identify results')
return as_unicode(e)
if 'books' in j:
entries = j['books']
else:
entries = []
entries.append(j)
if not entries and identifiers and title and authors and \
not abort.is_set():
return self.identify(log, result_queue, abort, title=title,
authors=authors, timeout=timeout)
return self.identify(
log,
result_queue,
abort,
title=title,
authors=authors,
timeout=timeout
)
# There is no point running these queries in threads as douban
# throttles requests returning 403 Forbidden errors
self.get_all_details(br, log, entries, abort, result_queue, timeout)
return None
# }}}
if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
title_test, authors_test)
test_identify_plugin(Douban.name,
[
(
{'identifiers':{'isbn': '9787536692930'}, 'title':'三体',
'authors':['刘慈欣']},
[title_test('三体', exact=True),
authors_test(['刘慈欣'])]
),
(
{'title': 'Linux内核修炼之道', 'authors':['任桥伟']},
[title_test('Linux内核修炼之道', exact=False)]
),
])
from calibre.ebooks.metadata.sources.test import (
test_identify_plugin, title_test, authors_test
)
test_identify_plugin(
Douban.name, [
({
'identifiers': {
'isbn': '9787536692930'
},
'title': '三体',
'authors': ['刘慈欣']
}, [title_test('三体', exact=True),
authors_test(['刘慈欣'])]),
({
'title': 'Linux内核修炼之道',
'authors': ['任桥伟']
}, [title_test('Linux内核修炼之道', exact=False)]),
]
)
# }}}

View File

@ -6,14 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, datetime, os, numbers
import struct, datetime, os, numbers, binascii
from calibre.utils.date import utc_tz
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.utils import get_trailing_data
from polyglot.builtins import as_bytes, iteritems, range, unicode_type
from polyglot.builtins import iteritems, range, unicode_type
# PalmDB {{{
@ -210,7 +210,7 @@ class EXTHRecord(object):
else:
self.data, = struct.unpack(b'>L', self.data)
elif self.type in {209, 300}:
self.data = as_bytes(self.data.encode('hex'))
self.data = binascii.hexlify(self.data)
def __str__(self):
    # Debug rendering: "<record name> (<numeric EXTH type>): <raw data>"
    return '%s (%d): %r'%(self.name, self.type, self.data)

View File

@ -10,7 +10,7 @@ import shutil, os, re, struct, textwrap, io
from lxml import html, etree
from calibre import (xml_entity_to_unicode, entity_to_unicode)
from calibre import xml_entity_to_unicode, entity_to_unicode, guess_type
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.ebooks import DRMError, unit_convert
from calibre.ebooks.chardet import strip_encoding_declarations
@ -21,7 +21,7 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.mobi.reader.headers import BookHeader
from calibre.utils.img import save_cover_data_to
from calibre.utils.img import save_cover_data_to, gif_data_to_png_data, AnimatedGIF
from calibre.utils.imghdr import what
from polyglot.builtins import iteritems, unicode_type, range, map
@ -178,7 +178,7 @@ class MobiReader(object):
self.processed_html = strip_encoding_declarations(self.processed_html)
self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
self.processed_html)
self.extract_images(processed_records, output_dir)
image_name_map = self.extract_images(processed_records, output_dir)
self.replace_page_breaks()
self.cleanup_html()
@ -272,7 +272,7 @@ class MobiReader(object):
head.insert(0, title)
head.text = '\n\t'
self.upshift_markup(root)
self.upshift_markup(root, image_name_map)
guides = root.xpath('//guide')
guide = guides[0] if guides else None
metadata_elems = root.xpath('//metadata')
@ -389,8 +389,9 @@ class MobiReader(object):
raw += unit
return raw
def upshift_markup(self, root):
def upshift_markup(self, root, image_name_map=None):
self.log.debug('Converting style information to CSS...')
image_name_map = image_name_map or {}
size_map = {
'xx-small': '0.5',
'x-small': '1',
@ -510,10 +511,11 @@ class MobiReader(object):
recindex = attrib.pop(attr, None) or recindex
if recindex is not None:
try:
recindex = '%05d'%int(recindex)
except:
recindex = int(recindex)
except Exception:
pass
attrib['src'] = 'images/%s.jpg' % recindex
else:
attrib['src'] = 'images/' + image_name_map.get(recindex, '%05d.jpg' % recindex)
for attr in ('width', 'height'):
if attr in attrib:
val = attrib[attr]
@ -674,7 +676,7 @@ class MobiReader(object):
for i in getattr(self, 'image_names', []):
path = os.path.join(bp, 'images', i)
added.add(path)
manifest.append((path, 'image/jpeg'))
manifest.append((path, guess_type(path)[0] or 'image/jpeg'))
if cover_copied is not None:
manifest.append((cover_copied, 'image/jpeg'))
@ -870,6 +872,7 @@ class MobiReader(object):
os.makedirs(output_dir)
image_index = 0
self.image_names = []
image_name_map = {}
start = getattr(self.book_header, 'first_image_index', -1)
if start > self.num_sections or start < 0:
# BAEN PRC files have bad headers
@ -882,18 +885,36 @@ class MobiReader(object):
image_index += 1
if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
# This record is a known non image type, not need to try to
# This record is a known non image type, no need to try to
# load the image
continue
path = os.path.join(output_dir, '%05d.jpg' % image_index)
try:
if what(None, data) not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
continue
save_cover_data_to(data, path, minify_to=(10000, 10000))
imgfmt = what(None, data)
except Exception:
continue
if imgfmt not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
continue
if imgfmt == 'jpeg':
imgfmt = 'jpg'
if imgfmt == 'gif':
try:
data = gif_data_to_png_data(data)
imgfmt = 'png'
except AnimatedGIF:
pass
path = os.path.join(output_dir, '%05d.%s' % (image_index, imgfmt))
image_name_map[image_index] = os.path.basename(path)
if imgfmt == 'png':
with open(path, 'wb') as f:
f.write(data)
else:
try:
save_cover_data_to(data, path, minify_to=(10000, 10000))
except Exception:
continue
self.image_names.append(os.path.basename(path))
return image_name_map
def test_mbp_regex():

View File

@ -10,7 +10,7 @@ import struct, string, zlib, os
from collections import OrderedDict
from io import BytesIO
from calibre.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image
from calibre.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
from calibre.utils.imghdr import what
from calibre.ebooks import normalize
from polyglot.builtins import unicode_type, range, as_bytes, map
@ -417,13 +417,8 @@ def to_base(num, base=32, min_num_digits=None):
def mobify_image(data):
    'Convert PNG images to GIF as the idiotic Kindle cannot display some PNG'
    # NOTE(review): this span contained both the superseded PIL-based body and
    # the png_data_to_gif_data call; the newer helper-based version is kept.
    fmt = what(None, data)
    if fmt == 'png':
        data = png_data_to_gif_data(data)
    return data
# Font records {{{

View File

@ -16,7 +16,7 @@ from lxml import etree
from calibre import my_unichr
from calibre.ebooks.oeb.base import XHTML_NS, extract
from calibre.ebooks.mobi.utils import to_base, PolyglotDict
from polyglot.builtins import iteritems, unicode_type
from polyglot.builtins import iteritems, unicode_type, as_bytes
CHUNK_SIZE = 8192
@ -397,7 +397,7 @@ class Chunker(object):
pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
return ':off:'.join((pos, fid)).encode('utf-8')
placeholder_map = {k:to_placeholder(v) for k, v in
placeholder_map = {as_bytes(k):to_placeholder(v) for k, v in
iteritems(self.placeholder_map)}
# Now update the links

View File

@ -222,7 +222,12 @@ class Pool(object):
self.working = False
def shutdown(self):
    """Delete all worker objects and clear the worker list.

    Workers may already have been deleted on the C++ side (e.g. via Qt
    parent destruction), so guard every delete with sip.isdeleted to
    avoid crashing on a double delete.
    """
    # Plain loop instead of tuple(map(...)): the map was used purely for
    # side effects, which obscures intent.
    for worker in self.workers:
        if not sip.isdeleted(worker):
            sip.delete(worker)
    self.workers = []

View File

@ -48,16 +48,18 @@ def run_checks(container):
xml_items, html_items, raster_images, stylesheets = [], [], [], []
for name, mt in iteritems(container.mime_map):
items = None
decode = False
if mt in XML_TYPES:
items = xml_items
elif mt in OEB_DOCS:
items = html_items
elif mt in OEB_STYLES:
decode = True
items = stylesheets
elif is_raster_image(mt):
items = raster_images
if items is not None:
items.append((name, mt, container.open(name, 'rb').read()))
items.append((name, mt, container.raw_data(name, decode=decode)))
errors.extend(run_checkers(check_html_size, html_items))
errors.extend(run_checkers(check_xml_parsing, xml_items))
errors.extend(run_checkers(check_xml_parsing, html_items))

View File

@ -351,7 +351,7 @@ class CSSFlattener(object):
value = 0.0
cssdict[property] = "%0.5fem" % (value / fsize)
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id, recurse=True):
if not isinstance(node.tag, string_or_bytes) \
or namespace(node.tag) != XHTML_NS:
return
@ -569,8 +569,9 @@ class CSSFlattener(object):
del node.attrib['class']
if 'style' in node.attrib:
del node.attrib['style']
for child in node:
self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
if recurse:
for child in node:
self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
def flatten_head(self, item, href, global_href):
html = item.data
@ -660,9 +661,9 @@ class CSSFlattener(object):
stylizer = self.stylizers[item]
if self.specializer is not None:
self.specializer(item, stylizer)
body = html.find(XHTML('body'))
fsize = self.context.dest.fbase
self.flatten_node(body, stylizer, names, styles, pseudo_styles, fsize, item.id)
self.flatten_node(html, stylizer, names, styles, pseudo_styles, fsize, item.id, recurse=False)
self.flatten_node(html.find(XHTML('body')), stylizer, names, styles, pseudo_styles, fsize, item.id)
items = sorted(((key, val) for (val, key) in iteritems(styles)), key=lambda x:numeric_sort_key(x[0]))
# :hover must come after link and :active must come after :hover
psels = sorted(pseudo_styles, key=lambda x :

View File

@ -18,7 +18,7 @@ from operator import attrgetter, itemgetter
from html5_parser import parse
from PyQt5.Qt import (
QApplication, QMarginsF, QObject, QPageLayout, QTimer, QUrl, pyqtSignal
QApplication, QMarginsF, QObject, QPageLayout, Qt, QTimer, QUrl, pyqtSignal
)
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWebEngineWidgets import QWebEnginePage, QWebEngineProfile
@ -39,6 +39,7 @@ from calibre.srv.render_book import check_for_maths
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
from calibre.utils.logging import default_log
from calibre.utils.monotonic import monotonic
from calibre.utils.podofo import (
dedup_type3_fonts, get_podofo, remove_unused_fonts, set_metadata_implementation
)
@ -49,6 +50,7 @@ from polyglot.builtins import (
from polyglot.urllib import urlparse
OK, KILL_SIGNAL = range(0, 2)
HANG_TIME = 60 # seconds
# }}}
@ -172,10 +174,26 @@ class Renderer(QWebEnginePage):
self.titleChanged.connect(self.title_changed)
self.loadStarted.connect(self.load_started)
self.loadProgress.connect(self.load_progress)
self.loadFinished.connect(self.load_finished)
self.load_hang_check_timer = t = QTimer(self)
self.load_started_at = 0
t.setTimerType(Qt.VeryCoarseTimer)
t.setInterval(HANG_TIME * 1000)
t.setSingleShot(True)
t.timeout.connect(self.on_load_hang)
def load_started(self):
    # Record when loading began and arm the single-shot hang-check timer,
    # which fires after HANG_TIME seconds without further progress.
    self.load_started_at = monotonic()
    self.load_complete = False
    self.load_hang_check_timer.start()

def load_progress(self, amt):
    # Any progress signal counts as signs of life: restart the countdown.
    self.load_hang_check_timer.start()

def on_load_hang(self):
    # No load progress for HANG_TIME seconds: give up and report failure
    # through the normal load_finished path.
    self.log(self.log_prefix, 'Loading not complete after {} seconds, aborting.'.format(int(monotonic() - self.load_started_at)))
    self.load_finished(False)
def title_changed(self, title):
if self.wait_for_title and title == self.wait_for_title and self.load_complete:
@ -187,6 +205,7 @@ class Renderer(QWebEnginePage):
def load_finished(self, ok):
self.load_complete = True
self.load_hang_check_timer.stop()
if not ok:
self.working = False
self.work_done.emit(self, 'Load of {} failed'.format(self.url().toString()))
@ -900,7 +919,7 @@ def fonts_are_identical(fonts):
return True
def merge_font(fonts):
def merge_font(fonts, log):
# choose the largest font as the base font
fonts.sort(key=lambda f: len(f['Data'] or b''), reverse=True)
base_font = fonts[0]
@ -913,7 +932,7 @@ def merge_font(fonts):
cmaps = list(filter(None, (f['ToUnicode'] for f in t0_fonts)))
if cmaps:
t0_font['ToUnicode'] = as_bytes(merge_cmaps(cmaps))
base_font['sfnt'], width_for_glyph_id, height_for_glyph_id = merge_truetype_fonts_for_pdf(*(f['sfnt'] for f in descendant_fonts))
base_font['sfnt'], width_for_glyph_id, height_for_glyph_id = merge_truetype_fonts_for_pdf(tuple(f['sfnt'] for f in descendant_fonts), log)
widths = []
arrays = tuple(filter(None, (f['W'] for f in descendant_fonts)))
if arrays:
@ -928,7 +947,7 @@ def merge_font(fonts):
return t0_font, base_font, references_to_drop
def merge_fonts(pdf_doc):
def merge_fonts(pdf_doc, log):
all_fonts = pdf_doc.list_fonts(True)
base_font_map = {}
@ -957,7 +976,7 @@ def merge_fonts(pdf_doc):
items = []
for name, fonts in iteritems(base_font_map):
if mergeable(fonts):
t0_font, base_font, references_to_drop = merge_font(fonts)
t0_font, base_font, references_to_drop = merge_font(fonts, log)
for ref in references_to_drop:
replacements[ref] = t0_font['Reference']
data = base_font['sfnt']()[0]
@ -1227,7 +1246,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
page_number_display_map, page_layout, page_margins_map,
pdf_metadata, report_progress, toc if has_toc else None)
merge_fonts(pdf_doc)
merge_fonts(pdf_doc, log)
num_removed = dedup_type3_fonts(pdf_doc)
if num_removed:
log('Removed', num_removed, 'duplicated Type3 glyphs')

View File

@ -1,6 +1,5 @@
from __future__ import unicode_literals, absolute_import, print_function, division
import os, sys
from codecs import EncodedFile
from calibre.ebooks.rtf2xml import copy, check_encoding
from calibre.ptempfile import better_mktemp
@ -274,15 +273,10 @@ class ConvertToTags:
if self.__convert_utf or self.__bad_encoding:
copy_obj = copy.Copy(bug_handler=self.__bug_handler)
copy_obj.rename(self.__write_to, self.__file)
file_encoding = "utf-8"
if self.__bad_encoding:
file_encoding = "us-ascii"
with open_for_read(self.__file) as read_obj:
with open_for_write(self.__write_to) as write_obj:
write_objenc = EncodedFile(write_obj, self.__encoding,
file_encoding, 'replace')
for line in read_obj:
write_objenc.write(line)
write_obj.write(line)
copy_obj = copy.Copy(bug_handler=self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

View File

@ -978,13 +978,16 @@ class Application(QApplication):
if not geom:
return
restored = widget.restoreGeometry(geom)
self.ensure_window_on_screen(widget)
return restored
def ensure_window_on_screen(self, widget):
screen_rect = self.desktop().availableGeometry(widget)
if not widget.geometry().intersects(screen_rect):
w = min(widget.width(), screen_rect.width() - 10)
h = min(widget.height(), screen_rect.height() - 10)
widget.resize(w, h)
widget.move((screen_rect.width() - w) // 2, (screen_rect.height() - h) // 2)
return restored
def setup_ui_font(self):
f = QFont(QApplication.font())

View File

@ -232,21 +232,25 @@ class AddAction(InterfaceAction):
return
for id_ in ids:
from calibre.ebooks.oeb.polish.create import create_book
pt = PersistentTemporaryFile(suffix='.' + format_)
pt.close()
try:
mi = db.new_api.get_metadata(id_, get_cover=False,
get_user_categories=False, cover_as_data=False)
create_book(mi, pt.name, fmt=format_)
db.add_format_with_hooks(id_, format_, pt.name, index_is_id=True, notify=True)
finally:
os.remove(pt.name)
self.add_empty_format_to_book(id_, format_)
current_idx = self.gui.library_view.currentIndex()
if current_idx.isValid():
view.model().current_changed(current_idx, current_idx)
def add_empty_format_to_book(self, book_id, fmt):
from calibre.ebooks.oeb.polish.create import create_book
db = self.gui.current_db
pt = PersistentTemporaryFile(suffix='.' + fmt.lower())
pt.close()
try:
mi = db.new_api.get_metadata(book_id, get_cover=False,
get_user_categories=False, cover_as_data=False)
create_book(mi, pt.name, fmt=fmt.lower())
db.add_format_with_hooks(book_id, fmt, pt.name, index_is_id=True, notify=True)
finally:
os.remove(pt.name)
def add_archive(self, single):
paths = choose_files(
self.gui, 'recursive-archive-add', _('Choose archive file'),

View File

@ -312,7 +312,7 @@ class EditMetadataAction(InterfaceAction):
intro_msg=_('The downloaded metadata is on the left and the original metadata'
' is on the right. If a downloaded value is blank or unknown,'
' the original value is used.'),
action_button=(_('&View Book'), I('view.png'), self.gui.iactions['View'].view_historical),
action_button=(_('&View book'), I('view.png'), self.gui.iactions['View'].view_historical),
db=db
)
if d.exec_() == d.Accepted:

View File

@ -13,6 +13,7 @@ from PyQt5.Qt import QIcon, QSize
from calibre.gui2 import error_dialog
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.utils.localization import localize_user_manual_link
class StoreAction(InterfaceAction):
@ -146,8 +147,9 @@ class StoreAction(InterfaceAction):
'buying from. Be sure to double check that any books you get '
'will work with your e-book reader, especially if the book you '
'are buying has '
'<a href="https://drmfree.calibre-ebook.com/about#drm">DRM</a>.'
)), 'about_get_books_msg',
'<a href="{}">DRM</a>.'
).format(localize_user_manual_link(
'https://manual.calibre-ebook.com/drm.html'))), 'about_get_books_msg',
parent=self.gui, show_cancel_button=False,
confirm_msg=_('Show this message again'),
pixmap='dialog_information.png', title=_('About Get books'))

View File

@ -10,7 +10,7 @@ import time
from PyQt5.Qt import QTimer, QDialog, QDialogButtonBox, QCheckBox, QVBoxLayout, QLabel, Qt
from calibre.gui2 import error_dialog
from calibre.gui2 import error_dialog, question_dialog
from calibre.gui2.actions import InterfaceAction
@ -105,13 +105,23 @@ class TweakEpubAction(InterfaceAction):
from calibre.ebooks.oeb.polish.main import SUPPORTED
db = self.gui.library_view.model().db
fmts = db.formats(book_id, index_is_id=True) or ''
fmts = [x.upper().strip() for x in fmts.split(',')]
fmts = [x.upper().strip() for x in fmts.split(',') if x]
tweakable_fmts = set(fmts).intersection(SUPPORTED)
if not tweakable_fmts:
return error_dialog(self.gui, _('Cannot edit book'),
_('The book must be in the %s formats to edit.'
'\n\nFirst convert the book to one of these formats.') % (_(' or ').join(SUPPORTED)),
show=True)
if not fmts:
if not question_dialog(self.gui, _('No editable formats'),
_('Do you want to create an empty EPUB file to edit?')):
return
tweakable_fmts = {'EPUB'}
self.gui.iactions['Add Books'].add_empty_format_to_book(book_id, 'EPUB')
current_idx = self.gui.library_view.currentIndex()
if current_idx.isValid():
self.gui.library_view.model().current_changed(current_idx, current_idx)
else:
return error_dialog(self.gui, _('Cannot edit book'), _(
'The book must be in the %s formats to edit.'
'\n\nFirst convert the book to one of these formats.'
) % (_(' or ').join(SUPPORTED)), show=True)
from calibre.gui2.tweak_book import tprefs
tprefs.refresh() # In case they were changed in a Tweak Book process
if len(tweakable_fmts) > 1:

View File

@ -4,7 +4,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from PyQt5.Qt import QToolButton
from PyQt5.Qt import QToolButton, QAction
from calibre.gui2.actions import InterfaceAction
@ -24,6 +24,13 @@ class VirtualLibraryAction(InterfaceAction):
def genesis(self):
self.menu = m = self.qaction.menu()
m.aboutToShow.connect(self.about_to_show_menu)
self.qs_action = QAction(self.gui)
self.gui.addAction(self.qs_action)
self.qs_action.triggered.connect(self.gui.choose_vl_triggerred)
self.gui.keyboard.register_shortcut(self.unique_name + ' - ' + 'quick-select-vl',
_('Quick select Virtual library'), default_keys=('Ctrl+T',),
action=self.qs_action, description=_('Quick select a Virtual library'),
group=self.action_spec[0])
def about_to_show_menu(self):
self.gui.build_virtual_library_menu(self.menu, add_tabs_action=False)

View File

@ -413,6 +413,7 @@ if isosx:
ia = iactions[what]
ac = ia.qaction
if not ac.menu() and hasattr(ia, 'shortcut_action_for_context_menu'):
ia.shortcut_action_for_context_menu.setIcon(ac.icon())
ac = ia.shortcut_action_for_context_menu
m.addAction(CloneAction(ac, m))
@ -506,6 +507,7 @@ else:
ia = iactions[what]
ac = ia.qaction
if not ac.menu() and hasattr(ia, 'shortcut_action_for_context_menu'):
ia.shortcut_action_for_context_menu.setIcon(ac.icon())
ac = ia.shortcut_action_for_context_menu
m.addAction(ac)

View File

@ -212,7 +212,7 @@ def add_format_entries(menu, data, book_info):
else:
m.addSeparator()
m.addAction(_('Add other application for %s files...') % fmt.upper(), partial(book_info.choose_open_with, book_id, fmt))
m.addAction(_('Edit Open With applications...'), partial(edit_programs, fmt, book_info))
m.addAction(_('Edit Open with applications...'), partial(edit_programs, fmt, book_info))
menu.addMenu(m)
menu.ow = m
if fmt.upper() in SUPPORTED:
@ -279,7 +279,7 @@ def add_item_specific_entries(menu, data, book_info):
def details_context_menu_event(view, ev, book_info):
url = view.anchorAt(ev.pos())
menu = view.createStandardContextMenu()
menu.addAction(QIcon(I('edit-copy.png')), _('Copy &all'), partial(copy_all, book_info))
menu.addAction(QIcon(I('edit-copy.png')), _('Copy &all'), partial(copy_all, view))
search_internet_added = False
if url and url.startswith('action:'):
data = json_loads(from_hex_bytes(url.split(':', 1)[1]))

View File

@ -96,7 +96,10 @@
<property name="title">
<string>Margins</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<layout class="QFormLayout" name="formLayout">
<property name="fieldGrowthPolicy">
<enum>QFormLayout::FieldsStayAtSizeHint</enum>
</property>
<item row="0" column="0">
<widget class="QLabel" name="label_3">
<property name="text">

View File

@ -167,7 +167,7 @@ class DBusMenu(QObject):
def eventFilter(self, obj, ev):
ac = getattr(obj, 'menuAction', lambda : None)()
ac_id = self.action_to_id(ac)
if ac_id is not None:
if ac_id is not None and hasattr(ev, 'action'):
etype = ev.type()
if etype == QEvent.ActionChanged:
ac_id = self.action_to_id(ev.action())

View File

@ -41,6 +41,7 @@ class ChooseFormatDialog(QDialog):
bb.accepted.connect(self.accept), bb.rejected.connect(self.reject)
h.addStretch(10), h.addWidget(self.buttonBox)
formats = list(formats)
for format in formats:
self.formats.addItem(QListWidgetItem(file_icon_provider().icon_from_ext(format.lower()),
format.upper()))

View File

@ -44,8 +44,7 @@
<widget class="QLabel" name="msg">
<property name="text">
<string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre,
&lt;a href=&quot;https://drmfree.calibre-ebook.com/about#drm&quot;&gt;click here&lt;/a&gt;.&lt;p&gt;A large number of recent, DRM free releases are
available at &lt;a href=&quot;https://drmfree.calibre-ebook.com&quot;&gt;Open Books&lt;/a&gt;.</string>
&lt;a href=&quot;https://manual.calibre-ebook.com/drm.html&quot;&gt;click here&lt;/a&gt;.&lt;p&gt;</string>
</property>
<property name="wordWrap">
<bool>true</bool>

View File

@ -369,6 +369,7 @@ class MyBlockingBusy(QDialog): # {{{
if args.clear_series:
self.progress_next_step_range.emit(0)
cache.set_field('series', {bid: '' for bid in self.ids})
cache.set_field('series_index', {bid:1.0 for bid in self.ids})
self.progress_finished_cur_step.emit()
if args.pubdate is not None:

View File

@ -94,7 +94,7 @@ class SavedSearchEditor(Dialog):
def __init__(self, parent, initial_search=None):
self.initial_search = initial_search
Dialog.__init__(
self, _('Manage saved searches'), 'manage-saved-searches', parent)
self, _('Manage Saved searches'), 'manage-saved-searches', parent)
def setup_ui(self):
from calibre.gui2.ui import get_gui

View File

@ -10,7 +10,7 @@ from calibre.gui2.dialogs.tag_categories_ui import Ui_TagCategories
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2 import error_dialog
from calibre.constants import islinux
from calibre.utils.icu import sort_key, strcmp
from calibre.utils.icu import sort_key, strcmp, primary_contains
from polyglot.builtins import iteritems, unicode_type
@ -72,9 +72,11 @@ class TagCategories(QDialog, Ui_TagCategories):
lambda: [t.original_name.replace('|', ',') for t in self.db_categories['authors']],
lambda: [t.original_name for t in self.db_categories['series']],
lambda: [t.original_name for t in self.db_categories['publisher']],
lambda: [t.original_name for t in self.db_categories['tags']]
lambda: [t.original_name for t in self.db_categories['tags']],
lambda: [t.original_name for t in self.db_categories['languages']]
]
category_names = ['', _('Authors'), ngettext('Series', 'Series', 2), _('Publishers'), _('Tags')]
category_names = ['', _('Authors'), ngettext('Series', 'Series', 2),
_('Publishers'), _('Tags'), _('Languages')]
for key,cc in iteritems(self.db.custom_field_metadata()):
if cc['datatype'] in ['text', 'series', 'enumeration']:
@ -106,6 +108,7 @@ class TagCategories(QDialog, Ui_TagCategories):
self.category_box.currentIndexChanged[int].connect(self.select_category)
self.category_filter_box.currentIndexChanged[int].connect(
self.display_filtered_categories)
self.item_filter_box.textEdited.connect(self.display_filtered_items)
self.delete_category_button.clicked.connect(self.del_category)
if islinux:
self.available_items_box.itemDoubleClicked.connect(self.apply_tags)
@ -168,14 +171,19 @@ class TagCategories(QDialog, Ui_TagCategories):
w.setToolTip(_('Category lookup name: ') + item.label)
return w
def display_filtered_items(self, text):
self.display_filtered_categories(None)
def display_filtered_categories(self, idx):
idx = idx if idx is not None else self.category_filter_box.currentIndex()
self.available_items_box.clear()
self.applied_items_box.clear()
item_filter = self.item_filter_box.text()
for item in self.all_items_sorted:
if idx == 0 or item.label == self.category_labels[idx]:
if item.index not in self.applied_items and item.exists:
self.available_items_box.addItem(self.make_list_widget(item))
if primary_contains(item_filter, item.name):
self.available_items_box.addItem(self.make_list_widget(item))
for index in self.applied_items:
self.applied_items_box.addItem(self.make_list_widget(self.all_items[index]))

View File

@ -33,7 +33,7 @@
<cstring>category_box</cstring>
</property>
</widget>
</item>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="category_box">
<property name="toolTip">
@ -64,6 +64,26 @@
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_filt">
<property name="text">
<string>Item &amp;filter: </string>
</property>
<property name="alignment">
<set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
</property>
<property name="buddy">
<cstring>item_filter_box</cstring>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLineEdit" name="item_filter_box">
<property name="toolTip">
<string>Enter text to filter the available items. Case and accents are ignored.</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="0" column="1" colspan="3">
@ -136,6 +156,13 @@
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="blank">
<property name="text">
<string> </string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
@ -152,7 +179,7 @@
</property>
</widget>
</item>
<item row="2" column="0">
<item row="3" column="0">
<widget class="QListWidget" name="available_items_box">
<property name="alternatingRowColors">
<bool>true</bool>
@ -165,7 +192,7 @@
</property>
</widget>
</item>
<item row="2" column="1">
<item row="3" column="1">
<widget class="QToolButton" name="apply_button">
<property name="toolTip">
<string>Apply tags to current tag category</string>
@ -189,7 +216,7 @@
</property>
</widget>
</item>
<item row="2" column="2">
<item row="3" column="2">
<widget class="QListWidget" name="applied_items_box">
<property name="alternatingRowColors">
<bool>true</bool>
@ -199,7 +226,7 @@
</property>
</widget>
</item>
<item row="2" column="3">
<item row="3" column="3">
<widget class="QToolButton" name="unapply_button">
<property name="toolTip">
<string>Unapply (remove) tag from current tag category</string>
@ -213,7 +240,7 @@
</property>
</widget>
</item>
<item row="3" column="0" colspan="4">
<item row="4" column="0" colspan="4">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>

View File

@ -198,14 +198,7 @@ def dnd_has_extension(md, extensions, allow_all_extensions=False):
return bool(exts.intersection(frozenset(extensions)))
def dnd_get_image(md, image_exts=None):
'''
Get the image in the QMimeData object md.
:return: None, None if no image is found
QPixmap, None if an image is found, the pixmap is guaranteed not null
url, filename if a URL that points to an image is found
'''
def dnd_get_local_image_and_pixmap(md, image_exts=None):
if md.hasImage():
for x in md.formats():
x = unicode_type(x)
@ -214,14 +207,13 @@ def dnd_get_image(md, image_exts=None):
pmap = QPixmap()
pmap.loadFromData(cdata)
if not pmap.isNull():
return pmap, None
break
return pmap, cdata
if md.hasFormat('application/octet-stream'):
cdata = bytes(md.data('application/octet-stream'))
pmap = QPixmap()
pmap.loadFromData(cdata)
if not pmap.isNull():
return pmap, None
return pmap, cdata
if image_exts is None:
image_exts = image_extensions()
@ -229,23 +221,40 @@ def dnd_get_image(md, image_exts=None):
# No image, look for an URL pointing to an image
urls = urls_from_md(md)
paths = [path_from_qurl(u) for u in urls]
# First look for a local file
# Look for a local file
images = [xi for xi in paths if
posixpath.splitext(unquote(xi))[1][1:].lower() in
image_exts]
images = [xi for xi in images if os.path.exists(xi)]
p = QPixmap()
for path in images:
try:
with open(path, 'rb') as f:
p.loadFromData(f.read())
cdata = f.read()
except Exception:
continue
p = QPixmap()
p.loadFromData(cdata)
if not p.isNull():
return p, None
return p, cdata
# No local images, look for remote ones
return None, None
def dnd_get_image(md, image_exts=None):
'''
Get the image in the QMimeData object md.
:return: None, None if no image is found
QPixmap, None if an image is found, the pixmap is guaranteed not null
url, filename if a URL that points to an image is found
'''
if image_exts is None:
image_exts = image_extensions()
pmap, data = dnd_get_local_image_and_pixmap(md, image_exts)
if pmap is not None:
return pmap, None
# Look for a remote image
urls = urls_from_md(md)
# First, see if this is from Firefox
rurl, fname = get_firefox_rurl(md, image_exts)

View File

@ -531,7 +531,7 @@ class VLTabs(QTabBar): # {{{
s = m._s = m.addMenu(_('Restore hidden tabs'))
for x in hidden:
s.addAction(x, partial(self.restore, x))
m.addAction(_('Hide virtual library tabs'), self.disable_bar)
m.addAction(_('Hide Virtual library tabs'), self.disable_bar)
if gprefs['vl_tabs_closable']:
m.addAction(_('Lock virtual library tabs'), self.lock_tab)
else:

View File

@ -1049,12 +1049,15 @@ class GridView(QListView):
def number_of_columns(self):
# Number of columns currently visible in the grid
if self._ncols is None:
dpr = self.device_pixel_ratio
width = int(dpr * self.delegate.cover_size.width())
height = int(dpr * self.delegate.cover_size.height())
step = max(10, self.spacing())
for y in range(step, 500, step):
for x in range(step, 500, step):
for y in range(step, 2 * height, step):
for x in range(step, 2 * width, step):
i = self.indexAt(QPoint(x, y))
if i.isValid():
for x in range(self.viewport().width() - step, self.viewport().width() - 300, -step):
for x in range(self.viewport().width() - step, self.viewport().width() - width, -step):
j = self.indexAt(QPoint(x, y))
if j.isValid():
self._ncols = j.row() - i.row() + 1
@ -1070,7 +1073,8 @@ class GridView(QListView):
if not ci.isValid():
return
c = ci.row()
delta = {Qt.Key_Left: -1, Qt.Key_Right: 1, Qt.Key_Up: -self.number_of_columns(), Qt.Key_Down: self.number_of_columns()}[k]
ncols = self.number_of_columns() or 1
delta = {Qt.Key_Left: -1, Qt.Key_Right: 1, Qt.Key_Up: -ncols, Qt.Key_Down: ncols}[k]
n = max(0, min(c + delta, self.model().rowCount(None) - 1))
if n == c:
return

View File

@ -30,6 +30,7 @@ from calibre.utils.date import (
local_tz, qt_to_dt, as_local_time, UNDEFINED_DATE, is_date_undefined,
utcfromtimestamp, parse_only_date, internal_iso_format_string)
from calibre import strftime
from calibre.constants import ispy3
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.customize.ui import run_plugins_on_import
from calibre.gui2.comments_editor import Editor
@ -52,7 +53,7 @@ def save_dialog(parent, title, msg, det_msg=''):
def clean_text(x):
return re.sub(r'\s', ' ', x.strip())
return re.sub(r'\s', ' ', x.strip(), flags=re.ASCII if ispy3 else 0)
'''
@ -221,7 +222,6 @@ class TitleEdit(EnLineEdit, ToMetadataMixin):
@property
def current_val(self):
title = clean_text(unicode_type(self.text()))
if not title:
title = self.get_default()

View File

@ -20,7 +20,7 @@ from PyQt5.Qt import (
QWidget, QTableView, QGridLayout, QPalette, QTimer, pyqtSignal,
QAbstractTableModel, QSize, QListView, QPixmap, QModelIndex,
QAbstractListModel, QRect, QTextBrowser, QStringListModel, QMenu,
QCursor, QHBoxLayout, QPushButton, QSizePolicy)
QCursor, QHBoxLayout, QPushButton, QSizePolicy, QSplitter)
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata import authors_to_string, rating_to_stars
@ -317,8 +317,6 @@ class Comments(HTMLDisplay): # {{{
def __init__(self, parent=None):
HTMLDisplay.__init__(self, parent)
self.setAcceptDrops(False)
self.setMaximumWidth(300)
self.setMinimumWidth(300)
self.wait_timer = QTimer(self)
self.wait_timer.timeout.connect(self.update_wait)
self.wait_timer.setInterval(800)
@ -374,13 +372,6 @@ class Comments(HTMLDisplay): # {{{
<html>
'''%(c,)
self.setHtml(templ%html)
def sizeHint(self):
# This is needed, because on windows the dialog cannot be resized to
# so that this widgets height become < sizeHint().height(). Qt sets the
# sizeHint to (800, 600), which makes the dialog unusable on smaller
# screens.
return QSize(800, 300)
# }}}
@ -454,31 +445,41 @@ class IdentifyWidget(QWidget): # {{{
self.abort = Event()
self.caches = {}
self.l = l = QGridLayout()
self.setLayout(l)
self.l = l = QVBoxLayout(self)
names = ['<b>'+p.name+'</b>' for p in metadata_plugins(['identify']) if
p.is_configured()]
self.top = QLabel('<p>'+_('calibre is downloading metadata from: ') +
', '.join(names))
self.top.setWordWrap(True)
l.addWidget(self.top, 0, 0)
l.addWidget(self.top)
self.splitter = s = QSplitter(self)
s.setChildrenCollapsible(False)
l.addWidget(s, 100)
self.results_view = ResultsView(self)
self.results_view.book_selected.connect(self.emit_book_selected)
self.get_result = self.results_view.get_result
l.addWidget(self.results_view, 1, 0)
s.addWidget(self.results_view)
self.comments_view = Comments(self)
l.addWidget(self.comments_view, 1, 1)
s.addWidget(self.comments_view)
s.setStretchFactor(0, 2)
s.setStretchFactor(1, 1)
self.results_view.show_details_signal.connect(self.comments_view.show_data)
self.query = QLabel('download starting...')
self.query.setWordWrap(True)
l.addWidget(self.query, 2, 0, 1, 2)
l.addWidget(self.query)
self.comments_view.show_wait()
state = gprefs.get('metadata-download-identify-widget-splitter-state')
if state is not None:
s.restoreState(state)
def save_state(self):
gprefs['metadata-download-identify-widget-splitter-state'] = bytearray(self.splitter.saveState())
def emit_book_selected(self, book):
self.book_selected.emit(book, self.caches)
@ -1091,6 +1092,7 @@ class FullFetch(QDialog): # {{{
def accept(self):
# Prevent the usual dialog accept mechanisms from working
gprefs['metadata_single_gui_geom'] = bytearray(self.saveGeometry())
self.identify_widget.save_state()
if DEBUG_DIALOG:
if self.stack.currentIndex() == 2:
return QDialog.accept(self)

Some files were not shown because too many files have changed in this diff Show More