diff --git a/Changelog.yaml b/Changelog.yaml
index 1e3d4905b1..c57974bec3 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -4,6 +4,243 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
+- version: 0.7.23
+ date: 2010-10-08
+
+ new features:
+ - title: "Drag and drop to Tag Browser. You can use this to conveniently add tags, set series/publisher etc for a group of books"
+
+ - title: "Allow switching of library even when a device is connected"
+
+ - title: "Support for the PD Novel running Kobo"
+
+ - title: "Move check library integrity from preferences to drop down menu accessed by clicking arrow next to calibre icon"
+
+ - title: "Nicer, non-blocking update available notification"
+
+ - title: "E-book viewer: If you choose to remember last used window size, the state of the Table of Contents view is also remembered"
+ tickets: [7082]
+
+ - title: "Allow moving as well as copying of books to another library"
+
+ - title: "Apple devices: Add support for plugboards"
+
+ - title: "Allow DJVU to be sent to the DR1000"
+
+ bug fixes:
+ - title: "Searching: Fix search expression parser to allow use of escaped double quotes in the search expression"
+
+ - title: "When saving cover images don't re-encode the image data unless absolutely necessary. This prevents information loss due to JPEG re-compression"
+
+ - title: "Fix regression that broke setting of metadata for some MOBI/AZW/PRC files"
+
+ - title: "Fix regression in last release that could cause download of metadata for multiple files to only download the metadata for a few of them"
+ tickets: [7071]
+
+ - title: "MOBI Output: More tweaking of the margin handling to yield results closer to the input document."
+
+ - title: "Device drivers: Fix regression that could cause generation of invalid metadata.calibre cache files"
+
+ - title: "Fix saving to disk with ISBN in filename"
+ tickets: [7090]
+
+ - title: "Fix another regression in the ISBNdb.com metadata download plugin"
+
+ - title: "Fix dragging to not interfere with multi-selection. Also don't allow drag and drop from the library to itself"
+
+ - title: "CHM input: handle another class of broken CHM files"
+ tickets: [7058]
+
+
+ new recipes:
+ - title: "Communications of the Association for Computing Machinery"
+ author: jonmisurda
+
+ - title: "Anand Tech"
+ author: "Oliver Niesner"
+
+ - title: "gsp.ro"
+ author: "bucsie"
+
+ - title: "Il Fatto Quotidiano"
+ author: "egilh"
+
+ - title: "Serbian Literature blog and Rusia Hoy"
+ author: "Darko Miletic"
+
+ - title: "Medscape"
+ author: "Tony Stegall"
+
+
+ improved recipes:
+ - The Age
+ - Australian
+ - Wiki news
+ - Times Online
+ - New Yorker
+ - Guardian
+ - Sueddeutsche
+ - HNA
+ - Revista Muy Interesante
+
+- version: 0.7.22
+ date: 2010-10-03
+
+ new features:
+ - title: "Drag and drop books from your calibre library"
+ type: major
+ description: >
+ "You can now drag and drop books from your calibre library. You can drag them to the desktop or to a file explorer, to copy them to your computer. You can drag them to the
+ device icon in calibre to send them to the device. You can also drag and drop books from the device view in calibre to the calibre library icon or the operating
+ system to copy them from the device."
+
+ - title: "There were many minor bug fixes for various bugs caused by the major changes in 0.7.21. So if you have updated to 0.7.21, it is highly recommended you update to 0.7.22"
+
+ - title: "Driver for the VelocityMicro ebook reader device"
+
+ - title: "Add a tweak to control how articles in titles are processed during sorting"
+
+ - title: "Add a new format type 'device_db' to plugboards to control the metadata displayed in book lists on SONY devices."
+
+ bug fixes:
+ - title: "Fix ISBN not being read from filenames in 0.7.21"
+ tickets: [7054]
+
+ - title: "Fix instant Search for text not found causes unhandled exception when conversion jobs are running"
+ tickets: [7043]
+
+ - title: "Fix removing a publisher causes an error in 0.7.21"
+ tickets: [7046]
+
+ - title: "MOBI Output: Fix some images being distorted in 0.7.21"
+ tickets: [7049]
+
+ - title: "Fix regression that broke bulk conversion of books without covers in 0.7.21"
+
+ - title: "Fix regression that broke add and set_metadata commands in calibredb in 0.7.21"
+
+ - title: "Workaround for Qt bug in file open dialogs in linux that causes multiple file selection to ignore files with two or more spaces in the file name"
+
+ - title: "Conversion pipeline: Fix regression in 0.7.21 that broke conversion of LIT/EPUB documents that specified no title in their OPF files"
+
+ - title: "Fix regression that broke iPad driver in 0.7.21"
+
+ improved recipes:
+ - Washington Post
+
+
+- version: 0.7.21
+ date: 2010-10-01
+
+ new features:
+ - title: "Automatic backup of the calibre metadata database"
+ type: major
+ description: >
+ "calibre now automatically backs up the metadata for each book in the library into an individual OPF file in that book's folder. This means that if the calibre metadata database is corrupted, for example by a hard disk failure, you can reconstruct it from these OPF files, without losing any metadata. For the moment, only the backup is implemented, restore will be implemented in the future. The backup happens automatically in the background while calibre is running. The first time you start calibre, all the books will need to be backed up, so you may notice calibre running a little slower than usual."
+
+ - title: "Virtual columns"
+ type: major
+ description: >
+ "You can now add virtual columns to the calibre book list. These are built from other columns using templates and can be used to, for example, create columns to show the book's ISBN and available formats. You can do this by right clicking on a column header and select 'Add your own columns'"
+
+ - title: "calibre templates now much more powerful"
+ type: major
+ description: >
+ "The templates used in calibre in send to device and save to disk have now become much more powerful. They can use conditional text and functions to transform the replacement text. Also they now have access to metadata in user defined columns. For details see the tutorials section of the User Manual."
+
+ - title: "Metadata plugboards: Allow you to perform sophisticated transformations on the metadata of a book when exporting it from the calibre library."
+ type: major
+ description: >
+ "For example, you can add the series information to the title when sending books to a device. This functionality is accessed from Preferences->Import/Export->Metadata plugboards"
+
+ - title: "User defined columns are now fully integrated into calibre"
+ type: major
+ description: >
+ "User defined columns can now be used everywhere. In the content server, Search and Replace, to create ondevice collections, and in the save to disk and send to device templates for creating filenames. In addition, user defined metadata is saved to and read back from EPUB/OPF files."
+
+ - title: "Driver for the jetBook Mini"
+
+ - title: "Add tweaks to control which custom columns the content server displays."
+
+ - title: "Bulk downloading of metadata/covers now shows progress and can be canceled"
+
+ - title: "New plugin to download covers from douban.com. It is disabled by default and must be enabled via Preferences->Advanced->Plugins->Cover download plugins"
+
+ - title: "Add option to change titles to title case in the Bulk metadata edit dialog"
+
+ - title: "Add option to bulk metadata edit dialog to force series renumbering to start with a specified value"
+
+ bug fixes:
+ - title: "Fix various bugs that could lead to stale files being left in the calibre library when editing title/author metadata on windows"
+
+ - title: "Fix various regression in the preprocess and de-hyphenation code that broke conversion of some files, especially PDF ones."
+
+ - title: "Alex driver: Fix books not being placed in sub directories. Send covers. And allow sending of FB2"
+ tickets: [6956]
+
+ - title: "MOBI Output: Fix bug that could caused left margins in the MOBI file to have twice the size of the left margins in the input document, when viewed on the pathetic Kindle MOBI renderer"
+
+ - title: "MOBI Input: Interpret blockquotes as having a left margin of 2em not 1em to reflect recent Amazon practice"
+
+ - title: "MOBI Output: Remove transparencies from images. Pathetic Kindle MOBI renderer strikes again"
+
+ - title: "Revert removal of inline toc from news downloaded in MOBI format as this makes it unusable with the pathetic Kindle For PC application"
+
+ - title: "Content server: Remove special characters from filenames in download links to accommodate broken browsers like the one in the Kindle"
+
+ - title: "Conversion pipeline: When rescaling images, don't replace gif image data with jpeg data"
+
+ - title: "EPUB Input: Ignore OPF files in the EPUB whose names start with a period"
+
+ - title: "RTF Output: Handle a larger set of broken images in the input document"
+ tickets: [7003]
+
+ - title: "epub-fix: Handle dates before 1900"
+ tickets: [7002]
+
+ - title: "Welcome wizard: Prevent the user from choosing a non empty folder as her calibre library"
+
+ - title: "Automatically enable the Douban metadata download plugins if the user chooses Chinese as the interface language in the welcome wizard"
+
+ - title: "Linux DBUS notifier: Fix causing freezes on some DBUS implementations"
+ tickets: [6969]
+
+ - title: "Workaround for windows limitation when reading from network sockets. Should fix issues with large files in calibre libraries on network shares."
+ tickets: [3248]
+
+ new recipes:
+ - title: "BBC Sport"
+ author: "limawhiskey"
+
+ - title: "Revista Muy Interesante "
+ author: "Jefferson Frantz"
+
+ - title: "El Universo - Ecuador and Frederik Pohl's Blog"
+ author: "Darko Miletic"
+
+ - title: "Science News"
+ author: "Starson17"
+
+ - title: "Various Belgian news sources"
+ author: "Lionel Bergeret"
+
+ - title: "Oriental Daily"
+ author: "Larry Chan"
+
+ - title: "Rmf24 - Opinie"
+ author: "Tomasz Dlugosz"
+
+ - title: "Jerusalem Post - French and Howto Geek"
+ author: "Tony Stegall"
+
+
+ improved recipes:
+ - Peter Schiff
+ - Telegraph UK
+ - AJC
+ - Boortz
+ - Scientific American
+
- version: 0.7.20
date: 2010-09-24
diff --git a/imgsrc/console.svg b/imgsrc/console.svg
new file mode 100644
index 0000000000..0d502bb1da
--- /dev/null
+++ b/imgsrc/console.svg
@@ -0,0 +1,4339 @@
+
+
+
diff --git a/imgsrc/lookfeel.svg b/imgsrc/lookfeel.svg
index aeee997941..81a68e2088 100644
--- a/imgsrc/lookfeel.svg
+++ b/imgsrc/lookfeel.svg
@@ -15,13 +15,341 @@
sodipodi:version="0.32"
inkscape:version="0.44"
version="1.0"
- sodipodi:docbase="/home/david/Progetti/oxygen-svn/Oxygen/theme/svg/actions"
- sodipodi:docname="indent.svg"
- inkscape:export-filename="/home/pinheiro/artwork/Oxygen/theme/svg/actions/small/22x22/text_center.png"
- inkscape:export-xdpi="90"
- inkscape:export-ydpi="90">
+ sodipodi:docbase="/home/pinheiro/artwork/Oxygen/theme/svg/actions"
+ sodipodi:docname="color_fill.svg">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -34,62 +362,483 @@
offset="1"
style="stop-color:#000000;stop-opacity:0;" />
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ cx="63.912209"
+ id="radialGradient3297"
+ xlink:href="#linearGradient3291"
+ inkscape:collect="always" />
+ id="linearGradient3207">
-
-
-
+ style="stop-color:#252525;stop-opacity:0;" />
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+ inkscape:window-width="1270"
+ inkscape:window-height="731"
+ inkscape:window-x="54"
+ inkscape:window-y="283">
+
+
@@ -126,215 +873,299 @@
image/svg+xml
-
-
-
- Oxygen team
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+ style="opacity:0.3647541;fill:url(#radialGradient8823);fill-opacity:1.0;fill-rule:nonzero;stroke:none;stroke-width:0.30000001;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:0.33333333"
+ id="path8815"
+ sodipodi:cx="63.84724"
+ sodipodi:cy="121.47017"
+ sodipodi:rx="63.84724"
+ sodipodi:ry="6.5298314"
+ d="M 127.69448 121.47017 A 63.84724 6.5298314 0 1 1 0,121.47017 A 63.84724 6.5298314 0 1 1 127.69448 121.47017 z" />
+ x="116"
+ y="77" />
+ x="-141.37"
+ y="33.53812" />
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id="path1957"
+ d="M 59.474254,68.01234 L 84.311353,109.91412 C 99.16229,103.24193 116.63784,87.654203 119.47193,77.30627 L 95.28071,34.66616 C 92.2947,49.841373 74.123308,61.005415 59.474254,68.01234 z "
+ style="fill:url(#radialGradient2853);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" />
+ id="path8825"
+ d="M 120.02654,75.206335 C 120.32266,75.733559 121.39183,77.6115 121.60911,77.998357 C 126.44873,86.614931 115.85079,101.69876 97.928243,111.67853 C 80.005674,121.65831 61.552839,122.77092 56.713214,114.15435 C 56.451639,113.68864 55.201826,111.4498 54.8271,110.78262 C 54.951247,112.14901 55.311402,113.41316 55.967372,114.58104 C 58.475106,119.04589 64.53045,120.96675 72.146166,120.51066 C 79.761882,120.05454 89.00973,117.22886 98.089034,112.17327 C 107.16832,107.11767 114.41592,100.79093 118.79317,94.569017 C 123.17043,88.347104 124.66168,82.207073 122.15393,77.742226 C 121.60724,76.768888 120.88864,75.938059 120.02654,75.206335 z "
+ style="opacity:0.78278689;fill:url(#linearGradient8872);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1.26253247;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1" />
+ style="opacity:0.78278689;fill:url(#linearGradient8870);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1.26253247;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1"
+ d="M 119.58033,74.691495 C 119.87645,75.218718 120.9456,77.096666 121.16289,77.483521 C 126.00251,86.10009 115.40457,101.18392 97.482022,111.1637 C 79.559453,121.14345 61.106618,122.25609 56.266992,113.6395 C 56.005419,113.1738 54.755605,110.93497 54.380878,110.26779 C 54.505026,111.63417 54.865181,112.89832 55.521152,114.0662 C 58.028884,118.53105 64.084228,120.45191 71.699944,119.99581 C 79.315661,119.53971 88.563508,116.71402 97.642812,111.65843 C 106.7221,106.60283 113.9697,100.27609 118.34695,94.054178 C 122.72421,87.832263 124.21545,81.692233 121.7077,77.227385 C 121.16102,76.254047 120.44241,75.423219 119.58033,74.691495 z "
+ id="path8827" />
+ style="fill:#b90000;fill-opacity:1;fill-rule:evenodd;stroke:#ae0000;stroke-width:0.75208664px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="M 15.373706,125.69402 C 22.221465,125.74933 27.996107,124.98911 29.405976,125.0239 C 38.732442,125.25394 61.077222,120.9265 52.141073,115.60666 C 42.378446,109.79479 15.545252,109.95091 5.0973772,115.55487 C -6.3045902,121.67058 8.5270618,125.63869 15.373706,125.69402 z "
+ id="path7755"
+ sodipodi:nodetypes="csssz" />
-
-
-
-
-
-
+ sodipodi:nodetypes="csssz"
+ id="path7898"
+ d="M 15.322403,126.05314 C 22.170162,126.10845 27.970817,125.31015 29.380324,125.35737 C 38.792057,125.67268 61.866384,121.20263 52.397589,115.35014 C 42.733011,109.37665 15.545252,109.43788 4.7382546,115.34966 C -6.6129262,121.55912 8.4757588,125.99781 15.322403,126.05314 z "
+ style="opacity:0.88524631;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient3300);stroke-width:0.077;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-miterlimit:4;stroke-dasharray:none" />
+ style="opacity:0.43852461;fill:url(#linearGradient2863);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1"
+ d="M 84.311353,109.91412 C 99.16229,103.24193 116.63784,87.654203 119.47193,77.30627 C 102.01223,47.067434 69.038917,83.579128 84.311353,109.91412 z "
+ id="path2855"
+ sodipodi:nodetypes="ccc" />
+
+
+
+
+
+
+
diff --git a/imgsrc/plugboard.svg b/imgsrc/plugboard.svg
new file mode 100644
index 0000000000..b8451a6b3a
--- /dev/null
+++ b/imgsrc/plugboard.svg
@@ -0,0 +1,300 @@
+
+
+
+
diff --git a/resources/content_server/gui.js b/resources/content_server/gui.js
index 9ee61b1866..f5bf478e0c 100644
--- a/resources/content_server/gui.js
+++ b/resources/content_server/gui.js
@@ -59,14 +59,48 @@ function render_book(book) {
title = title.slice(0, title.length-2);
title += ' ({0} MB) '.format(size);
}
- if (tags) title += 'Tags=[{0}] '.format(tags);
+ title += ''
+ if (tags) {
+ t = tags.split(':&:', 2);
+ m = parseInt(t[0]);
+ tall = t[1].split(',');
+ t = t[1].split(',', m);
+ if (tall.length > m) t[m] = '...'
+ title += 'Tags=[{0}] '.format(t.join(','));
+ }
custcols = book.attr("custcols").split(',')
for ( i = 0; i < custcols.length; i++) {
if (custcols[i].length > 0) {
vals = book.attr(custcols[i]).split(':#:', 2);
+ if (vals[0].indexOf('#T#') == 0) { //startswith
+ vals[0] = vals[0].substr(3, vals[0].length)
+ t = vals[1].split(':&:', 2);
+ m = parseInt(t[0]);
+ t = t[1].split(',', m);
+ if (t.length == m) t[m] = '...';
+ vals[1] = t.join(',');
+ }
title += '{0}=[{1}] '.format(vals[0], vals[1]);
}
}
+ title += ''
+ title += ''
+ if (tags) {
+ t = tags.split(':&:', 2);
+ title += 'Tags=[{0}] '.format(t[1]);
+ }
+ custcols = book.attr("custcols").split(',')
+ for ( i = 0; i < custcols.length; i++) {
+ if (custcols[i].length > 0) {
+ vals = book.attr(custcols[i]).split(':#:', 2);
+ if (vals[0].indexOf('#T#') == 0) { //startswith
+ vals[0] = vals[0].substr(3, vals[0].length)
+ vals[1] = (vals[1].split(':&:', 2))[1];
+ }
+ title += '{0}=[{1}] '.format(vals[0], vals[1]);
+ }
+ }
+ title += ''
title += ''.format(id);
title += '
{0}
'.format(comments)
// Render authors cell
@@ -170,11 +204,15 @@ function fetch_library_books(start, num, timeout, sort, order, search) {
var cover = row.find('img').attr('src');
var collapsed = row.find('.comments').css('display') == 'none';
$("#book_list tbody tr * .comments").css('display', 'none');
+ $("#book_list tbody tr * .tagdata_short").css('display', 'inherit');
+ $("#book_list tbody tr * .tagdata_long").css('display', 'none');
$('#cover_pane').css('visibility', 'hidden');
if (collapsed) {
row.find('.comments').css('display', 'inherit');
$('#cover_pane img').attr('src', cover);
$('#cover_pane').css('visibility', 'visible');
+ row.find(".tagdata_short").css('display', 'none');
+ row.find(".tagdata_long").css('display', 'inherit');
}
});
diff --git a/resources/content_server/mobile.css b/resources/content_server/mobile.css
index 9be755b954..0022b2a134 100644
--- a/resources/content_server/mobile.css
+++ b/resources/content_server/mobile.css
@@ -13,6 +13,8 @@
font-size: 1.25em;
border: 1px solid black;
text-color: black;
+ text-decoration: none;
+ margin-right: 0.5em;
background-color: #ddd;
border-top: 1px solid ThreeDLightShadow;
border-right: 1px solid ButtonShadow;
@@ -70,6 +72,7 @@ div.navigation {
padding-right: 0em;
overflow: hidden;
text-align: center;
+ text-decoration: none;
}
#logo {
diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index 71bf2c6c37..48845da920 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -83,6 +83,16 @@ title_series_sorting = 'library_order'
# strictly_alphabetic, it would remain "The Client".
save_template_title_series_sorting = 'library_order'
+# Set the list of words that are to be considered 'articles' when computing the
+# title sort strings. The list is a regular expression, with the articles
+# separated by 'or' bars. Comparisons are case insensitive, and that cannot be
+# changed. Changes to this tweak won't have an effect until the book is modified
+# in some way. If you enter an invalid pattern, it is silently ignored.
+# To disable use the expression: '^$'
+# Default: '^(A|The|An)\s+'
+title_sort_articles=r'^(A|The|An)\s+'
+
+
# Specify a folder that calibre should connect to at startup using
# connect_to_folder. This must be a full path to the folder. If the folder does
# not exist when calibre starts, it is ignored. If there are '\' characters in
@@ -93,6 +103,37 @@ save_template_title_series_sorting = 'library_order'
auto_connect_to_folder = ''
+# Specify renaming rules for sony collections. Collections on Sonys are named
+# depending upon whether the field is standard or custom. A collection derived
+# from a standard field is named for the value in that field. For example, if
+# the standard 'series' column contains the name 'Darkover', then the series
+# will be named 'Darkover'. A collection derived from a custom field will have
+# the name of the field added to the value. For example, if a custom series
+# column named 'My Series' contains the name 'Darkover', then the collection
+# will be named 'Darkover (My Series)'. If two books have fields that generate
+# the same collection name, then both books will be in that collection. This
+# tweak lets you specify for a standard or custom field the value to be put
+# inside the parentheses. You can use it to add a parenthetical description to a
+# standard field, for example 'Foo (Tag)' instead of the 'Foo'. You can also use
+# it to force multiple fields to end up in the same collection. For example, you
+# could force the values in 'series', '#my_series_1', and '#my_series_2' to
+# appear in collections named 'some_value (Series)', thereby merging all of the
+# fields into one set of collections. The syntax of this tweak is
+# {'field_lookup_name':'name_to_use', 'lookup_name':'name', ...}
+# Example 1: I want three series columns to be merged into one set of
+# collections. If the column lookup names are 'series', '#series_1' and
+# '#series_2', and if I want nothing in the parenthesis, then the value to use
+# in the tweak value would be:
+# sony_collection_renaming_rules={'series':'', '#series_1':'', '#series_2':''}
+# Example 2: I want the word '(Series)' to appear on collections made from
+# series, and the word '(Tag)' to appear on collections made from tags. Use:
+# sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}
+# Example 3: I want 'series' and '#myseries' to be merged, and for the
+# collection name to have '(Series)' appended. The renaming rule is:
+# sony_collection_renaming_rules={'series':'Series', '#myseries':'Series'}
+sony_collection_renaming_rules={}
+
+
# Create search terms to apply a query across several built-in search terms.
# Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...}
# Example: create the term 'myseries' that when used as myseries:foo would
@@ -114,6 +155,24 @@ add_new_book_tags_when_importing_books = False
# Set the maximum number of tags to show per book in the content server
max_content_server_tags_shown=5
+# Set custom metadata fields that the content server will or will not display.
+# content_server_will_display is a list of custom fields to be displayed.
+# content_server_wont_display is a list of custom fields not to be displayed.
+# wont_display has priority over will_display.
+# The special value '*' means all custom fields.
+# Defaults:
+# content_server_will_display = ['*']
+# content_server_wont_display = ['']
+# Examples:
+# To display only the custom fields #mytags and #genre:
+# content_server_will_display = ['#mytags', '#genre']
+# content_server_wont_display = ['']
+# To display all fields except #mycomments:
+# content_server_will_display = ['*']
+# content_server_wont_display['#mycomments']
+content_server_will_display = ['*']
+content_server_wont_display = ['']
+
# Set the maximum number of sort 'levels' that calibre will use to resort the
# library after certain operations such as searches or device insertion. Each
diff --git a/resources/images/console.png b/resources/images/console.png
new file mode 100644
index 0000000000..168f0ccb2a
Binary files /dev/null and b/resources/images/console.png differ
diff --git a/resources/images/lookfeel.png b/resources/images/lookfeel.png
index 4d3690bf8d..7da9f08e64 100644
Binary files a/resources/images/lookfeel.png and b/resources/images/lookfeel.png differ
diff --git a/resources/images/news/anandtech.png b/resources/images/news/anandtech.png
new file mode 100644
index 0000000000..19270d99a9
Binary files /dev/null and b/resources/images/news/anandtech.png differ
diff --git a/resources/images/news/eluniverso_ec.png b/resources/images/news/eluniverso_ec.png
new file mode 100644
index 0000000000..c5a8918bcd
Binary files /dev/null and b/resources/images/news/eluniverso_ec.png differ
diff --git a/resources/images/news/rusiahoy.png b/resources/images/news/rusiahoy.png
new file mode 100644
index 0000000000..6fbdefa6a5
Binary files /dev/null and b/resources/images/news/rusiahoy.png differ
diff --git a/resources/images/news/science_news_recent_issues.png b/resources/images/news/science_news_recent_issues.png
new file mode 100644
index 0000000000..355fb8c3a6
Binary files /dev/null and b/resources/images/news/science_news_recent_issues.png differ
diff --git a/resources/images/plugboard.png b/resources/images/plugboard.png
new file mode 100644
index 0000000000..db9e8e89f0
Binary files /dev/null and b/resources/images/plugboard.png differ
diff --git a/resources/recipes/ajc.recipe b/resources/recipes/ajc.recipe
index ea989b4b4c..031fe13170 100644
--- a/resources/recipes/ajc.recipe
+++ b/resources/recipes/ajc.recipe
@@ -16,6 +16,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'The AJC'
timefmt = ' [%a,%d %B %Y %I:%M %p]'
__author__ = 'TonytheBookworm'
+ language = 'en'
description = 'News from Atlanta and USA'
publisher = 'The Atlanta Journal'
category = 'news, politics, USA'
diff --git a/resources/recipes/anandtech.recipe b/resources/recipes/anandtech.recipe
new file mode 100644
index 0000000000..aa10084070
--- /dev/null
+++ b/resources/recipes/anandtech.recipe
@@ -0,0 +1,32 @@
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal '
+
+'''
+Fetch Anandtech.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class anan(BasicNewsRecipe):
+
+ title = 'Anandtech'
+ description = 'comprehensive Hardware Tests'
+ __author__ = 'Oliver Niesner'
+ use_embedded_content = False
+ language = 'en'
+ timefmt = ' [%d %b %Y]'
+ max_articles_per_feed = 40
+ no_stylesheets = True
+ remove_javascript = True
+ encoding = 'utf-8'
+
+ remove_tags=[dict(name='a', attrs={'style':'width:110px; margin-top:0px;text-align:center;'}),
+ dict(name='a', attrs={'style':'width:110px; margin-top:0px; margin-right:20px;text-align:center;'})]
+
+ feeds = [ ('Anandtech', 'http://www.anandtech.com/rss/')]
+
+ def print_version(self,url):
+ return url.replace('/show/', '/print/')
+
+
diff --git a/resources/recipes/bbc_sport.recipe b/resources/recipes/bbc_sport.recipe
new file mode 100644
index 0000000000..a861ed0b50
--- /dev/null
+++ b/resources/recipes/bbc_sport.recipe
@@ -0,0 +1,65 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, limawhiskey '
+'''
+news.bbc.co.uk/sport/
+'''
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BBC(BasicNewsRecipe):
+ title = 'BBC Sport'
+ __author__ = 'limawhiskey, Darko Miletic, Starson17'
+ description = 'Sports news from UK. A fast version that does not download pictures'
+ oldest_article = 2
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ encoding = 'utf8'
+ publisher = 'BBC'
+ category = 'sport, news, UK, world'
+ language = 'en_GB'
+ publication_type = 'newsportal'
+ extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
+ preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')]
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ ,'linearize_tables': True
+ }
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':['ds','mxb']}),
+ dict(attrs={'class':['story-body','storybody']})
+ ]
+
+ remove_tags = [
+ dict(name='div', attrs={'class':['storyextra', 'share-help', 'embedded-hyper', \
+ 'story-feature wide ', 'story-feature narrow', 'cap', 'caption', 'q1', 'sihf', \
+ 'mva', 'videoInStoryC', 'sharesb', 'mvtb']}),
+ dict(name=['img']), dict(name=['br'])
+ ]
+
+ remove_attributes = ['width','height']
+
+ feeds = [
+ ('Sport Front Page', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
+ ('Football', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml'),
+ ('Cricket', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml'),
+ ('Formula 1', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/formula_one/rss.xml'),
+ ('Commonwealth Games', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/commonwealth_games/delhi_2010/rss.xml'),
+ ('Golf', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/golf/rss.xml'),
+ ('Rugby Union', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml'),
+ ('Rugby League', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_league/rss.xml'),
+ ('Tennis', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/tennis/rss.xml'),
+ ('Motorsport', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/rss.xml'),
+ ('Boxing', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/boxing/rss.xml'),
+ ('Athletics', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/athletics/rss.xml'),
+ ('Snooker', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/snooker/rss.xml'),
+ ('Horse Racing', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/horse_racing/rss.xml'),
+ ('Cycling', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/cycling/rss.xml'),
+ ('Disability Sport', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/disability_sport/rss.xml'),
+ ('Other Sport', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml'),
+ ('Olympics 2012', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/olympics/london_2012/rss.xml'),
+ ]
diff --git a/resources/recipes/cacm.recipe b/resources/recipes/cacm.recipe
new file mode 100644
index 0000000000..1618bae742
--- /dev/null
+++ b/resources/recipes/cacm.recipe
@@ -0,0 +1,37 @@
+import datetime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1286242553(BasicNewsRecipe):
+ title = u'CACM'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ needs_subscription = True
+ feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')]
+ language = 'en'
+ __author__ = 'jonmisurda'
+ no_stylesheets = True
+ remove_tags = [
+ dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn', \
+ 'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']})
+ ]
+ cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d'
+
+ def get_browser(self):
+ br = BasicNewsRecipe.get_browser()
+ if self.username is not None and self.password is not None:
+ br.open('https://cacm.acm.org/login')
+ br.select_form(nr=1)
+ br['current_member[user]'] = self.username
+ br['current_member[passwd]'] = self.password
+ br.submit()
+ return br
+
+ def get_cover_url(self):
+ now = datetime.datetime.now()
+
+ cover_url = None
+ soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month))
+ cover_item = soup.find('img',attrs={'alt':'magazine cover image'})
+ if cover_item:
+ cover_url = cover_item['src']
+ return cover_url
diff --git a/resources/recipes/china_economic_net.recipe b/resources/recipes/china_economic_net.recipe
index 825ea007c2..6fcb2e36ed 100644
--- a/resources/recipes/china_economic_net.recipe
+++ b/resources/recipes/china_economic_net.recipe
@@ -26,7 +26,7 @@ class AdvancedUserRecipe1278162597(BasicNewsRecipe):
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
- language = 'zh-cn'
+ language = 'zh_CN'
encoding = 'gb2312'
conversion_options = {'linearize_tables':True}
diff --git a/resources/recipes/cinebel_be.recipe b/resources/recipes/cinebel_be.recipe
new file mode 100644
index 0000000000..ec76bfc894
--- /dev/null
+++ b/resources/recipes/cinebel_be.recipe
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Lionel Bergeret '
+'''
+cinebel.be
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Cinebel(BasicNewsRecipe):
+ title = u'Cinebel'
+ __author__ = u'Lionel Bergeret'
+ description = u'Cinema news from Belgium in French'
+ publisher = u'cinebel.be'
+ category = 'news, cinema, movie, Belgium'
+ oldest_article = 3
+ encoding = 'utf8'
+ language = 'fr_BE'
+
+ max_articles_per_feed = 20
+ no_stylesheets = True
+ use_embedded_content = False
+ timefmt = ' [%d %b %Y]'
+
+ keep_only_tags = [
+ dict(name = 'span', attrs = {'class': 'movieMainTitle'})
+ ,dict(name = 'div', attrs = {'id': 'filmPoster'})
+ ,dict(name = 'div', attrs = {'id': 'filmDefinition'})
+ ,dict(name = 'div', attrs = {'id': 'synopsis'})
+ ]
+
+ feeds = [
+ (u'Les sorties de la semaine' , u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=0' )
+ ,(u'Top 10' , u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=2' )
+ ]
+
+ def get_cover_url(self):
+ cover_url = 'http://www.cinebel.be/portal/resources/common/logo_index.gif'
+ return cover_url
diff --git a/resources/recipes/dhnet_be.recipe b/resources/recipes/dhnet_be.recipe
new file mode 100644
index 0000000000..ef4d1736e3
--- /dev/null
+++ b/resources/recipes/dhnet_be.recipe
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Lionel Bergeret '
+'''
+dhnet.be
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DHNetBe(BasicNewsRecipe):
+ title = u'La Derniere Heure'
+ __author__ = u'Lionel Bergeret'
+ description = u'News from Belgium in French'
+ publisher = u'dhnet.be'
+ category = 'news, Belgium'
+ oldest_article = 3
+ language = 'fr_BE'
+
+ max_articles_per_feed = 20
+ no_stylesheets = True
+ use_embedded_content = False
+ timefmt = ' [%d %b %Y]'
+
+ keep_only_tags = [
+ dict(name = 'div', attrs = {'id': 'articleText'})
+ ,dict(name = 'div', attrs = {'id': 'articlePicureAndLinks'})
+ ]
+
+ feeds = [
+ (u'La Une' , u'http://www.dhnet.be/rss' )
+ ,(u'La Une Sports' , u'http://www.dhnet.be/rss/dhsports/' )
+ ,(u'La Une Info' , u'http://www.dhnet.be/rss/dhinfos/' )
+ ]
+
+ def get_cover_url(self):
+ cover_url = strftime('http://pdf-online.dhnet.be/pdfonline/image/%Y%m%d/dh_%Y%m%d_nam_infoge_001.pdf.L.jpg')
+ return cover_url
diff --git a/resources/recipes/eluniverso_ec.recipe b/resources/recipes/eluniverso_ec.recipe
new file mode 100644
index 0000000000..a0e8b46474
--- /dev/null
+++ b/resources/recipes/eluniverso_ec.recipe
@@ -0,0 +1,63 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+eluniverso.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElUniverso_Ecuador(BasicNewsRecipe):
+ title = 'El Universo - Ecuador'
+ __author__ = 'Darko Miletic'
+ description = 'Noticias del Ecuador y el resto del mundo'
+ publisher = 'El Universo'
+ category = 'news, politics, Ecuador'
+ oldest_article = 2
+ max_articles_per_feed = 200
+ no_stylesheets = True
+ encoding = 'utf8'
+ use_embedded_content = False
+ language = 'es'
+ remove_empty_feeds = True
+ publication_type = 'newspaper'
+ masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'
+ extra_css = """
+ body{font-family: Verdana,Arial,Helvetica,sans-serif; color: #333333 }
+ h2{font-family: Georgia,"Times New Roman",Times,serif; color: #1B2D60}
+ """
+
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher' : publisher
+ , 'language' : language
+ }
+
+ remove_tags = [
+ dict(attrs={'class':['flechs','multiBox','colRecursos']})
+ ,dict(name=['meta','link','embed','object','iframe','base'])
+ ]
+ keep_only_tags = [dict(attrs={'class':'Nota'})]
+ remove_tags_after = dict(attrs={'id':'TextoPrint'})
+ remove_tags_before = dict(attrs={'id':'FechaPrint'})
+
+ feeds = [
+ (u'Portada' , u'http://www.eluniverso.com/rss/portada.xml' )
+ ,(u'Politica' , u'http://www.eluniverso.com/rss/politica.xml' )
+ ,(u'Economia' , u'http://www.eluniverso.com/rss/economia.xml' )
+ ,(u'Sucesos' , u'http://www.eluniverso.com/rss/sucesos.xml' )
+ ,(u'Migracion' , u'http://www.eluniverso.com/rss/migrantes_tema.xml' )
+ ,(u'El Pais' , u'http://www.eluniverso.com/rss/elpais.xml' )
+ ,(u'Internacionales' , u'http://www.eluniverso.com/rss/internacionales.xml' )
+ ,(u'Deportes' , u'http://www.eluniverso.com/rss/deportes.xml' )
+                   ,(u'Gran Guayaquil'   , u'http://www.eluniverso.com/rss/gran_guayaquil.xml'   )
+ ,(u'Entretenimiento' , u'http://www.eluniverso.com/rss/arteyespectaculos.xml' )
+ ,(u'Vida' , u'http://www.eluniverso.com/rss/tuvida.xml' )
+ ,(u'Opinion' , u'http://www.eluniverso.com/rss/opinion.xml' )
+ ]
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ return soup
+
diff --git a/resources/recipes/gsp.recipe b/resources/recipes/gsp.recipe
new file mode 100644
index 0000000000..90a8eecfe6
--- /dev/null
+++ b/resources/recipes/gsp.recipe
@@ -0,0 +1,20 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1286351181(BasicNewsRecipe):
+ title = u'gsp.ro'
+ __author__ = 'bucsie'
+ oldest_article = 2
+ max_articles_per_feed = 100
+ language='ro'
+ cover_url ='http://www.gsp.ro/images/sigla_rosu.jpg'
+
+ remove_tags = [
+ dict(name='div', attrs={'class':['related_articles', 'articol_noteaza straight_line dotted_line_top', 'comentarii','mai_multe_articole']}),
+ dict(name='div', attrs={'id':'icons'})
+ ]
+ remove_tags_after = dict(name='div', attrs={'id':'adoceanintactrovccmgpmnyt'})
+
+ feeds = [(u'toate stirile', u'http://www.gsp.ro/index.php?section=section&screen=rss')]
+
+ def print_version(self, url):
+ return 'http://www1.gsp.ro/print/' + url[(url.rindex('/')+1):]
diff --git a/resources/recipes/guardian.recipe b/resources/recipes/guardian.recipe
index 344e061c26..a4329d279e 100644
--- a/resources/recipes/guardian.recipe
+++ b/resources/recipes/guardian.recipe
@@ -8,10 +8,16 @@ www.guardian.co.uk
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
+from datetime import date
class Guardian(BasicNewsRecipe):
- title = u'The Guardian'
+ title = u'The Guardian / The Observer'
+ if date.today().weekday() == 6:
+ base_url = "http://www.guardian.co.uk/theobserver"
+ else:
+ base_url = "http://www.guardian.co.uk/theguardian"
+
__author__ = 'Seabound and Sujata Raman'
language = 'en_GB'
@@ -19,6 +25,10 @@ class Guardian(BasicNewsRecipe):
max_articles_per_feed = 100
remove_javascript = True
+ # List of section titles to ignore
+ # For example: ['Sport']
+ ignore_sections = []
+
timefmt = ' [%a, %d %b %Y]'
keep_only_tags = [
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
@@ -28,6 +38,7 @@ class Guardian(BasicNewsRecipe):
dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}),
dict(name='ul', attrs={'class':["pagination"]}),
dict(name='ul', attrs={'id':["content-actions"]}),
+ #dict(name='img'),
]
use_embedded_content = False
@@ -43,18 +54,6 @@ class Guardian(BasicNewsRecipe):
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
'''
- feeds = [
- ('Front Page', 'http://www.guardian.co.uk/rss'),
- ('Business', 'http://www.guardian.co.uk/business/rss'),
- ('Sport', 'http://www.guardian.co.uk/sport/rss'),
- ('Culture', 'http://www.guardian.co.uk/culture/rss'),
- ('Money', 'http://www.guardian.co.uk/money/rss'),
- ('Life & Style', 'http://www.guardian.co.uk/lifeandstyle/rss'),
- ('Travel', 'http://www.guardian.co.uk/travel/rss'),
- ('Environment', 'http://www.guardian.co.uk/environment/rss'),
- ('Comment','http://www.guardian.co.uk/commentisfree/rss'),
- ]
-
def get_article_url(self, article):
url = article.get('guid', None)
if '/video/' in url or '/flyer/' in url or '/quiz/' in url or \
@@ -76,7 +75,8 @@ class Guardian(BasicNewsRecipe):
return soup
def find_sections(self):
- soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
+ # soup = self.index_to_soup("http://www.guardian.co.uk/theobserver")
+ soup = self.index_to_soup(self.base_url)
# find cover pic
img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'})
if img is not None:
@@ -113,13 +113,10 @@ class Guardian(BasicNewsRecipe):
try:
feeds = []
for title, href in self.find_sections():
- feeds.append((title, list(self.find_articles(href))))
+ if not title in self.ignore_sections:
+ feeds.append((title, list(self.find_articles(href))))
return feeds
except:
raise NotImplementedError
- def postprocess_html(self,soup,first):
- return soup.findAll('html')[0]
-
-
diff --git a/resources/recipes/hna.recipe b/resources/recipes/hna.recipe
index e04837bd17..6e843800ee 100644
--- a/resources/recipes/hna.recipe
+++ b/resources/recipes/hna.recipe
@@ -30,21 +30,33 @@ class hnaDe(BasicNewsRecipe):
dict(id='superbanner'),
dict(id='navigation'),
dict(id='skyscraper'),
+ dict(id='idNavigationWrap'),
+ dict(id='idHeaderSearchForm'),
+ dict(id='idLoginBarWrap'),
+ dict(id='idAccountButtons'),
+ dict(id='idHeadButtons'),
+ dict(id='idBoxesWrap'),
dict(id=''),
dict(name='span'),
dict(name='ul', attrs={'class':'linklist'}),
dict(name='a', attrs={'href':'#'}),
dict(name='div', attrs={'class':'hlist'}),
+ dict(name='li', attrs={'class':'idButton idIsLoginGroup idHeaderRegister '}),
+ dict(name='li', attrs={'class':'idVideoBar idFirst'}),
+ dict(name='li', attrs={'class':'idSetStartPageLink idLast'}),
+ dict(name='li', attrs={'class':'idKinderNetzBar idLast'}),
+ dict(name='li', attrs={'class':'idFotoBar '}),
dict(name='div', attrs={'class':'subc noprint'}),
+ dict(name='div', attrs={'class':'idBreadcrumb'}),
+ dict(name='div', attrs={'class':'idLay idAdvertising idClStandard '}),
+ dict(name='span', attrs={'class':'idHeadLineIntro'}),
dict(name='p', attrs={'class':'breadcrumb'}),
dict(name='a', attrs={'style':'cursor:hand'}),
- dict(name='p', attrs={'class':'h5'})]
+ dict(name='p', attrs={'class':'h5'}),
+ dict(name='p', attrs={'class':'idMoreEnd'})]
#remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})]
- remove_tags_after = [dict(name='a', attrs={'href':'#'})]
+ remove_tags_after = [dict(name='p', attrs={'class':'idMoreEnd'})]
feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ]
-
-
-
diff --git a/resources/recipes/il_fatto.recipe b/resources/recipes/il_fatto.recipe
new file mode 100644
index 0000000000..69ad645b94
--- /dev/null
+++ b/resources/recipes/il_fatto.recipe
@@ -0,0 +1,30 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1286477122(BasicNewsRecipe):
+ title = u'Il Fatto Quotidiano'
+ oldest_article = 7
+ max_articles_per_feed = 25
+ language = 'it'
+ __author__ = 'egilh'
+
+ feeds = [
+ (u'Politica & Palazzo', u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'),
+ (u'Giustizia & impunit\xe0', u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'),
+ (u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'),
+ (u'Economia & Lobby', u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'),
+ (u'Lavoro & precari', u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'),
+ (u'Ambiente & Veleni', u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'),
+ (u'Sport & miliardi', u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'),
+ (u'Cronaca', u'http://www.ilfattoquotidiano.it/category/cronaca/feed/'),
+ (u'Mondo', u'http://www.ilfattoquotidiano.it/category/mondo/feed/'),
+ (u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'),
+ (u'Scuola', u'http://www.ilfattoquotidiano.it/category/scuola/feed/'),
+ (u'Tecno', u'http://www.ilfattoquotidiano.it/category/tecno/feed/'),
+ (u'Terza pagina', u'http://www.ilfattoquotidiano.it/category/terza-pagina/feed/'),
+ (u'Piacere quotidiano', u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'),
+ (u'Cervelli in fuga', u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'),
+ (u'Documentati!', u'http://www.ilfattoquotidiano.it/category/documentati/feed/'),
+ (u'Misfatto', u'http://www.ilfattoquotidiano.it/category/misfatto/feed/')
+]
+
diff --git a/resources/recipes/journalgazette.recipe b/resources/recipes/journalgazette.recipe
index 406917f5ce..4f12e92280 100644
--- a/resources/recipes/journalgazette.recipe
+++ b/resources/recipes/journalgazette.recipe
@@ -11,6 +11,7 @@ class AdvancedUserRecipe1283666183(BasicNewsRecipe):
title = u'Journal Gazette Ft. Wayne IN'
__author__ = 'cynvision'
oldest_article = 1
+ language = 'en'
max_articles_per_feed = 8
no_stylesheets = True
remove_javascript = True
diff --git a/resources/recipes/lalibre_be.recipe b/resources/recipes/lalibre_be.recipe
new file mode 100644
index 0000000000..53e346bf12
--- /dev/null
+++ b/resources/recipes/lalibre_be.recipe
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Lionel Bergeret '
+'''
+lalibre.be
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LaLibre(BasicNewsRecipe):
+ title = u'La Libre Belgique'
+ __author__ = u'Lionel Bergeret'
+ description = u'News from Belgium in French'
+ publisher = u'lalibre.be'
+ category = 'news, Belgium'
+ oldest_article = 3
+ language = 'fr_BE'
+
+ max_articles_per_feed = 20
+ no_stylesheets = True
+ use_embedded_content = False
+ timefmt = ' [%d %b %Y]'
+
+ keep_only_tags = [
+ dict(name = 'div', attrs = {'id': 'articleHat'})
+ ,dict(name = 'p', attrs = {'id': 'publicationDate'})
+ ,dict(name = 'div', attrs = {'id': 'articleText'})
+ ]
+
+ feeds = [
+ (u'L\'actu' , u'http://www.lalibre.be/rss/?section=10' )
+ ,(u'Culture' , u'http://www.lalibre.be/rss/?section=5' )
+ ,(u'Economie' , u'http://www.lalibre.be/rss/?section=3' )
+ ,(u'Libre Entreprise' , u'http://www.lalibre.be/rss/?section=904' )
+ ,(u'Sports' , u'http://www.lalibre.be/rss/?section=2' )
+ ,(u'Societe' , u'http://www.lalibre.be/rss/?section=12' )
+ ]
+
+ def get_cover_url(self):
+ cover_url = strftime('http://pdf-online.lalibre.be/pdfonline/image/%Y%m%d/llb_%Y%m%d_nam_libre_001.pdf.L.jpg')
+ return cover_url
diff --git a/resources/recipes/lameuse_be.recipe b/resources/recipes/lameuse_be.recipe
new file mode 100644
index 0000000000..03b7f84a5f
--- /dev/null
+++ b/resources/recipes/lameuse_be.recipe
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Lionel Bergeret '
+'''
+lameuse.be
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LaMeuse(BasicNewsRecipe):
+ title = u'La Meuse'
+ __author__ = u'Lionel Bergeret'
+ description = u'News from Belgium in French'
+ publisher = u'lameuse.be'
+ category = 'news, Belgium'
+ oldest_article = 3
+ encoding = 'utf8'
+ language = 'fr_BE'
+
+ max_articles_per_feed = 20
+ no_stylesheets = True
+ use_embedded_content = False
+ timefmt = ' [%d %b %Y]'
+
+ keep_only_tags = [
+ dict(name = 'div', attrs = {'id': 'article'})
+ ]
+
+ remove_tags = [
+ dict(name = 'div', attrs = {'class': 'sb-group'})
+ ,dict(name = 'div', attrs = {'id': 'share'})
+ ,dict(name = 'div', attrs = {'id': 'commentaires'})
+ ]
+
+ feeds = [
+ (u'Actualite', u'http://www.lameuse.be/services/fils_rss/actualite/index.xml' )
+ ,(u'Belgique', u'http://www.lameuse.be/services/fils_rss/actualite/belgique/index.xml' )
+ ,(u'Monde', u'http://www.lameuse.be/services/fils_rss/actualite/monde/index.xml' )
+ ,(u'Societe', u'http://www.lameuse.be/services/fils_rss/actualite/societe/index.xml' )
+ ,(u'Faits Divers', u'http://www.lameuse.be/services/fils_rss/actualite/faits_divers/index.xml' )
+ ,(u'Economie', u'http://www.lameuse.be/services/fils_rss/actualite/economie/index.xml' )
+ ,(u'Science', u'http://www.lameuse.be/services/fils_rss/actualite/science/index.xml' )
+ ,(u'Sante', u'http://www.lameuse.be/services/fils_rss/actualite/sante/index.xml' )
+ ,(u'Insolite', u'http://www.lameuse.be/services/fils_rss/magazine/insolite/index.xml' )
+ ,(u'Cinema', u'http://www.lameuse.be/services/fils_rss/culture/cinema/index.xml' )
+ ,(u'Musique', u'http://www.lameuse.be/services/fils_rss/culture/musique/index.xml' )
+ ,(u'Livres', u'http://www.lameuse.be/services/fils_rss/culture/livres/index.xml' )
+ ]
+
+ def get_cover_url(self):
+ cover_url = strftime('http://pdf.lameuse.be/pdf/lameuse_%Y-%m-%d_LIEG_ACTUALITE_1.PDF')
+ return cover_url
diff --git a/resources/recipes/lavenir_be.recipe b/resources/recipes/lavenir_be.recipe
new file mode 100644
index 0000000000..68be449ae5
--- /dev/null
+++ b/resources/recipes/lavenir_be.recipe
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Lionel Bergeret '
+'''
+lavenir.net
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LAvenir(BasicNewsRecipe):
+ title = u'L\'Avenir'
+ __author__ = u'Lionel Bergeret'
+ description = u'News from Belgium in French'
+ publisher = u'lavenir.net'
+ category = 'news, Belgium'
+ oldest_article = 3
+ encoding = 'utf8'
+ language = 'fr_BE'
+
+ max_articles_per_feed = 20
+ no_stylesheets = True
+ use_embedded_content = False
+ timefmt = ' [%d %b %Y]'
+
+ keep_only_tags = [
+ dict(name = 'div', attrs = {'class': 'photo'})
+ ,dict(name = 'p', attrs = {'class': 'intro'})
+ ,dict(name = 'div', attrs = {'class': 'article-body'})
+ ]
+
+ feeds = [
+ (u'Belgique' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1§ion=info&info=df156511-c24f-4f21-81c3-a5d439a9cf4b' )
+ ,(u'Monde' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1§ion=info&info=1642237c-66b9-4e8a-a8c1-288d61fefe7e' )
+ ,(u'Societe' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1§ion=info&info=12e1a2f4-7e03-4cf1-afec-016869072317' )
+ ]
+
+ def get_cover_url(self):
+ cover_url = 'http://www.lavenir.net/extra/Static/journal/Pdf/1/UNE_Nationale.PDF'
+ return cover_url
diff --git a/resources/recipes/lesoir_be.recipe b/resources/recipes/lesoir_be.recipe
new file mode 100644
index 0000000000..6b6891c3b8
--- /dev/null
+++ b/resources/recipes/lesoir_be.recipe
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Lionel Bergeret '
+'''
+lesoir.be
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LeSoirBe(BasicNewsRecipe):
+ title = u'Le Soir'
+ __author__ = u'Lionel Bergeret'
+ description = u'News from Belgium in French'
+ publisher = u'lesoir.be'
+ category = 'news, Belgium'
+ oldest_article = 3
+ language = 'fr_BE'
+
+ max_articles_per_feed = 20
+ no_stylesheets = True
+ use_embedded_content = False
+ timefmt = ' [%d %b %Y]'
+
+ keep_only_tags = [
+ dict(name = 'div', attrs = {'id': 'story_head'})
+ ,dict(name = 'div', attrs = {'id': 'story_body'})
+ ]
+
+ remove_tags = [
+ dict(name='form', attrs={'id':'story_actions'})
+ ,dict(name='div', attrs={'id':'sb-share'})
+ ,dict(name='div', attrs={'id':'sb-subscribe'})
+ ]
+
+ feeds = [
+ (u'Belgique' , u'http://www.lesoir.be/actualite/belgique/rss.xml' )
+ ,(u'France' , u'http://www.lesoir.be/actualite/france/rss.xml' )
+ ,(u'Monde' , u'http://www.lesoir.be/actualite/monde/rss.xml' )
+ ,(u'Regions' , u'http://www.lesoir.be/regions/rss.xml' )
+ ,(u'Vie du Net' , u'http://www.lesoir.be/actualite/vie_du_net/rss.xml' )
+ ,(u'Petite Gazette' , u'http://www.lesoir.be/actualite/sciences/rss.xml' )
+ ]
+
+ def get_cover_url(self):
+ cover_url = strftime( 'http://pdf.lesoir.be/pdf/%Y-%m-%d_BRUX_UNE_1.PDF')
+ return cover_url
diff --git a/resources/recipes/medscape.recipe b/resources/recipes/medscape.recipe
new file mode 100644
index 0000000000..ef406c64dc
--- /dev/null
+++ b/resources/recipes/medscape.recipe
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__author__ = 'Tony Stegall'
+__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobileread.com'
+__version__ = '1'
+__date__ = '01, October 2010'
+__docformat__ = 'English'
+
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class MedScrape(BasicNewsRecipe):
+
+ title = 'MedScape'
+ __author__ = 'Tony Stegall'
+ description = 'Nursing News'
+ language = 'en'
+ timefmt = ' [%a, %d %b, %Y]'
+ needs_subscription = True
+ masthead_url = 'http://images.medscape.com/pi/global/header/sp/bg-sp-medscape.gif'
+ no_stylesheets = True
+ remove_javascript = True
+ conversion_options = {'linearize_tables' : True}
+ extra_css = '''
+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+
+
+ p.authors{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
+ p.postingdate{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
+ h2{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
+
+
+ p{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+ '''
+
+ remove_tags = [dict(name='div', attrs={'class':['closewindow2']}),
+ dict(name='div', attrs={'id': ['basicheaderlinks']})
+ ]
+
+ def get_browser(self):
+ br = BasicNewsRecipe.get_browser()
+ if self.username is not None and self.password is not None:
+ br.open('https://profreg.medscape.com/px/getlogin.do')
+ br.select_form(name='LoginForm')
+ br['userId'] = self.username
+ br['password'] = self.password
+ br.submit()
+ return br
+
+ feeds = [
+ ('MedInfo', 'http://www.medscape.com/cx/rssfeeds/2685.xml'),
+ ]
+
+ def print_version(self,url):
+ #the original url is: http://www.medscape.com/viewarticle/728955?src=rss
+ #the print url is: http://www.medscape.com/viewarticle/728955_print
+ print_url = url.partition('?')[0] +'_print'
+ #print 'the printable version is: ',print_url
+ return print_url
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(attrs={'style':True}):
+ del item['style']
+ return soup
diff --git a/resources/recipes/new_yorker.recipe b/resources/recipes/new_yorker.recipe
index d19c5fb6b0..87dea4534b 100644
--- a/resources/recipes/new_yorker.recipe
+++ b/resources/recipes/new_yorker.recipe
@@ -1,50 +1,57 @@
-#!/usr/bin/env python
-
__license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic '
+__copyright__ = '2008-2010, Darko Miletic '
'''
newyorker.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
class NewYorker(BasicNewsRecipe):
title = 'The New Yorker'
__author__ = 'Darko Miletic'
description = 'The best of US journalism'
oldest_article = 15
- language = 'en'
-
+ language = 'en'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
publisher = 'Conde Nast Publications'
category = 'news, politics, USA'
encoding = 'cp1252'
+ publication_type = 'magazine'
+ masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
+ extra_css = """
+ body {font-family: "Times New Roman",Times,serif}
+ .articleauthor{color: #9F9F9F; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
+ .rubric{color: #CD0021; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
+ """
- keep_only_tags = [dict(name='div', attrs={'id':'printbody'})]
- remove_tags_after = dict(name='div',attrs={'id':'articlebody'})
- remove_tags = [
- dict(name='div', attrs={'class':['utils','articleRailLinks','icons'] })
- ,dict(name='link')
- ]
-
- feeds = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher' : publisher
+ , 'language' : language
+ }
+
+ keep_only_tags = [dict(name='div', attrs={'id':['articleheads','articleRail','articletext','photocredits']})]
+ remove_tags = [
+ dict(name=['meta','iframe','base','link','embed','object'])
+ ,dict(name='div', attrs={'class':['utils','articleRailLinks','icons'] })
+ ]
+ remove_attributes = ['lang']
+ feeds = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]
def print_version(self, url):
return url + '?printable=true'
- def get_article_url(self, article):
- return article.get('guid', None)
+ def image_url_processor(self, baseurl, url):
+ return url.strip()
- def postprocess_html(self, soup, x):
- body = soup.find('body')
- if body:
- html = soup.find('html')
- if html:
- body.extract()
- html.insert(2, body)
- mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
- soup.head.insert(1,mcharset)
- return soup
+ def get_cover_url(self):
+ cover_url = None
+ soup = self.index_to_soup('http://www.newyorker.com/magazine/toc/')
+ cover_item = soup.find('img',attrs={'id':'inThisIssuePhoto'})
+ if cover_item:
+ cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
+ return cover_url
+
\ No newline at end of file
diff --git a/resources/recipes/nightflier.recipe b/resources/recipes/nightflier.recipe
new file mode 100644
index 0000000000..5fc428cc76
--- /dev/null
+++ b/resources/recipes/nightflier.recipe
@@ -0,0 +1,46 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+nightfliersbookspace.blogspot.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NightfliersBookspace(BasicNewsRecipe):
+ title = "Nightflier's Bookspace"
+ __author__ = 'Darko Miletic'
+ description = 'SF, Fantasy, Books, Knjige'
+ oldest_article = 35
+ max_articles_per_feed = 100
+ language = 'sr'
+ encoding = 'utf-8'
+ no_stylesheets = True
+ use_embedded_content = True
+ publication_type = 'blog'
+ cover_url = ''
+ extra_css = """
+ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+ body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif}
+ .article_description{font-family: sans1, sans-serif}
+ img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
+ """
+
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : 'SF, fantasy, prevod, blog, Srbija'
+ , 'publisher': 'Ivan Jovanovic'
+ , 'language' : language
+ }
+
+ preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+ feeds = [(u'Posts', u'http://nightfliersbookspace.blogspot.com/feeds/posts/default')]
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ return self.adeify_images(soup)
+
+
diff --git a/resources/recipes/onionavclub.recipe b/resources/recipes/onionavclub.recipe
index 5bc069b6ed..038111cbe9 100644
--- a/resources/recipes/onionavclub.recipe
+++ b/resources/recipes/onionavclub.recipe
@@ -12,6 +12,7 @@ class BBC(BasicNewsRecipe):
title = u'The Onion AV Club'
__author__ = 'Stephen Williams'
description = 'Film, Television and Music Reviews'
+ language = 'en'
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
diff --git a/resources/recipes/oriental_daily.recipe b/resources/recipes/oriental_daily.recipe
new file mode 100644
index 0000000000..e8048edcc5
--- /dev/null
+++ b/resources/recipes/oriental_daily.recipe
@@ -0,0 +1,50 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Larry Chan '
+'''
+oriental daily
+'''
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class OrientalDaily(BasicNewsRecipe):
+    title          = 'Oriental Daily'
+ __author__ = 'Larry Chan'
+ description = 'News from HK'
+ oldest_article = 2
+ max_articles_per_feed = 100
+ simultaneous_downloads = 5
+ no_stylesheets = True
+ #delay = 1
+ use_embedded_content = False
+ encoding = 'utf8'
+ publisher = 'Oriental Daily'
+ category = 'news, HK, world'
+ language = 'zh'
+ publication_type = 'newsportal'
+ extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ ,'linearize_tables': True
+ }
+
+ remove_tags_after = dict(id='bottomNavCTN')
+
+ keep_only_tags = [
+ dict(name='div', attrs={'id':['leadin', 'contentCTN-right']})
+
+ ]
+
+ remove_tags = [
+ dict(name='div', attrs={'class':['largeAdsCTN', 'contentCTN-left', 'textAdsCTN', 'footerAds clear']}),
+ dict(name='div', attrs={'id':['articleNav']})
+
+ ]
+
+ remove_attributes = ['width','height','href']
+
+
+ feeds = [(u'Oriental Daily', u'http://orientaldaily.on.cc/rss/news.xml')]
diff --git a/resources/recipes/peterschiff.recipe b/resources/recipes/peterschiff.recipe
index 882dabc43b..842da7f733 100644
--- a/resources/recipes/peterschiff.recipe
+++ b/resources/recipes/peterschiff.recipe
@@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe):
description = 'Economic commentary'
publisher = 'Euro Pacific capital'
category = 'news, politics, economy, USA'
- oldest_article = 15
+ oldest_article = 25
max_articles_per_feed = 200
no_stylesheets = True
- encoding = 'cp1252'
+ encoding = 'utf8'
use_embedded_content = False
language = 'en'
- country = 'US'
remove_empty_feeds = True
- extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} '
+ extra_css = """
+ body{font-family: Verdana,Times,serif }
+ .field-field-commentary-writer-name{font-weight: bold}
+ .field-items{display: inline}
+ """
conversion_options = {
'comment' : description
@@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe):
, 'linearize_tables' : True
}
- keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})]
+ keep_only_tags = [
+ dict(name='h2',attrs={'id':'page-title'})
+ ,dict(name='div',attrs={'class':'node'})
+ ]
+ remove_tags = [
+ dict(name=['meta','link','base','iframe','embed'])
+ ,dict(attrs={'id':'text-zoom'})
+ ]
+ remove_attributes=['track','linktype','lang']
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]
diff --git a/resources/recipes/popscience.recipe b/resources/recipes/popscience.recipe
index 5f66d048a6..fe4a9588fc 100644
--- a/resources/recipes/popscience.recipe
+++ b/resources/recipes/popscience.recipe
@@ -31,7 +31,6 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
#The following will get read of the Gallery: links when found
def preprocess_html(self, soup) :
- print 'SOUP IS: ', soup
weblinks = soup.findAll(['head','h2'])
if weblinks is not None:
for link in weblinks:
diff --git a/resources/recipes/revista_muy.recipe b/resources/recipes/revista_muy.recipe
new file mode 100644
index 0000000000..e452a6f053
--- /dev/null
+++ b/resources/recipes/revista_muy.recipe
@@ -0,0 +1,110 @@
+from calibre.web.feeds.news import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from BeautifulSoup import Tag
+
+class RevistaMuyInteresante(BasicNewsRecipe):
+
+ title = 'Revista Muy Interesante'
+ __author__ = 'Jefferson Frantz'
+ description = 'Revista de divulgacion'
+ timefmt = ' [%d %b, %Y]'
+ language = 'es'
+
+ no_stylesheets = True
+ remove_javascript = True
+
+ extra_css = ' .txt_articulo{ font-family: sans-serif; font-size: medium; text-align: justify } .contentheading{font-family: serif; font-size: large; font-weight: bold; color: #000000; text-align: center}'
+
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+
+ for img_tag in soup.findAll('img'):
+ imagen = img_tag
+ new_tag = Tag(soup,'p')
+ img_tag.replaceWith(new_tag)
+ div = soup.find(attrs={'class':'article_category'})
+ div.insert(0,imagen)
+ break
+ return soup
+
+
+ preprocess_regexps = [
+ (re.compile(r'
\n'
def chapter_break(self, match):
chap = match.group('section')
styles = match.group('styles')
self.html_preprocess_sections = self.html_preprocess_sections + 1
- self.log("marked " + str(self.html_preprocess_sections) + " section markers based on punctuation. - " + str(chap))
+ self.log("marked " + unicode(self.html_preprocess_sections) +
+ " section markers based on punctuation. - " + unicode(chap))
return '<'+styles+' style="page-break-before:always">'+chap
def insert_indent(self, match):
@@ -63,7 +66,8 @@ class PreProcessor(object):
line_end = line_end_ere.findall(raw)
tot_htm_ends = len(htm_end)
tot_ln_fds = len(line_end)
- self.log("There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked up endings")
+ self.log("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
+ unicode(tot_htm_ends) + " marked up endings")
if percent > 1:
percent = 1
@@ -71,7 +75,7 @@ class PreProcessor(object):
percent = 0
min_lns = tot_ln_fds * percent
- self.log("There must be fewer than " + str(min_lns) + " unmarked lines to add markup")
+ self.log("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
if min_lns > tot_htm_ends:
return True
@@ -112,7 +116,7 @@ class PreProcessor(object):
txtindent = re.compile(ur'
[^>]*)>\s*(?P(]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE)
html = txtindent.sub(self.insert_indent, html)
if self.found_indents > 1:
- self.log("replaced "+str(self.found_indents)+ " nbsp indents with inline styles")
+ self.log("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
# remove remaining non-breaking spaces
html = re.sub(ur'\u00a0', ' ', html)
# Get rid of empty tags to simplify other processing
@@ -131,7 +135,8 @@ class PreProcessor(object):
lines = linereg.findall(html)
blanks_between_paragraphs = False
if len(lines) > 1:
- self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank")
+ self.log("There are " + unicode(len(blanklines)) + " blank lines. " +
+ unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
if float(len(blanklines)) / float(len(lines)) > 0.40 and getattr(self.extra_opts,
'remove_paragraph_spacing', False):
self.log("deleting blank lines")
@@ -170,20 +175,20 @@ class PreProcessor(object):
#print chapter_marker
heading = re.compile(']*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
- self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings")
+ self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
#
# Start with most typical chapter headings, get more aggressive until one works
if self.html_preprocess_sections < 10:
chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
html = chapdetect.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
- self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters")
+ self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying numeric chapters")
chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
html = chapdetect2.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
- self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words")
+ self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying with uppercase words")
chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html)
@@ -207,11 +212,11 @@ class PreProcessor(object):
# more of the lines break in the same region of the document then unwrapping is required
docanalysis = DocAnalysis(format, html)
hardbreaks = docanalysis.line_histogram(.50)
- self.log("Hard line breaks check returned "+str(hardbreaks))
+ self.log("Hard line breaks check returned "+unicode(hardbreaks))
# Calculate Length
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor)
- self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
+ self.log("*** Median line length is " + unicode(length) + ", calculated with " + format + " format ***")
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
if hardbreaks or unwrap_factor < 0.4:
self.log("Unwrapping required, unwrapping Lines")
@@ -240,7 +245,8 @@ class PreProcessor(object):
# If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < 10:
- self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections))
+ self.log("Looking for more split points based on punctuation,"
+ " currently have " + unicode(self.html_preprocess_sections))
chapdetect3 = re.compile(r'<(?P(p|div)[^>]*)>\s*(?P(]*>)?\s*(<[ibu][^>]*>){0,2}\s*(]*>)?\s*(<[ibu][^>]*>){0,2}\s*(]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*()?([ibu]>){0,2}\s*()?\s*([ibu]>){0,2}\s*()?\s*(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html)
# search for places where a first or second level heading is immediately followed by another
diff --git a/src/calibre/ebooks/epub/fix/epubcheck.py b/src/calibre/ebooks/epub/fix/epubcheck.py
index fd913a654b..81f4ce4d80 100644
--- a/src/calibre/ebooks/epub/fix/epubcheck.py
+++ b/src/calibre/ebooks/epub/fix/epubcheck.py
@@ -43,7 +43,11 @@ class Epubcheck(ePubFixer):
default=default)
except:
raise InvalidEpub('Invalid date set in OPF', raw)
- sval = ts.strftime('%Y-%m-%d')
+ try:
+ sval = ts.strftime('%Y-%m-%d')
+ except:
+ from calibre import strftime
+ sval = strftime('%Y-%m-%d', ts.timetuple())
if sval != raw:
self.log.error(
'OPF contains date', raw, 'that epubcheck does not like')
diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py
index 214511ae14..cdd69ea50f 100644
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@@ -117,7 +117,8 @@ class EPUBInput(InputFormatPlugin):
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
opf = None
for f in walk(u'.'):
- if f.lower().endswith('.opf') and '__MACOSX' not in f:
+ if f.lower().endswith('.opf') and '__MACOSX' not in f and \
+ not os.path.basename(f).startswith('.'):
opf = os.path.abspath(f)
break
path = getattr(stream, 'name', 'stream')
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index d4a21e2c8c..01e5190640 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -10,10 +10,9 @@ import os, mimetypes, sys, re
from urllib import unquote, quote
from urlparse import urlparse
-from calibre import relpath, prints
+from calibre import relpath
from calibre.utils.config import tweaks
-from calibre.utils.date import isoformat
_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
def string_to_authors(raw):
@@ -45,7 +44,15 @@ def author_to_author_sort(author):
def authors_to_sort_string(authors):
return ' & '.join(map(author_to_author_sort, authors))
-_title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
+try:
+ _title_pat = re.compile(tweaks.get('title_sort_articles',
+ r'^(A|The|An)\s+'), re.IGNORECASE)
+except:
+ print 'Error in title sort pattern'
+ import traceback
+ traceback.print_exc()
+ _title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
+
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
def title_sort(title):
@@ -221,214 +228,18 @@ class ResourceCollection(object):
-class MetaInformation(object):
- '''Convenient encapsulation of book metadata'''
-
- @staticmethod
- def copy(mi):
- ans = MetaInformation(mi.title, mi.authors)
- for attr in ('author_sort', 'title_sort', 'comments', 'category',
- 'publisher', 'series', 'series_index', 'rating',
- 'isbn', 'tags', 'cover_data', 'application_id', 'guide',
- 'manifest', 'spine', 'toc', 'cover', 'language',
- 'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
- 'author_sort_map',
- 'pubdate', 'rights', 'publication_type', 'uuid'):
- if hasattr(mi, attr):
- setattr(ans, attr, getattr(mi, attr))
-
- def __init__(self, title, authors=(_('Unknown'),)):
- '''
+def MetaInformation(title, authors=(_('Unknown'),)):
+ ''' Convenient encapsulation of book metadata, needed for compatibility
@param title: title or ``_('Unknown')`` or a MetaInformation object
@param authors: List of strings or []
- '''
- mi = None
- if hasattr(title, 'title') and hasattr(title, 'authors'):
- mi = title
- title = mi.title
- authors = mi.authors
- self.title = title
- self.author = list(authors) if authors else []# Needed for backward compatibility
- #: List of strings or []
- self.authors = list(authors) if authors else []
- self.tags = getattr(mi, 'tags', [])
- #: mi.cover_data = (ext, data)
- self.cover_data = getattr(mi, 'cover_data', (None, None))
- self.author_sort_map = getattr(mi, 'author_sort_map', {})
-
- for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
- 'series', 'series_index', 'rating', 'isbn', 'language',
- 'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
- 'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
- 'rights', 'publication_type', 'uuid',
- ):
- setattr(self, x, getattr(mi, x, None))
-
- def print_all_attributes(self):
- for x in ('title','author', 'author_sort', 'title_sort', 'comments', 'category', 'publisher',
- 'series', 'series_index', 'tags', 'rating', 'isbn', 'language',
- 'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
- 'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
- 'rights', 'publication_type', 'uuid', 'author_sort_map'
- ):
- prints(x, getattr(self, x, 'None'))
-
- def smart_update(self, mi, replace_metadata=False):
- '''
- Merge the information in C{mi} into self. In case of conflicts, the
- information in C{mi} takes precedence, unless the information in mi is
- NULL. If replace_metadata is True, then the information in mi always
- takes precedence.
- '''
- if mi.title and mi.title != _('Unknown'):
- self.title = mi.title
-
- if mi.authors and mi.authors[0] != _('Unknown'):
- self.authors = mi.authors
-
- for attr in ('author_sort', 'title_sort', 'category',
- 'publisher', 'series', 'series_index', 'rating',
- 'isbn', 'application_id', 'manifest', 'spine', 'toc',
- 'cover', 'guide', 'book_producer',
- 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
- 'publication_type', 'uuid'):
- if replace_metadata:
- setattr(self, attr, getattr(mi, attr, 1.0 if \
- attr == 'series_index' else None))
- elif hasattr(mi, attr):
- val = getattr(mi, attr)
- if val is not None:
- setattr(self, attr, val)
-
- if replace_metadata:
- self.tags = mi.tags
- elif mi.tags:
- self.tags += mi.tags
- self.tags = list(set(self.tags))
-
- if mi.author_sort_map:
- self.author_sort_map.update(mi.author_sort_map)
-
- if getattr(mi, 'cover_data', False):
- other_cover = mi.cover_data[-1]
- self_cover = self.cover_data[-1] if self.cover_data else ''
- if not self_cover: self_cover = ''
- if not other_cover: other_cover = ''
- if len(other_cover) > len(self_cover):
- self.cover_data = mi.cover_data
-
- if replace_metadata:
- self.comments = getattr(mi, 'comments', '')
- else:
- my_comments = getattr(self, 'comments', '')
- other_comments = getattr(mi, 'comments', '')
- if not my_comments:
- my_comments = ''
- if not other_comments:
- other_comments = ''
- if len(other_comments.strip()) > len(my_comments.strip()):
- self.comments = other_comments
-
- other_lang = getattr(mi, 'language', None)
- if other_lang and other_lang.lower() != 'und':
- self.language = other_lang
-
-
- def format_series_index(self):
- try:
- x = float(self.series_index)
- except ValueError:
- x = 1
- return fmt_sidx(x)
-
- def authors_from_string(self, raw):
- self.authors = string_to_authors(raw)
-
- def format_authors(self):
- return authors_to_string(self.authors)
-
- def format_tags(self):
- return u', '.join([unicode(t) for t in self.tags])
-
- def format_rating(self):
- return unicode(self.rating)
-
- def __unicode__(self):
- ans = []
- def fmt(x, y):
- ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
-
- fmt('Title', self.title)
- if self.title_sort:
- fmt('Title sort', self.title_sort)
- if self.authors:
- fmt('Author(s)', authors_to_string(self.authors) + \
- ((' [' + self.author_sort + ']') if self.author_sort else ''))
- if self.publisher:
- fmt('Publisher', self.publisher)
- if getattr(self, 'book_producer', False):
- fmt('Book Producer', self.book_producer)
- if self.category:
- fmt('Category', self.category)
- if self.comments:
- fmt('Comments', self.comments)
- if self.isbn:
- fmt('ISBN', self.isbn)
- if self.tags:
- fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
- if self.series:
- fmt('Series', self.series + ' #%s'%self.format_series_index())
- if self.language:
- fmt('Language', self.language)
- if self.rating is not None:
- fmt('Rating', self.rating)
- if self.timestamp is not None:
- fmt('Timestamp', isoformat(self.timestamp))
- if self.pubdate is not None:
- fmt('Published', isoformat(self.pubdate))
- if self.rights is not None:
- fmt('Rights', unicode(self.rights))
- if self.lccn:
- fmt('LCCN', unicode(self.lccn))
- if self.lcc:
- fmt('LCC', unicode(self.lcc))
- if self.ddc:
- fmt('DDC', unicode(self.ddc))
-
- return u'\n'.join(ans)
-
- def to_html(self):
- ans = [(_('Title'), unicode(self.title))]
- ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
- ans += [(_('Publisher'), unicode(self.publisher))]
- ans += [(_('Producer'), unicode(self.book_producer))]
- ans += [(_('Comments'), unicode(self.comments))]
- ans += [('ISBN', unicode(self.isbn))]
- if self.lccn:
- ans += [('LCCN', unicode(self.lccn))]
- if self.lcc:
- ans += [('LCC', unicode(self.lcc))]
- if self.ddc:
- ans += [('DDC', unicode(self.ddc))]
- ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
- if self.series:
- ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
- ans += [(_('Language'), unicode(self.language))]
- if self.timestamp is not None:
- ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
- if self.pubdate is not None:
- ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
- if self.rights is not None:
- ans += [(_('Rights'), unicode(self.rights))]
- for i, x in enumerate(ans):
- ans[i] = u'
%s
%s
'%x
- return u'
%s
'%u'\n'.join(ans)
-
- def __str__(self):
- return self.__unicode__().encode('utf-8')
-
- def __nonzero__(self):
- return bool(self.title or self.author or self.comments or self.tags)
+ '''
+ from calibre.ebooks.metadata.book.base import Metadata
+ mi = None
+ if hasattr(title, 'title') and hasattr(title, 'authors'):
+ mi = title
+ title = mi.title
+ authors = mi.authors
+ return Metadata(title, authors, other=mi)
def check_isbn10(isbn):
try:
diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py
index c3b95f1188..82de7400d7 100644
--- a/src/calibre/ebooks/metadata/book/__init__.py
+++ b/src/calibre/ebooks/metadata/book/__init__.py
@@ -11,48 +11,45 @@ an empty list/dictionary for complex types and (None, None) for cover_data
'''
SOCIAL_METADATA_FIELDS = frozenset([
- 'tags', # Ordered list
- # A floating point number between 0 and 10
- 'rating',
- # A simple HTML enabled string
- 'comments',
- # A simple string
- 'series',
- # A floating point number
- 'series_index',
+ 'tags', # Ordered list
+ 'rating', # A floating point number between 0 and 10
+ 'comments', # A simple HTML enabled string
+ 'series', # A simple string
+ 'series_index', # A floating point number
# Of the form { scheme1:value1, scheme2:value2}
# For example: {'isbn':'123456789', 'doi':'xxxx', ... }
'classifiers',
- 'isbn', # Pseudo field for convenience, should get/set isbn classifier
+])
+'''
+The list of names that convert to classifiers when used in get and set.
+'''
+
+TOP_LEVEL_CLASSIFIERS = frozenset([
+ 'isbn',
])
PUBLICATION_METADATA_FIELDS = frozenset([
- # title must never be None. Should be _('Unknown')
- 'title',
+ 'title', # title must never be None. Should be _('Unknown')
# Pseudo field that can be set, but if not set is auto generated
# from title and languages
'title_sort',
- # Ordered list of authors. Must never be None, can be [_('Unknown')]
- 'authors',
- # Map of sort strings for each author
- 'author_sort_map',
+ 'authors', # Ordered list. Must never be None, can be [_('Unknown')]
+ 'author_sort_map', # Map of sort strings for each author
# Pseudo field that can be set, but if not set is auto generated
# from authors and languages
'author_sort',
'book_producer',
- # Dates and times must be timezone aware
- 'timestamp',
+ 'timestamp', # Dates and times must be timezone aware
'pubdate',
'rights',
# So far only known publication type is periodical:calibre
# If None, means book
'publication_type',
- # A UUID usually of type 4
- 'uuid',
- 'languages', # ordered list
- # Simple string, no special semantics
- 'publisher',
+ 'uuid', # A UUID usually of type 4
+ 'language', # the primary language of this book
+ 'languages', # ordered list
+ 'publisher', # Simple string, no special semantics
# Absolute path to image file encoded in filesystem_encoding
'cover',
# Of the form (format, data) where format is, for e.g. 'jpeg', 'png', 'gif'...
@@ -69,33 +66,63 @@ BOOK_STRUCTURE_FIELDS = frozenset([
])
USER_METADATA_FIELDS = frozenset([
- # A dict of a form to be specified
+ # A dict of dicts similar to field_metadata. Each field description dict
+ # also contains a value field with the key #value#.
'user_metadata',
])
DEVICE_METADATA_FIELDS = frozenset([
- # Ordered list of strings
- 'device_collections',
- 'lpath', # Unicode, / separated
- # In bytes
- 'size',
- # Mimetype of the book file being represented
- 'mime',
+ 'device_collections', # Ordered list of strings
+ 'lpath', # Unicode, / separated
+ 'size', # In bytes
+ 'mime', # Mimetype of the book file being represented
+
])
CALIBRE_METADATA_FIELDS = frozenset([
- # An application id
- # Semantics to be defined. Is it a db key? a db name + key? A uuid?
- 'application_id',
+ 'application_id', # An application id, currently set to the db_id.
+ 'db_id', # the calibre primary key of the item.
+ 'formats', # list of formats (extensions) for this book
]
)
+ALL_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
+ PUBLICATION_METADATA_FIELDS).union(
+ BOOK_STRUCTURE_FIELDS).union(
+ USER_METADATA_FIELDS).union(
+ DEVICE_METADATA_FIELDS).union(
+ CALIBRE_METADATA_FIELDS)
-SERIALIZABLE_FIELDS = SOCIAL_METADATA_FIELDS.union(
- USER_METADATA_FIELDS).union(
- PUBLICATION_METADATA_FIELDS).union(
- CALIBRE_METADATA_FIELDS).union(
- frozenset(['lpath'])) # I don't think we need device_collections
+# All fields except custom fields
+STANDARD_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
+ PUBLICATION_METADATA_FIELDS).union(
+ BOOK_STRUCTURE_FIELDS).union(
+ DEVICE_METADATA_FIELDS).union(
+ CALIBRE_METADATA_FIELDS)
-# Serialization of covers/thumbnails will have to be handled carefully, maybe
-# as an option to the serializer class
+# Metadata fields that smart update must do special processing to copy.
+SC_FIELDS_NOT_COPIED = frozenset(['title', 'title_sort', 'authors',
+ 'author_sort', 'author_sort_map',
+ 'cover_data', 'tags', 'language',
+ 'classifiers'])
+
+# Metadata fields that smart update should copy only if the source is not None
+SC_FIELDS_COPY_NOT_NULL = frozenset(['lpath', 'size', 'comments', 'thumbnail'])
+
+# Metadata fields that smart update should copy without special handling
+SC_COPYABLE_FIELDS = SOCIAL_METADATA_FIELDS.union(
+ PUBLICATION_METADATA_FIELDS).union(
+ BOOK_STRUCTURE_FIELDS).union(
+ DEVICE_METADATA_FIELDS).union(
+ CALIBRE_METADATA_FIELDS) - \
+ SC_FIELDS_NOT_COPIED.union(
+ SC_FIELDS_COPY_NOT_NULL)
+
+SERIALIZABLE_FIELDS = SOCIAL_METADATA_FIELDS.union(
+ USER_METADATA_FIELDS).union(
+ PUBLICATION_METADATA_FIELDS).union(
+ CALIBRE_METADATA_FIELDS).union(
+ DEVICE_METADATA_FIELDS) - \
+ frozenset(['device_collections', 'formats',
+ 'cover_data'])
+ # these are rebuilt when needed
diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index 3fed47091f..3b96c98a7b 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -5,9 +5,19 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import copy
+import copy, traceback
+
+from calibre import prints
+from calibre.constants import DEBUG
+from calibre.ebooks.metadata.book import SC_COPYABLE_FIELDS
+from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL
+from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS
+from calibre.ebooks.metadata.book import TOP_LEVEL_CLASSIFIERS
+from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
+from calibre.library.field_metadata import FieldMetadata
+from calibre.utils.date import isoformat, format_date
+from calibre.utils.formatter import TemplateFormatter
-from calibre.ebooks.metadata.book import RESERVED_METADATA_FIELDS
NULL_VALUES = {
'user_metadata': {},
@@ -19,103 +29,609 @@ NULL_VALUES = {
'author_sort_map': {},
'authors' : [_('Unknown')],
'title' : _('Unknown'),
+ 'language' : 'und'
}
+field_metadata = FieldMetadata()
+
+class SafeFormat(TemplateFormatter):
+
+ def get_value(self, key, args, kwargs):
+ try:
+ if key != 'title_sort':
+ key = field_metadata.search_term_to_field_key(key.lower())
+ b = self.book.get_user_metadata(key, False)
+ if b and b['datatype'] == 'int' and self.book.get(key, 0) == 0:
+ v = ''
+ elif b and b['datatype'] == 'float' and b.get(key, 0.0) == 0.0:
+ v = ''
+ else:
+ ign, v = self.book.format_field(key.lower(), series_with_index=False)
+ if v is None:
+ return ''
+ if v == '':
+ return ''
+ return v
+ except:
+ if DEBUG:
+ traceback.print_exc()
+ return key
+
+composite_formatter = SafeFormat()
+
class Metadata(object):
'''
- This class must expose a superset of the API of MetaInformation in terms
- of attribute access and methods. Only the __init__ method is different.
- MetaInformation will simply become a function that creates and fills in
- the attributes of this class.
+ A class representing all the metadata for a book.
Please keep the method based API of this class to a minimum. Every method
becomes a reserved field name.
'''
- def __init__(self):
- object.__setattr__(self, '_data', copy.deepcopy(NULL_VALUES))
+ def __init__(self, title, authors=(_('Unknown'),), other=None):
+ '''
+ @param title: title or ``_('Unknown')``
+ @param authors: List of strings or []
+ @param other: None or a metadata object
+ '''
+ _data = copy.deepcopy(NULL_VALUES)
+ object.__setattr__(self, '_data', _data)
+ if other is not None:
+ self.smart_update(other)
+ else:
+ if title:
+ self.title = title
+ if authors:
+ #: List of strings or []
+ self.author = list(authors) if authors else []# Needed for backward compatibility
+ self.authors = list(authors) if authors else []
+
+ def is_null(self, field):
+ null_val = NULL_VALUES.get(field, None)
+ val = getattr(self, field, None)
+ return not val or val == null_val
def __getattribute__(self, field):
_data = object.__getattribute__(self, '_data')
- if field in RESERVED_METADATA_FIELDS:
+ if field in TOP_LEVEL_CLASSIFIERS:
+ return _data.get('classifiers').get(field, None)
+ if field in STANDARD_METADATA_FIELDS:
return _data.get(field, None)
try:
return object.__getattribute__(self, field)
except AttributeError:
pass
if field in _data['user_metadata'].iterkeys():
- # TODO: getting user metadata values
- pass
+ d = _data['user_metadata'][field]
+ val = d['#value#']
+ if d['datatype'] != 'composite':
+ return val
+ if val is None:
+ d['#value#'] = 'RECURSIVE_COMPOSITE FIELD (Metadata) ' + field
+ val = d['#value#'] = composite_formatter.safe_format(
+ d['display']['composite_template'],
+ self,
+ _('TEMPLATE ERROR'),
+ self).strip()
+ return val
+
raise AttributeError(
'Metadata object has no attribute named: '+ repr(field))
-
- def __setattr__(self, field, val):
+ def __setattr__(self, field, val, extra=None):
_data = object.__getattribute__(self, '_data')
- if field in RESERVED_METADATA_FIELDS:
- if field != 'user_metadata':
- if not val:
- val = NULL_VALUES[field]
- _data[field] = val
- else:
- raise AttributeError('You cannot set user_metadata directly.')
+ if field in TOP_LEVEL_CLASSIFIERS:
+ _data['classifiers'].update({field: val})
+ elif field in STANDARD_METADATA_FIELDS:
+ if val is None:
+ val = NULL_VALUES.get(field, None)
+ _data[field] = val
elif field in _data['user_metadata'].iterkeys():
- # TODO: Setting custom column values
- pass
+ if _data['user_metadata'][field]['datatype'] == 'composite':
+ _data['user_metadata'][field]['#value#'] = None
+ else:
+ _data['user_metadata'][field]['#value#'] = val
+ _data['user_metadata'][field]['#extra#'] = extra
else:
# You are allowed to stick arbitrary attributes onto this object as
- # long as they dont conflict with global or user metadata names
+ # long as they don't conflict with global or user metadata names
# Don't abuse this privilege
self.__dict__[field] = val
- @property
- def user_metadata_names(self):
- 'The set of user metadata names this object knows about'
+ def __iter__(self):
+ return object.__getattribute__(self, '_data').iterkeys()
+
+ def has_key(self, key):
+ return key in object.__getattribute__(self, '_data')
+
+ def deepcopy(self):
+ m = Metadata(None)
+ m.__dict__ = copy.deepcopy(self.__dict__)
+ object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))
+ return m
+
+ def deepcopy_metadata(self):
+ m = Metadata(None)
+ object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))
+ return m
+
+ def get(self, field, default=None):
+ try:
+ return self.__getattribute__(field)
+ except AttributeError:
+ return default
+
+ def get_extra(self, field):
_data = object.__getattribute__(self, '_data')
- return frozenset(_data['user_metadata'].iterkeys())
+ if field in _data['user_metadata'].iterkeys():
+ return _data['user_metadata'][field]['#extra#']
+ raise AttributeError(
+ 'Metadata object has no attribute named: '+ repr(field))
- # Old MetaInformation API {{{
- def copy(self):
- pass
+ def set(self, field, val, extra=None):
+ self.__setattr__(field, val, extra)
+ def get_classifiers(self):
+ '''
+ Return a copy of the classifiers dictionary.
+ The dict is small, and the penalty for using a reference where a copy is
+ needed is large. Also, we don't want any manipulations of the returned
+ dict to show up in the book.
+ '''
+ return copy.deepcopy(object.__getattribute__(self, '_data')['classifiers'])
+
+ def set_classifiers(self, classifiers):
+ object.__getattribute__(self, '_data')['classifiers'] = classifiers
+
+ # field-oriented interface. Intended to be the same as in LibraryDatabase
+
+ def standard_field_keys(self):
+ '''
+ return a list of all possible keys, even if this book doesn't have them
+ '''
+ return STANDARD_METADATA_FIELDS
+
+ def custom_field_keys(self):
+ '''
+ return a list of the custom fields in this book
+ '''
+ return object.__getattribute__(self, '_data')['user_metadata'].iterkeys()
+
+ def all_field_keys(self):
+ '''
+ All field keys known by this instance, even if their value is None
+ '''
+ _data = object.__getattribute__(self, '_data')
+ return frozenset(ALL_METADATA_FIELDS.union(_data['user_metadata'].iterkeys()))
+
+ def metadata_for_field(self, key):
+ '''
+ return metadata describing a standard or custom field.
+ '''
+ if key not in self.custom_field_keys():
+ return self.get_standard_metadata(key, make_copy=False)
+ return self.get_user_metadata(key, make_copy=False)
+
+ def all_non_none_fields(self):
+ '''
+ Return a dictionary containing all non-None metadata fields, including
+ the custom ones.
+ '''
+ result = {}
+ _data = object.__getattribute__(self, '_data')
+ for attr in STANDARD_METADATA_FIELDS:
+ v = _data.get(attr, None)
+ if v is not None:
+ result[attr] = v
+ # separate these because it uses the self.get(), not _data.get()
+ for attr in TOP_LEVEL_CLASSIFIERS:
+ v = self.get(attr, None)
+ if v is not None:
+ result[attr] = v
+ for attr in _data['user_metadata'].iterkeys():
+ v = self.get(attr, None)
+ if v is not None:
+ result[attr] = v
+ if _data['user_metadata'][attr]['datatype'] == 'series':
+ result[attr+'_index'] = _data['user_metadata'][attr]['#extra#']
+ return result
+
+ # End of field-oriented interface
+
+ # Extended interfaces. These permit one to get copies of metadata dictionaries, and to
+ # get and set custom field metadata
+
+ def get_standard_metadata(self, field, make_copy):
+ '''
+        return field metadata for the field if it is there. Otherwise return
+ None. field is the key name, not the label. Return a copy if requested,
+ just in case the user wants to change values in the dict.
+ '''
+ if field in field_metadata and field_metadata[field]['kind'] == 'field':
+ if make_copy:
+ return copy.deepcopy(field_metadata[field])
+ return field_metadata[field]
+ return None
+
+ def get_all_standard_metadata(self, make_copy):
+ '''
+ return a dict containing all the standard field metadata associated with
+ the book.
+ '''
+ if not make_copy:
+ return field_metadata
+ res = {}
+ for k in field_metadata:
+ if field_metadata[k]['kind'] == 'field':
+ res[k] = copy.deepcopy(field_metadata[k])
+ return res
+
+ def get_all_user_metadata(self, make_copy):
+ '''
+ return a dict containing all the custom field metadata associated with
+ the book.
+ '''
+ _data = object.__getattribute__(self, '_data')
+ user_metadata = _data['user_metadata']
+ if not make_copy:
+ return user_metadata
+ res = {}
+ for k in user_metadata:
+ res[k] = copy.deepcopy(user_metadata[k])
+ return res
+
+ def get_user_metadata(self, field, make_copy):
+ '''
+ return field metadata from the object if it is there. Otherwise return
+ None. field is the key name, not the label. Return a copy if requested,
+ just in case the user wants to change values in the dict.
+ '''
+ _data = object.__getattribute__(self, '_data')
+ _data = _data['user_metadata']
+ if field in _data:
+ if make_copy:
+ return copy.deepcopy(_data[field])
+ return _data[field]
+ return None
+
+ def set_all_user_metadata(self, metadata):
+ '''
+ store custom field metadata into the object. Field is the key name
+ not the label
+ '''
+ if metadata is None:
+ traceback.print_stack()
+ else:
+ for key in metadata:
+ self.set_user_metadata(key, metadata[key])
+
+ def set_user_metadata(self, field, metadata):
+ '''
+ store custom field metadata for one column into the object. Field is
+ the key name not the label
+ '''
+ if field is not None:
+ if not field.startswith('#'):
+ raise AttributeError(
+ 'Custom field name %s must begin with \'#\''%repr(field))
+ if metadata is None:
+ traceback.print_stack()
+ return
+ metadata = copy.deepcopy(metadata)
+ if '#value#' not in metadata:
+ if metadata['datatype'] == 'text' and metadata['is_multiple']:
+ metadata['#value#'] = []
+ else:
+ metadata['#value#'] = None
+ _data = object.__getattribute__(self, '_data')
+ _data['user_metadata'][field] = metadata
+
+ def template_to_attribute(self, other, ops):
+ '''
+ Takes a list [(src,dest), (src,dest)], evaluates the template in the
+ context of other, then copies the result to self[dest]. This is on a
+ best-efforts basis. Some assignments can make no sense.
+ '''
+ if not ops:
+ return
+ for op in ops:
+ try:
+ src = op[0]
+ dest = op[1]
+ val = composite_formatter.safe_format\
+ (src, other, 'PLUGBOARD TEMPLATE ERROR', other)
+ if dest == 'tags':
+ self.set(dest, [f.strip() for f in val.split(',') if f.strip()])
+ elif dest == 'authors':
+ self.set(dest, [f.strip() for f in val.split('&') if f.strip()])
+ else:
+ self.set(dest, val)
+ except:
+ if DEBUG:
+ traceback.print_exc()
+
+ # Old Metadata API {{{
def print_all_attributes(self):
- pass
+ for x in STANDARD_METADATA_FIELDS:
+ prints('%s:'%x, getattr(self, x, 'None'))
+ for x in self.custom_field_keys():
+ meta = self.get_user_metadata(x, make_copy=False)
+ if meta is not None:
+ prints(x, meta)
+ prints('--------------')
def smart_update(self, other, replace_metadata=False):
- pass
+ '''
+ Merge the information in `other` into self. In case of conflicts, the information
+ in `other` takes precedence, unless the information in `other` is NULL.
+ '''
+ def copy_not_none(dest, src, attr):
+ v = getattr(src, attr, None)
+ if v not in (None, NULL_VALUES.get(attr, None)):
+ setattr(dest, attr, copy.deepcopy(v))
- def format_series_index(self):
- pass
+ if other.title and other.title != _('Unknown'):
+ self.title = other.title
+ if hasattr(other, 'title_sort'):
+ self.title_sort = other.title_sort
+
+ if other.authors and other.authors[0] != _('Unknown'):
+ self.authors = list(other.authors)
+ if hasattr(other, 'author_sort_map'):
+ self.author_sort_map = dict(other.author_sort_map)
+ if hasattr(other, 'author_sort'):
+ self.author_sort = other.author_sort
+
+ if replace_metadata:
+ # SPECIAL_FIELDS = frozenset(['lpath', 'size', 'comments', 'thumbnail'])
+ for attr in SC_COPYABLE_FIELDS:
+ setattr(self, attr, getattr(other, attr, 1.0 if \
+ attr == 'series_index' else None))
+ self.tags = other.tags
+ self.cover_data = getattr(other, 'cover_data',
+ NULL_VALUES['cover_data'])
+ self.set_all_user_metadata(other.get_all_user_metadata(make_copy=True))
+ for x in SC_FIELDS_COPY_NOT_NULL:
+ copy_not_none(self, other, x)
+ if callable(getattr(other, 'get_classifiers', None)):
+ self.set_classifiers(other.get_classifiers())
+ # language is handled below
+ else:
+ for attr in SC_COPYABLE_FIELDS:
+ copy_not_none(self, other, attr)
+ for x in SC_FIELDS_COPY_NOT_NULL:
+ copy_not_none(self, other, x)
+
+ if other.tags:
+ # Case-insensitive but case preserving merging
+ lotags = [t.lower() for t in other.tags]
+ lstags = [t.lower() for t in self.tags]
+ ot, st = map(frozenset, (lotags, lstags))
+ for t in st.intersection(ot):
+ sidx = lstags.index(t)
+ oidx = lotags.index(t)
+ self.tags[sidx] = other.tags[oidx]
+ self.tags += [t for t in other.tags if t.lower() in ot-st]
+
+ if getattr(other, 'cover_data', False):
+ other_cover = other.cover_data[-1]
+ self_cover = self.cover_data[-1] if self.cover_data else ''
+ if not self_cover: self_cover = ''
+ if not other_cover: other_cover = ''
+ if len(other_cover) > len(self_cover):
+ self.cover_data = other.cover_data
+
+ if callable(getattr(other, 'custom_field_keys', None)):
+ for x in other.custom_field_keys():
+ meta = other.get_user_metadata(x, make_copy=True)
+ if meta is not None:
+ self_tags = self.get(x, [])
+ self.set_user_metadata(x, meta) # get... did the deepcopy
+ other_tags = other.get(x, [])
+ if meta['is_multiple']:
+ # Case-insensitive but case preserving merging
+ lotags = [t.lower() for t in other_tags]
+ lstags = [t.lower() for t in self_tags]
+ ot, st = map(frozenset, (lotags, lstags))
+ for t in st.intersection(ot):
+ sidx = lstags.index(t)
+ oidx = lotags.index(t)
+ self_tags[sidx] = other.tags[oidx]
+ self_tags += [t for t in other.tags if t.lower() in ot-st]
+ setattr(self, x, self_tags)
+
+ my_comments = getattr(self, 'comments', '')
+ other_comments = getattr(other, 'comments', '')
+ if not my_comments:
+ my_comments = ''
+ if not other_comments:
+ other_comments = ''
+ if len(other_comments.strip()) > len(my_comments.strip()):
+ self.comments = other_comments
+
+ # Copy all the non-none classifiers
+ if callable(getattr(other, 'get_classifiers', None)):
+ d = self.get_classifiers()
+ s = other.get_classifiers()
+ d.update([v for v in s.iteritems() if v[1] is not None])
+ self.set_classifiers(d)
+ else:
+ # other structure not Metadata. Copy the top-level classifiers
+ for attr in TOP_LEVEL_CLASSIFIERS:
+ copy_not_none(self, other, attr)
+
+ other_lang = getattr(other, 'language', None)
+ if other_lang and other_lang.lower() != 'und':
+ self.language = other_lang
+
+ def format_series_index(self, val=None):
+ from calibre.ebooks.metadata import fmt_sidx
+ v = self.series_index if val is None else val
+ try:
+ x = float(v)
+ except (ValueError, TypeError):
+ x = 1
+ return fmt_sidx(x)
def authors_from_string(self, raw):
- pass
+ from calibre.ebooks.metadata import string_to_authors
+ self.authors = string_to_authors(raw)
def format_authors(self):
- pass
+ from calibre.ebooks.metadata import authors_to_string
+ return authors_to_string(self.authors)
def format_tags(self):
- pass
+ return u', '.join([unicode(t) for t in self.tags])
def format_rating(self):
return unicode(self.rating)
+ def format_field(self, key, series_with_index=True):
+ name, val, ign, ign = self.format_field_extended(key, series_with_index)
+ return (name, val)
+
+ def format_field_extended(self, key, series_with_index=True):
+ from calibre.ebooks.metadata import authors_to_string
+ '''
+ returns the tuple (field_name, formatted_value)
+ '''
+
+ # Handle custom series index
+ if key.startswith('#') and key.endswith('_index'):
+ tkey = key[:-6] # strip the _index
+ cmeta = self.get_user_metadata(tkey, make_copy=False)
+ if cmeta['datatype'] == 'series':
+ if self.get(tkey):
+ res = self.get_extra(tkey)
+ return (unicode(cmeta['name']+'_index'),
+ self.format_series_index(res), res, cmeta)
+ else:
+ return (unicode(cmeta['name']+'_index'), '', '', cmeta)
+
+ if key in self.custom_field_keys():
+ res = self.get(key, None)
+ cmeta = self.get_user_metadata(key, make_copy=False)
+ name = unicode(cmeta['name'])
+ if cmeta['datatype'] != 'composite' and (res is None or res == ''):
+ return (name, res, None, None)
+ orig_res = res
+ cmeta = self.get_user_metadata(key, make_copy=False)
+ if res is None or res == '':
+ return (name, res, None, None)
+ orig_res = res
+ datatype = cmeta['datatype']
+ if datatype == 'text' and cmeta['is_multiple']:
+ res = u', '.join(res)
+ elif datatype == 'series' and series_with_index:
+ if self.get_extra(key) is not None:
+ res = res + \
+ ' [%s]'%self.format_series_index(val=self.get_extra(key))
+ elif datatype == 'datetime':
+ res = format_date(res, cmeta['display'].get('date_format','dd MMM yyyy'))
+ elif datatype == 'bool':
+ res = _('Yes') if res else _('No')
+ return (name, unicode(res), orig_res, cmeta)
+
+ # Translate aliases into the standard field name
+ fmkey = field_metadata.search_term_to_field_key(key)
+
+ if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
+ res = self.get(key, None)
+ fmeta = field_metadata[fmkey]
+ name = unicode(fmeta['name'])
+ if res is None or res == '':
+ return (name, res, None, None)
+ orig_res = res
+ name = unicode(fmeta['name'])
+ datatype = fmeta['datatype']
+ if key == 'authors':
+ res = authors_to_string(res)
+ elif key == 'series_index':
+ res = self.format_series_index(res)
+ elif datatype == 'text' and fmeta['is_multiple']:
+ res = u', '.join(res)
+ elif datatype == 'series' and series_with_index:
+ res = res + ' [%s]'%self.format_series_index()
+ elif datatype == 'datetime':
+ res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
+ return (name, unicode(res), orig_res, fmeta)
+
+ return (None, None, None, None)
+
def __unicode__(self):
- pass
+ from calibre.ebooks.metadata import authors_to_string
+ ans = []
+ def fmt(x, y):
+ ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
+
+ fmt('Title', self.title)
+ if self.title_sort:
+ fmt('Title sort', self.title_sort)
+ if self.authors:
+ fmt('Author(s)', authors_to_string(self.authors) + \
+ ((' [' + self.author_sort + ']') if self.author_sort else ''))
+ if self.publisher:
+ fmt('Publisher', self.publisher)
+ if getattr(self, 'book_producer', False):
+ fmt('Book Producer', self.book_producer)
+ if self.comments:
+ fmt('Comments', self.comments)
+ if self.isbn:
+ fmt('ISBN', self.isbn)
+ if self.tags:
+ fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
+ if self.series:
+ fmt('Series', self.series + ' #%s'%self.format_series_index())
+ if self.language:
+ fmt('Language', self.language)
+ if self.rating is not None:
+ fmt('Rating', self.rating)
+ if self.timestamp is not None:
+ fmt('Timestamp', isoformat(self.timestamp))
+ if self.pubdate is not None:
+ fmt('Published', isoformat(self.pubdate))
+ if self.rights is not None:
+ fmt('Rights', unicode(self.rights))
+ for key in self.custom_field_keys():
+ val = self.get(key, None)
+ if val:
+ (name, val) = self.format_field(key)
+ fmt(name, unicode(val))
+ return u'\n'.join(ans)
def to_html(self):
- pass
+ from calibre.ebooks.metadata import authors_to_string
+ ans = [(_('Title'), unicode(self.title))]
+ ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
+ ans += [(_('Publisher'), unicode(self.publisher))]
+ ans += [(_('Producer'), unicode(self.book_producer))]
+ ans += [(_('Comments'), unicode(self.comments))]
+ ans += [('ISBN', unicode(self.isbn))]
+ ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
+ if self.series:
+ ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
+ ans += [(_('Language'), unicode(self.language))]
+ if self.timestamp is not None:
+ ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
+ if self.pubdate is not None:
+ ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
+ if self.rights is not None:
+ ans += [(_('Rights'), unicode(self.rights))]
+ for key in self.custom_field_keys():
+ val = self.get(key, None)
+ if val:
+ (name, val) = self.format_field(key)
+ ans += [(name, val)]
+ for i, x in enumerate(ans):
+ ans[i] = u'
%s
%s
'%x
+ return u'
%s
'%u'\n'.join(ans)
def __str__(self):
return self.__unicode__().encode('utf-8')
def __nonzero__(self):
- return True
+ return bool(self.title or self.author or self.comments or self.tags)
# }}}
-# We don't need reserved field names for this object any more. Lets just use a
-# protocol like the last char of a user field label should be _ when using this
-# object
-# So mi.tags returns the builtin tags and mi.tags_ returns the user tags
-
diff --git a/src/calibre/ebooks/metadata/book/json_codec.py b/src/calibre/ebooks/metadata/book/json_codec.py
new file mode 100644
index 0000000000..c02d4e953d
--- /dev/null
+++ b/src/calibre/ebooks/metadata/book/json_codec.py
@@ -0,0 +1,143 @@
+'''
+Created on 4 Jun 2010
+
+@author: charles
+'''
+
+from base64 import b64encode, b64decode
+import json
+import traceback
+
+from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
+from calibre.constants import filesystem_encoding, preferred_encoding
+from calibre.library.field_metadata import FieldMetadata
+from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE
+from calibre.utils.magick import Image
+from calibre import isbytestring
+
+# Translate datetimes to and from strings. The string form is the datetime in
+# UTC. The returned date is also UTC
+def string_to_datetime(src):
+ if src == "None":
+ return None
+ return parse_date(src)
+
+def datetime_to_string(dateval):
+ if dateval is None or dateval == UNDEFINED_DATE:
+ return "None"
+ return isoformat(dateval)
+
+def encode_thumbnail(thumbnail):
+ '''
+ Encode the image part of a thumbnail, then return the 3 part tuple
+ '''
+ if thumbnail is None:
+ return None
+ if not isinstance(thumbnail, (tuple, list)):
+ try:
+ img = Image()
+ img.load(thumbnail)
+ width, height = img.size
+ thumbnail = (width, height, thumbnail)
+ except:
+ return None
+ return (thumbnail[0], thumbnail[1], b64encode(str(thumbnail[2])))
+
+def decode_thumbnail(tup):
+ '''
+ Decode an encoded thumbnail into its 3 component parts
+ '''
+ if tup is None:
+ return None
+ return (tup[0], tup[1], b64decode(tup[2]))
+
+def object_to_unicode(obj, enc=preferred_encoding):
+
+ def dec(x):
+ return x.decode(enc, 'replace')
+
+ if isbytestring(obj):
+ return dec(obj)
+ if isinstance(obj, (list, tuple)):
+ return [dec(x) if isbytestring(x) else x for x in obj]
+ if isinstance(obj, dict):
+ ans = {}
+ for k, v in obj.items():
+ k = object_to_unicode(k)
+ v = object_to_unicode(v)
+ ans[k] = v
+ return ans
+ return obj
+
+class JsonCodec(object):
+
+ def __init__(self):
+ self.field_metadata = FieldMetadata()
+
+ def encode_to_file(self, file, booklist):
+ file.write(json.dumps(self.encode_booklist_metadata(booklist),
+ indent=2, encoding='utf-8'))
+
+ def encode_booklist_metadata(self, booklist):
+ result = []
+ for book in booklist:
+ result.append(self.encode_book_metadata(book))
+ return result
+
+ def encode_book_metadata(self, book):
+ result = {}
+ for key in SERIALIZABLE_FIELDS:
+ result[key] = self.encode_metadata_attr(book, key)
+ return result
+
+ def encode_metadata_attr(self, book, key):
+ if key == 'user_metadata':
+ meta = book.get_all_user_metadata(make_copy=True)
+ for k in meta:
+ if meta[k]['datatype'] == 'datetime':
+ meta[k]['#value#'] = datetime_to_string(meta[k]['#value#'])
+ return meta
+ if key in self.field_metadata:
+ datatype = self.field_metadata[key]['datatype']
+ else:
+ datatype = None
+ value = book.get(key)
+ if key == 'thumbnail':
+ return encode_thumbnail(value)
+ elif isbytestring(value): # str includes bytes
+ enc = filesystem_encoding if key == 'lpath' else preferred_encoding
+ return object_to_unicode(value, enc=enc)
+ elif datatype == 'datetime':
+ return datetime_to_string(value)
+ else:
+ return object_to_unicode(value)
+
+ def decode_from_file(self, file, booklist, book_class, prefix):
+ js = []
+ try:
+ js = json.load(file, encoding='utf-8')
+ for item in js:
+ book = book_class(prefix, item.get('lpath', None))
+ for key in item.keys():
+ meta = self.decode_metadata(key, item[key])
+ if key == 'user_metadata':
+ book.set_all_user_metadata(meta)
+ else:
+ setattr(book, key, meta)
+ booklist.append(book)
+ except:
+ print 'exception during JSON decoding'
+ traceback.print_exc()
+
+ def decode_metadata(self, key, value):
+ if key == 'user_metadata':
+ for k in value:
+ if value[k]['datatype'] == 'datetime':
+ value[k]['#value#'] = string_to_datetime(value[k]['#value#'])
+ return value
+ elif key in self.field_metadata:
+ if self.field_metadata[key]['datatype'] == 'datetime':
+ return string_to_datetime(value)
+ if key == 'thumbnail':
+ return decode_thumbnail(value)
+ return value
diff --git a/src/calibre/ebooks/metadata/cli.py b/src/calibre/ebooks/metadata/cli.py
index 780d3febcf..a0be187512 100644
--- a/src/calibre/ebooks/metadata/cli.py
+++ b/src/calibre/ebooks/metadata/cli.py
@@ -109,7 +109,7 @@ def do_set_metadata(opts, mi, stream, stream_type):
from_opf = getattr(opts, 'from_opf', None)
if from_opf is not None:
from calibre.ebooks.metadata.opf2 import OPF
- opf_mi = MetaInformation(OPF(open(from_opf, 'rb')))
+ opf_mi = OPF(open(from_opf, 'rb')).to_book_metadata()
mi.smart_update(opf_mi)
for pref in config().option_set.preferences:
diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py
index df9a394258..e60837a553 100644
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@@ -164,10 +164,10 @@ def get_cover(opf, opf_path, stream, reader=None):
return render_html_svg_workaround(cpage, default_log)
def get_metadata(stream, extract_cover=True):
- """ Return metadata as a :class:`MetaInformation` object """
+ """ Return metadata as a :class:`Metadata` object """
stream.seek(0)
reader = OCFZipReader(stream)
- mi = MetaInformation(reader.opf)
+ mi = reader.opf.to_book_metadata()
if extract_cover:
try:
cdata = get_cover(reader.opf, reader.opf_path, stream, reader=reader)
diff --git a/src/calibre/ebooks/metadata/fb2.py b/src/calibre/ebooks/metadata/fb2.py
index 576fbaa6fc..6e0d56dfa0 100644
--- a/src/calibre/ebooks/metadata/fb2.py
+++ b/src/calibre/ebooks/metadata/fb2.py
@@ -33,7 +33,10 @@ def get_metadata(stream):
le = XPath('descendant::fb2:last-name')(au)
if le:
lname = tostring(le[0])
- author += ' '+lname
+ if author:
+ author += ' '+lname
+ else:
+ author = lname
if author:
authors.append(author)
if len(authors) == 1 and author is not None:
diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py
index 96807c06ae..9b8a42e482 100644
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@@ -29,7 +29,7 @@ class MetadataSource(Plugin): # {{{
future use.
The fetch method must store the results in `self.results` as a list of
- :class:`MetaInformation` objects. If there is an error, it should be stored
+ :class:`Metadata` objects. If there is an error, it should be stored
in `self.exception` and `self.tb` (for the traceback).
'''
diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py
index 356cc3f1b1..07a054eeaa 100644
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@@ -8,7 +8,7 @@ import sys, re
from urllib import quote
from calibre.utils.config import OptionParser
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre import browser
@@ -42,34 +42,48 @@ def fetch_metadata(url, max=100, timeout=5.):
return books
-class ISBNDBMetadata(MetaInformation):
+class ISBNDBMetadata(Metadata):
def __init__(self, book):
- MetaInformation.__init__(self, None, [])
+ Metadata.__init__(self, None, [])
- self.isbn = book.get('isbn13', book.get('isbn'))
- self.title = book.find('titlelong').string
+ def tostring(e):
+ if not hasattr(e, 'string'):
+ return None
+ ans = e.string
+ if ans is not None:
+ ans = unicode(ans).strip()
+ if not ans:
+ ans = None
+ return ans
+
+ self.isbn = unicode(book.get('isbn13', book.get('isbn')))
+ self.title = tostring(book.find('titlelong'))
if not self.title:
- self.title = book.find('title').string
+ self.title = tostring(book.find('title'))
+ if not self.title:
+ self.title = _('Unknown')
self.title = unicode(self.title).strip()
- au = unicode(book.find('authorstext').string).strip()
- temp = au.split(',')
self.authors = []
- for au in temp:
- if not au: continue
- self.authors.extend([a.strip() for a in au.split('&')])
+ au = tostring(book.find('authorstext'))
+ if au:
+ au = au.strip()
+ temp = au.split(',')
+ for au in temp:
+ if not au: continue
+ self.authors.extend([a.strip() for a in au.split('&')])
try:
- self.author_sort = book.find('authors').find('person').string
+ self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except:
pass
- self.publisher = book.find('publishertext').string
+ self.publisher = tostring(book.find('publishertext'))
- summ = book.find('summary')
- if summ and hasattr(summ, 'string') and summ.string:
- self.comments = 'SUMMARY:\n'+summ.string
+ summ = tostring(book.find('summary'))
+ if summ:
+ self.comments = 'SUMMARY:\n'+summ
def build_isbn(base_url, opts):
diff --git a/src/calibre/ebooks/metadata/library_thing.py b/src/calibre/ebooks/metadata/library_thing.py
index 669d9478a3..7f312da1d9 100644
--- a/src/calibre/ebooks/metadata/library_thing.py
+++ b/src/calibre/ebooks/metadata/library_thing.py
@@ -12,6 +12,7 @@ import mechanize
from calibre import browser, prints
from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
@@ -110,6 +111,8 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
+isbn).read()
if not raw:
return mi
+ raw = raw.decode('utf-8', 'replace')
+ raw = strip_encoding_declarations(raw)
root = html.fromstring(raw)
h1 = root.xpath('//div[@class="headsummary"]/h1')
if h1 and not mi.title:
diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py
index 1a267b6858..3be1f22632 100644
--- a/src/calibre/ebooks/metadata/lit.py
+++ b/src/calibre/ebooks/metadata/lit.py
@@ -6,7 +6,6 @@ Support for reading the metadata from a LIT file.
import cStringIO, os
-from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
def get_metadata(stream):
@@ -16,7 +15,7 @@ def get_metadata(stream):
src = litfile.get_metadata().encode('utf-8')
litfile = litfile._litfile
opf = OPF(cStringIO.StringIO(src), os.getcwd())
- mi = MetaInformation(opf)
+ mi = opf.to_book_metadata()
covers = []
for item in opf.iterguide():
if 'cover' not in item.get('type', '').lower():
diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py
index eae8171362..87b8d3b535 100644
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -108,7 +108,8 @@ def _get_metadata(stream, stream_type, use_libprs_metadata,
base = metadata_from_filename(name, pat=pattern)
if force_read_metadata or is_recipe(name) or prefs['read_file_metadata']:
mi = get_file_type_metadata(stream, stream_type)
- if base.title == os.path.splitext(name)[0] and base.authors is None:
+ if base.title == os.path.splitext(name)[0] and \
+ base.is_null('authors') and base.is_null('isbn'):
# Assume that there was no metadata in the file and the user set pattern
# to match meta info from the file name did not match.
# The regex is meant to match the standard format filenames are written
@@ -181,7 +182,7 @@ def metadata_from_filename(name, pat=None):
mi.isbn = si
except (IndexError, ValueError):
pass
- if not mi.title:
+ if mi.is_null('title'):
mi.title = name
return mi
@@ -194,7 +195,7 @@ def opf_metadata(opfpath):
try:
opf = OPF(f, os.path.dirname(opfpath))
if opf.application_id is not None:
- mi = MetaInformation(opf)
+ mi = opf.to_book_metadata()
if hasattr(opf, 'cover') and opf.cover:
cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
if os.access(cpath, os.R_OK):
diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py
index 408bab828d..30668d70f7 100644
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -404,14 +404,16 @@ class MetadataUpdater(object):
if self.cover_record is not None:
size = len(self.cover_record)
cover = rescale_image(data, size)
- cover += '\0' * (size - len(cover))
- self.cover_record[:] = cover
+ if len(cover) <= size:
+ cover += '\0' * (size - len(cover))
+ self.cover_record[:] = cover
if self.thumbnail_record is not None:
size = len(self.thumbnail_record)
thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN)
- thumbnail += '\0' * (size - len(thumbnail))
- self.thumbnail_record[:] = thumbnail
- return
+ if len(thumbnail) <= size:
+ thumbnail += '\0' * (size - len(thumbnail))
+ self.thumbnail_record[:] = thumbnail
+ return
def set_metadata(stream, mi):
mu = MetadataUpdater(stream)
diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py
index 2d9de7f780..5c2477c3dc 100644
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
lxml based OPF parser.
'''
-import re, sys, unittest, functools, os, mimetypes, uuid, glob, cStringIO
+import re, sys, unittest, functools, os, mimetypes, uuid, glob, cStringIO, json
from urllib import unquote
from urlparse import urlparse
@@ -16,11 +16,13 @@ from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import __appname__, __version__, filesystem_encoding
from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.metadata import MetaInformation, string_to_authors
+from calibre.ebooks.metadata import string_to_authors, MetaInformation
+from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, isoformat
from calibre.utils.localization import get_lang
+from calibre import prints
-class Resource(object):
+class Resource(object): # {{{
'''
Represents a resource (usually a file on the filesystem or a URL pointing
to the web. Such resources are commonly referred to in OPF files.
@@ -101,8 +103,9 @@ class Resource(object):
def __repr__(self):
return 'Resource(%s, %s)'%(repr(self.path), repr(self.href()))
+# }}}
-class ResourceCollection(object):
+class ResourceCollection(object): # {{{
def __init__(self):
self._resources = []
@@ -153,10 +156,9 @@ class ResourceCollection(object):
for res in self:
res.set_basedir(path)
+# }}}
-
-
-class ManifestItem(Resource):
+class ManifestItem(Resource): # {{{
@staticmethod
def from_opf_manifest_item(item, basedir):
@@ -194,8 +196,9 @@ class ManifestItem(Resource):
return self.media_type
raise IndexError('%d out of bounds.'%index)
+# }}}
-class Manifest(ResourceCollection):
+class Manifest(ResourceCollection): # {{{
@staticmethod
def from_opf_manifest_element(items, dir):
@@ -262,7 +265,9 @@ class Manifest(ResourceCollection):
if i.id == id:
return i.mime_type
-class Spine(ResourceCollection):
+# }}}
+
+class Spine(ResourceCollection): # {{{
class Item(Resource):
@@ -334,7 +339,9 @@ class Spine(ResourceCollection):
for i in self:
yield i.path
-class Guide(ResourceCollection):
+# }}}
+
+class Guide(ResourceCollection): # {{{
class Reference(Resource):
@@ -371,6 +378,7 @@ class Guide(ResourceCollection):
self[-1].type = type
self[-1].title = ''
+# }}}
class MetadataField(object):
@@ -412,7 +420,29 @@ class MetadataField(object):
elem = obj.create_metadata_element(self.name, is_dc=self.is_dc)
obj.set_text(elem, unicode(val))
-class OPF(object):
+
+def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)):
+ from calibre.utils.config import to_json
+ from calibre.ebooks.metadata.book.json_codec import object_to_unicode
+
+ for name, fm in all_user_metadata.items():
+ try:
+ fm = object_to_unicode(fm)
+ fm = json.dumps(fm, default=to_json, ensure_ascii=False)
+ except:
+ prints('Failed to write user metadata:', name)
+ import traceback
+ traceback.print_exc()
+ continue
+ meta = metadata_elem.makeelement('meta')
+ meta.set('name', 'calibre:user_metadata:'+name)
+ meta.set('content', fm)
+ meta.tail = tail
+ metadata_elem.append(meta)
+
+
+class OPF(object): # {{{
+
MIMETYPE = 'application/oebps-package+xml'
PARSER = etree.XMLParser(recover=True)
NAMESPACES = {
@@ -497,6 +527,43 @@ class OPF(object):
self.guide = Guide.from_opf_guide(guide, basedir) if guide else None
self.cover_data = (None, None)
self.find_toc()
+ self.read_user_metadata()
+
+ def read_user_metadata(self):
+ self._user_metadata_ = {}
+ temp = Metadata('x', ['x'])
+ from calibre.utils.config import from_json
+ elems = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
+ '"calibre:user_metadata:") and @content]')
+ for elem in elems:
+ name = elem.get('name')
+ name = ':'.join(name.split(':')[2:])
+ if not name or not name.startswith('#'):
+ continue
+ fm = elem.get('content')
+ try:
+ fm = json.loads(fm, object_hook=from_json)
+ temp.set_user_metadata(name, fm)
+ except:
+ prints('Failed to read user metadata:', name)
+ import traceback
+ traceback.print_exc()
+ continue
+ self._user_metadata_ = temp.get_all_user_metadata(True)
+
+ def to_book_metadata(self):
+ ans = MetaInformation(self)
+ for n, v in self._user_metadata_.items():
+ ans.set_user_metadata(n, v)
+ return ans
+
+ def write_user_metadata(self):
+ elems = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
+ '"calibre:user_metadata:") and @content]')
+ for elem in elems:
+ elem.getparent().remove(elem)
+ serialize_user_metadata(self.metadata,
+ self._user_metadata_)
def find_toc(self):
self.toc = None
@@ -911,6 +978,7 @@ class OPF(object):
return elem
def render(self, encoding='utf-8'):
+ self.write_user_metadata()
raw = etree.tostring(self.root, encoding=encoding, pretty_print=True)
if not raw.lstrip().startswith('\n'%encoding.upper()+raw
@@ -924,18 +992,22 @@ class OPF(object):
val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None):
setattr(self, attr, val)
+ temp = self.to_book_metadata()
+ temp.smart_update(mi, replace_metadata=replace_metadata)
+ self._user_metadata_ = temp.get_all_user_metadata(True)
+# }}}
-class OPFCreator(MetaInformation):
+class OPFCreator(Metadata):
- def __init__(self, base_path, *args, **kwargs):
+ def __init__(self, base_path, other):
'''
Initialize.
@param base_path: An absolute path to the directory in which this OPF file
will eventually be. This is used by the L{create_manifest} method
to convert paths to files into relative paths.
'''
- MetaInformation.__init__(self, *args, **kwargs)
+ Metadata.__init__(self, title='', other=other)
self.base_path = os.path.abspath(base_path)
if self.application_id is None:
self.application_id = str(uuid.uuid4())
@@ -1115,6 +1187,8 @@ class OPFCreator(MetaInformation):
item.set('title', ref.title)
guide.append(item)
+ serialize_user_metadata(metadata, self.get_all_user_metadata(False))
+
root = E.package(
metadata,
manifest,
@@ -1156,7 +1230,7 @@ def metadata_to_opf(mi, as_string=True):
%(id)s%(uuid)s
-
+
'''%dict(a=__appname__, id=mi.application_id, uuid=mi.uuid)))
@@ -1188,7 +1262,7 @@ def metadata_to_opf(mi, as_string=True):
factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
if hasattr(mi.pubdate, 'isoformat'):
factory(DC('date'), isoformat(mi.pubdate))
- if mi.category:
+ if hasattr(mi, 'category') and mi.category:
factory(DC('type'), mi.category)
if mi.comments:
factory(DC('description'), mi.comments)
@@ -1217,6 +1291,8 @@ def metadata_to_opf(mi, as_string=True):
if mi.title_sort:
meta('title_sort', mi.title_sort)
+ serialize_user_metadata(metadata, mi.get_all_user_metadata(False))
+
metadata[-1].tail = '\n' +(' '*4)
if mi.cover:
@@ -1334,5 +1410,30 @@ def suite():
def test():
unittest.TextTestRunner(verbosity=2).run(suite())
+def test_user_metadata():
+ from cStringIO import StringIO
+ mi = Metadata('Test title', ['test author1', 'test author2'])
+ um = {
+ '#myseries': { '#value#': u'test series\xe4', 'datatype':'text',
+ 'is_multiple': None, 'name': u'My Series'},
+ '#myseries_index': { '#value#': 2.45, 'datatype': 'float',
+ 'is_multiple': None},
+ '#mytags': {'#value#':['t1','t2','t3'], 'datatype':'text',
+ 'is_multiple': '|', 'name': u'My Tags'}
+ }
+ mi.set_all_user_metadata(um)
+ raw = metadata_to_opf(mi)
+ opfc = OPFCreator(os.getcwd(), other=mi)
+ out = StringIO()
+ opfc.render(out)
+ raw2 = out.getvalue()
+ f = StringIO(raw)
+ opf = OPF(f)
+ f2 = StringIO(raw2)
+ opf2 = OPF(f2)
+ assert um == opf._user_metadata_
+ assert um == opf2._user_metadata_
+ print opf.render()
+
if __name__ == '__main__':
- test()
+ test_user_metadata()
diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py
index d116ec30fb..ad41125575 100644
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@@ -125,7 +125,7 @@ def create_metadata(stream, options):
au = u', '.join(au)
author = au.encode('ascii', 'ignore')
md += r'{\author %s}'%(author,)
- if options.category:
+ if options.get('category', None):
category = options.category.encode('ascii', 'ignore')
md += r'{\category %s}'%(category,)
comp = options.comment if hasattr(options, 'comment') else options.comments
@@ -180,7 +180,7 @@ def set_metadata(stream, options):
src = pat.sub(r'{\\author ' + author + r'}', src)
else:
src = add_metadata_item(src, 'author', author)
- category = options.category
+ category = options.get('category', None)
if category != None:
category = category.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index 231ad51eee..cf15e3e29c 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -184,13 +184,14 @@ class MobiMLizer(object):
elif tag in NESTABLE_TAGS and istate.rendered:
para = wrapper = bstate.nested[-1]
elif left > 0 and indent >= 0:
+ ems = self.profile.mobi_ems_per_blockquote
para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
para = wrapper
- emleft = int(round(left / self.profile.fbase)) - 1
+ emleft = int(round(left / self.profile.fbase)) - ems
emleft = min((emleft, 10))
- while emleft > 0:
+ while emleft > ems/2.0:
para = etree.SubElement(para, XHTML('blockquote'))
- emleft -= 1
+ emleft -= ems
else:
para = wrapper = etree.SubElement(parent, XHTML('p'))
bstate.inline = bstate.para = para
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index 2a35c7cb45..dbe6854006 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -234,7 +234,7 @@ class MobiReader(object):
self.debug = debug
self.embedded_mi = None
self.base_css_rules = textwrap.dedent('''
- blockquote { margin: 0em 0em 0em 1.25em; text-align: justify }
+ blockquote { margin: 0em 0em 0em 2em; text-align: justify }
p { margin: 0em; text-align: justify }
@@ -441,7 +441,7 @@ class MobiReader(object):
html.tostring(elem, encoding='utf-8') + ''
stream = cStringIO.StringIO(raw)
opf = OPF(stream)
- self.embedded_mi = MetaInformation(opf)
+ self.embedded_mi = opf.to_book_metadata()
if guide is not None:
for ref in guide.xpath('descendant::reference'):
if 'cover' in ref.get('type', '').lower():
diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py
index 37936d6016..9105890d44 100644
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -15,7 +15,6 @@ from struct import pack
import time
from urlparse import urldefrag
-from PIL import Image
from cStringIO import StringIO
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS
@@ -28,6 +27,7 @@ from calibre.ebooks.oeb.base import namespace
from calibre.ebooks.oeb.base import prefixname
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.compression.palmdoc import compress_doc
+from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
INDEXING = True
FCIS_FLIS = True
@@ -111,46 +111,35 @@ def align_block(raw, multiple=4, pad='\0'):
return raw + pad*(multiple - extra)
def rescale_image(data, maxsizeb, dimen=None):
- image = Image.open(StringIO(data))
- format = image.format
- changed = False
- if image.format not in ('JPEG', 'GIF'):
- width, height = image.size
- area = width * height
- if area <= 40000:
- format = 'GIF'
- else:
- image = image.convert('RGBA')
- format = 'JPEG'
- changed = True
if dimen is not None:
- image.thumbnail(dimen, Image.ANTIALIAS)
- changed = True
- if changed:
- data = StringIO()
- image.save(data, format)
- data = data.getvalue()
+ data = thumbnail(data, width=dimen, height=dimen)[-1]
+ else:
+ # Replace transparent pixels with white pixels and convert to JPEG
+ data = save_cover_data_to(data, 'img.jpg', return_data=True)
if len(data) <= maxsizeb:
return data
- image = image.convert('RGBA')
- for quality in xrange(95, -1, -1):
- data = StringIO()
- image.save(data, 'JPEG', quality=quality)
- data = data.getvalue()
- if len(data) <= maxsizeb:
- return data
- width, height = image.size
- for scale in xrange(99, 0, -1):
- scale = scale / 100.
- data = StringIO()
- scaled = image.copy()
- size = (int(width * scale), (height * scale))
- scaled.thumbnail(size, Image.ANTIALIAS)
- scaled.save(data, 'JPEG', quality=0)
- data = data.getvalue()
- if len(data) <= maxsizeb:
- return data
- # Well, we tried?
+ orig_data = data
+ img = Image()
+ quality = 95
+
+ img.load(data)
+ while len(data) >= maxsizeb and quality >= 10:
+ quality -= 5
+ img.set_compression_quality(quality)
+ data = img.export('jpg')
+ if len(data) <= maxsizeb:
+ return data
+ orig_data = data
+
+ scale = 0.9
+ while len(data) >= maxsizeb and scale >= 0.05:
+ img = Image()
+ img.load(orig_data)
+ w, h = img.size
+ img.size = (int(scale*w), int(scale*h))
+ img.set_compression_quality(quality)
+ data = img.export('jpg')
+ scale -= 0.05
return data
class Serializer(object):
@@ -1796,12 +1785,13 @@ class MobiWriter(object):
self._oeb.log.debug('Index records dumped to', t)
def _clean_text_value(self, text):
- if not text:
- text = u'(none)'
- text = text.strip()
- if not isinstance(text, unicode):
- text = text.decode('utf-8', 'replace')
- text = text.encode('ascii','replace')
+ if text is not None and text.strip() :
+ text = text.strip()
+ if not isinstance(text, unicode):
+ text = text.decode('utf-8', 'replace')
+ text = text.encode('utf-8')
+ else :
+ text = "(none)".encode('utf-8')
return text
def _add_to_ctoc(self, ctoc_str, record_offset):
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index b5f61db3ac..e85098e293 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -654,8 +654,6 @@ class Metadata(object):
if predicate(x):
l.remove(x)
-
-
def __getitem__(self, key):
return self.items[key]
diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py
index d7d7bbf725..0f61969373 100644
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@@ -126,24 +126,29 @@ class OEBReader(object):
def _metadata_from_opf(self, opf):
from calibre.ebooks.metadata.opf2 import OPF
- from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
stream = cStringIO.StringIO(etree.tostring(opf))
- mi = MetaInformation(OPF(stream))
+ mi = OPF(stream).to_book_metadata()
if not mi.language:
mi.language = get_lang().replace('_', '-')
self.oeb.metadata.add('language', mi.language)
- if not mi.title:
- mi.title = self.oeb.translate(__('Unknown'))
- if not mi.authors:
- mi.authors = [self.oeb.translate(__('Unknown'))]
if not mi.book_producer:
- mi.book_producer = '%(a)s (%(v)s) [http://%(a)s.kovidgoyal.net]'%\
+ mi.book_producer = '%(a)s (%(v)s) [http://%(a)s-ebook.com]'%\
dict(a=__appname__, v=__version__)
meta_info_to_oeb_metadata(mi, self.oeb.metadata, self.logger)
- self.oeb.metadata.add('identifier', str(uuid.uuid4()), id='uuid_id',
- scheme='uuid')
+ m = self.oeb.metadata
+ m.add('identifier', str(uuid.uuid4()), id='uuid_id', scheme='uuid')
self.oeb.uid = self.oeb.metadata.identifier[-1]
+ if not m.title:
+ m.add('title', self.oeb.translate(__('Unknown')))
+ has_aut = False
+ for x in m.creator:
+ if getattr(x, 'role', '').lower() in ('', 'aut'):
+ has_aut = True
+ break
+ if not has_aut:
+ m.add('creator', self.oeb.translate(__('Unknown')), role='aut')
+
def _manifest_prune_invalid(self):
'''
diff --git a/src/calibre/ebooks/oeb/transforms/cover.py b/src/calibre/ebooks/oeb/transforms/cover.py
index 532c9bbc03..6d4c65c2fb 100644
--- a/src/calibre/ebooks/oeb/transforms/cover.py
+++ b/src/calibre/ebooks/oeb/transforms/cover.py
@@ -31,12 +31,14 @@ class CoverManager(object):
-
+