Merge from trunk

This commit is contained in:
Charles Haley 2010-10-01 08:34:17 +01:00
commit a59c989d58
179 changed files with 45391 additions and 27387 deletions


@ -4,6 +4,99 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.7.20
date: 2010-09-24
new features:
- title: "Tweak epub feature."
type: major
description: >
"Now you can conveniently browse the contents of an epub, tweak them and rebuild the epub within your calibre library
by right clicking on the book and selecting Tweak ePub. See http://www.mobileread.com/forums/showthread.php?t=99875
for details."
- title: "Add button to Edit metadata dialog to trim borders from the cover"
- title: "Kobo driver: Add support for setting the ReadStatus to Read and correctly deal with empty collections"
- title: "Improved algorithm for removal of hyphens during pre-processing"
- title: "EPUB metadata: Don't read timestamp value from epubs as I am sick of closing bugs about adding books and having the Date not be today."
- title: "After bulk edit metadata, reselect previously selected books."
bug fixes:
- title: "Fix regression in 0.7.19 that broke the By Author and By Title category listing in Stanza/Aldiko feeds."
- title: "MOBI Output: Fix regression that broke sections list in downloaded periodicals on Kindle for non-english news sources"
- title: "News download: Rationalize cover processing."
tickets: [6852]
- title: "Cover cache: load images only in the GUI thread to prevent stale files being leftover by set_path due to Windows file locking"
- title: "Database: Make renaming of folders on case change more robust"
tickets: [6914]
- title: "When adding/replacing files to/in EPUB files, set the GPF bit for all files in the archive, to prevent unzip from complaining in linux"
tickets: [6363]
- title: "Plugin loading: Handle encoding declarations in .py files correctly"
- title: "MOBI input: Another corner case"
tickets: [6909]
- title: "IPC: Store results file in the calibre temp dir and also dont die if for some reason removing result file fails. Should make adding/saving more robust"
- title: "Database: Fix regression that caused has_cover to create empty directories unneccessarily"
- title: "Detection of Alex on unix"
tickets: [5900]
- title: "News download: Don't add inline table of contents when downloading news for the Kindle"
- title: "Add prologue and epilogue to default chapter detection regex"
- title: "Kobo driver: Fix issue where books that are read were getting their status reset to Unread"
- title: "Device drivers: Fix occassional false positive when matching books on device with books in the calibre library"
- title: "Content server: Making serving of large files more efficient."
- title: "GUI device detection: Handle case when user yanks connected device before device connection handler is called."
tickets: [6864]
- title: "Strip leading/trailing whitespace when setting metadata using the edit metadata dialog"
tickets: [6854]
- title: "KOBO: Editing the Im_Reading list with SD Card installed fixed"
tickets: [6850]
new recipes:
- title: "Neal's Nuze and Popular Science"
author: Tony Stegall
- title: "Rmf24.pl"
author: "Tomasz Dlugosz"
- title: "Gazeta Pomorska"
author: "Richard"
- title: "Le Journal de Montreal and superesportes"
author: "Luciano Furtado"
- title: "The Marker"
author: Marbs
- title: "Tagesanzeiger"
author: noxxx
improved recipes:
- Danas
- Harvard Business Review
- version: 0.7.19
date: 2010-09-17
@ -61,6 +154,7 @@
- title: "PDB Input: Fix bug in conversion of TOC in some PML files"
new recipes:
- title: "taz.de RSS"
author: Alexander Schremmer
@ -272,7 +366,7 @@
new features:
- title: "Multiple library support: Various improvements to make using multiple calibre libraries easier."
type: major
desc: >
description: >
"Now, when you switch libraries using the Choose Library button on the toolbar, entries are created in the menu of that button to easily switch to that library in the
future. Also, you can now right click on a book in the calibre library and use the 'Copy to library' action to copy the book to another library
that you have switched to at least once. The name of the current library is shown in the titlebar.
@ -280,7 +374,7 @@
- title: "Content server: Allow setting a restriction so that the server shares only some of the books in the library."
type: major
desc: >
description: >
"You can now use a Saved Search as a restiction for the content server, via Preferences->Content Server. This will cause the
server to share only those books that match the saved search.
"

imgsrc/console.svg (new file, 4339 lines)

File diff suppressed because it is too large.


File diff suppressed because it is too large.

imgsrc/plugboard.svg (new file, 300 lines)

@ -0,0 +1,300 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="128.00171"
height="128.00175"
id="svg2"
sodipodi:version="0.32"
inkscape:version="0.48.0 r9654"
sodipodi:docname="plugboard.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
inkscape:export-filename="C:\Dokumente und Einstellungen\Appel\Desktop\PlugboardIcon\plugboard2.png"
inkscape:export-xdpi="72.0466"
inkscape:export-ydpi="72.0466"
version="1.1">
<defs
id="defs4">
<linearGradient
id="linearGradient3176">
<stop
style="stop-color:#3a78be;stop-opacity:1;"
offset="0"
id="stop3178" />
<stop
style="stop-color:#6f9afa;stop-opacity:1;"
offset="1"
id="stop3180" />
</linearGradient>
<linearGradient
id="linearGradient3168">
<stop
style="stop-color:#3a78be;stop-opacity:1;"
offset="0"
id="stop3170" />
<stop
style="stop-color:#6f9afa;stop-opacity:1;"
offset="1"
id="stop3172" />
</linearGradient>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3168"
id="linearGradient3174"
x1="386.89221"
y1="703.53375"
x2="386.89221"
y2="252.50571"
gradientUnits="userSpaceOnUse" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3176"
id="linearGradient3182"
x1="387.41043"
y1="501.67398"
x2="387.41043"
y2="252.02386"
gradientUnits="userSpaceOnUse" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3176"
id="linearGradient3035"
gradientUnits="userSpaceOnUse"
x1="387.41043"
y1="501.67398"
x2="387.41043"
y2="252.02386" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3168"
id="linearGradient3037"
gradientUnits="userSpaceOnUse"
x1="386.89221"
y1="703.53375"
x2="386.89221"
y2="252.50571" />
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="1.0508882"
inkscape:cx="-90.42008"
inkscape:cy="71.977333"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showguides="true"
inkscape:guide-bbox="true"
inkscape:window-width="1280"
inkscape:window-height="979"
inkscape:window-x="0"
inkscape:window-y="33"
showgrid="false"
fit-margin-top="0"
fit-margin-left="0"
fit-margin-right="0"
fit-margin-bottom="0"
inkscape:window-maximized="0">
<sodipodi:guide
orientation="vertical"
position="-153.50258,-506.94648"
id="guide3191" />
<sodipodi:guide
orientation="vertical"
position="274.4401,-506.94648"
id="guide3193" />
<sodipodi:guide
orientation="horizontal"
position="-323.06477,409.4968"
id="guide3195" />
<sodipodi:guide
orientation="horizontal"
position="-323.06477,211.67424"
id="guide3197" />
<sodipodi:guide
orientation="horizontal"
position="-323.06477,9.814489"
id="guide3199" />
</sodipodi:namedview>
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Ebene 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-323.06477,-417.41394)">
<g
id="g3015"
transform="matrix(0.20679483,0,0,0.21391708,307.0229,378.43143)">
<g
transform="translate(3.581054,-461.3231)"
id="g5213">
<path
sodipodi:type="arc"
style="fill:none;stroke:#ffffff;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="path4201"
sodipodi:cx="129.19025"
sodipodi:cy="125.15305"
sodipodi:rx="82.08963"
sodipodi:ry="82.08963"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
transform="translate(41.899029,623.49247)" />
<path
sodipodi:type="arc"
style="fill:none;stroke:#ffffff;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="path4203"
sodipodi:cx="129.19025"
sodipodi:cy="125.15305"
sodipodi:rx="82.08963"
sodipodi:ry="82.08963"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
transform="translate(41.899029,821.58422)" />
<path
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
sodipodi:ry="82.08963"
sodipodi:rx="82.08963"
sodipodi:cy="125.15305"
sodipodi:cx="129.19025"
id="path4205"
style="fill:none;stroke:#ffffff;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
sodipodi:type="arc"
transform="translate(41.899029,1019.6759)" />
<path
sodipodi:type="arc"
style="fill:none;stroke:#ffffff;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="path4209"
sodipodi:cx="129.19025"
sodipodi:cy="125.15305"
sodipodi:rx="82.08963"
sodipodi:ry="82.08963"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
transform="translate(466.69074,623.49247)" />
<path
sodipodi:type="arc"
style="fill:none;stroke:#ffffff;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="path4211"
sodipodi:cx="129.19025"
sodipodi:cy="125.15305"
sodipodi:rx="82.08963"
sodipodi:ry="82.08963"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
transform="translate(466.69074,821.58422)" />
<path
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
sodipodi:ry="82.08963"
sodipodi:rx="82.08963"
sodipodi:cy="125.15305"
sodipodi:cx="129.19025"
id="path4213"
style="fill:none;stroke:#ffffff;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
sodipodi:type="arc"
transform="translate(466.69074,1019.6759)" />
<path
style="fill:none;stroke:#ffffff;stroke-width:50;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none"
d="m 168.66696,746.8288 152.06768,0 2.85473,123.09483 c 56.19521,-0.39952 52.33668,51.2956 53.28826,76.46761 1.35196,23.3996 12.75809,72.52216 -57.09457,73.61286 l 0.95158,127.8527 277.22073,0"
id="path4215"
sodipodi:nodetypes="ccccccc"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#ffffff;stroke-width:50;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none"
d="m 168.66696,945.99709 278.56646,0 0,-199.65012 151.75839,0"
id="path4217"
inkscape:connector-curvature="0" />
</g>
<g
id="g3012">
<path
sodipodi:type="arc"
style="fill:none;stroke:#3a78be;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="path2160"
sodipodi:cx="129.19025"
sodipodi:cy="125.15305"
sodipodi:rx="82.08963"
sodipodi:ry="82.08963"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
transform="translate(45.480079,154.16937)" />
</g>
<path
transform="translate(45.480079,352.26112)"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
sodipodi:ry="82.08963"
sodipodi:rx="82.08963"
sodipodi:cy="125.15305"
sodipodi:cx="129.19025"
id="path3140"
style="fill:none;stroke:#3a78be;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
sodipodi:type="arc" />
<path
transform="translate(45.480079,550.35281)"
sodipodi:type="arc"
style="fill:none;stroke:#3a78be;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="path3142"
sodipodi:cx="129.19025"
sodipodi:cy="125.15305"
sodipodi:rx="82.08963"
sodipodi:ry="82.08963"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z" />
<path
transform="translate(470.27179,154.16937)"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
sodipodi:ry="82.08963"
sodipodi:rx="82.08963"
sodipodi:cy="125.15305"
sodipodi:cx="129.19025"
id="path3151"
style="fill:none;stroke:#3a78be;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
sodipodi:type="arc" />
<path
transform="translate(470.27179,352.26112)"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z"
sodipodi:ry="82.08963"
sodipodi:rx="82.08963"
sodipodi:cy="125.15305"
sodipodi:cx="129.19025"
id="path3153"
style="fill:none;stroke:#3a78be;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
sodipodi:type="arc" />
<path
transform="translate(470.27179,550.35281)"
sodipodi:type="arc"
style="fill:none;stroke:#3a78be;stroke-width:30;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="path3155"
sodipodi:cx="129.19025"
sodipodi:cy="125.15305"
sodipodi:rx="82.08963"
sodipodi:ry="82.08963"
d="m 211.27988,125.15305 c 0,45.33685 -36.75278,82.08963 -82.08963,82.08963 -45.336854,0 -82.089634,-36.75278 -82.089634,-82.08963 0,-45.336848 36.75278,-82.089627 82.089634,-82.089627 45.33685,0 82.08963,36.752779 82.08963,82.089627 z" />
<path
id="path3203"
d="m 172.24801,476.67399 278.56646,0 0,-199.65012 151.75839,0"
style="fill:none;stroke:url(#linearGradient3035);stroke-width:50;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none"
inkscape:connector-curvature="0" />
<path
sodipodi:nodetypes="ccccccc"
id="path3201"
d="m 172.24801,277.5057 152.06768,0 2.85473,123.09483 c 45.72787,0.55206 48.53038,48.44087 53.28826,76.46761 -1.50277,23.3996 -4.37028,72.52219 -57.09457,73.61289 l 0.95158,127.85271 277.22073,0"
style="fill:none;stroke:url(#linearGradient3037);stroke-width:50;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>


@ -54,19 +54,53 @@ function render_book(book) {
formats = book.attr("formats").split(",");
if (formats.length > 0) {
for (i=0; i < formats.length; i++) {
title += '<a title="Download in '+formats[i]+' format" class="format" href="'+format_url(formats[i], id, book.attr("title"))+'">'+formats[i]+'</a>, ';
title += '<a title="Download in '+formats[i]+' format" class="format" href="'+format_url(formats[i], id, book.attr("safe_title"))+'">'+formats[i]+'</a>, ';
}
title = title.slice(0, title.length-2);
title += '&nbsp;({0}&nbsp;MB)&nbsp;'.format(size);
}
if (tags) title += 'Tags=[{0}] '.format(tags);
title += '<span class="tagdata_short" style="display:all">'
if (tags) {
t = tags.split(':&:', 2);
m = parseInt(t[0]);
tall = t[1].split(',');
t = t[1].split(',', m);
if (tall.length > m) t[m] = '...'
title += 'Tags=[{0}] '.format(t.join(','));
}
custcols = book.attr("custcols").split(',')
for ( i = 0; i < custcols.length; i++) {
if (custcols[i].length > 0) {
vals = book.attr(custcols[i]).split(':#:', 2);
if (vals[0].indexOf('#T#') == 0) { //startswith
vals[0] = vals[0].substr(3, vals[0].length)
t = vals[1].split(':&:', 2);
m = parseInt(t[0]);
t = t[1].split(',', m);
if (t.length == m) t[m] = '...';
vals[1] = t.join(',');
}
title += '{0}=[{1}] '.format(vals[0], vals[1]);
}
}
title += '</span>'
title += '<span class="tagdata_long" style="display:none">'
if (tags) {
t = tags.split(':&:', 2);
title += 'Tags=[{0}] '.format(t[1]);
}
custcols = book.attr("custcols").split(',')
for ( i = 0; i < custcols.length; i++) {
if (custcols[i].length > 0) {
vals = book.attr(custcols[i]).split(':#:', 2);
if (vals[0].indexOf('#T#') == 0) { //startswith
vals[0] = vals[0].substr(3, vals[0].length)
vals[1] = (vals[1].split(':&:', 2))[1];
}
title += '{0}=[{1}] '.format(vals[0], vals[1]);
}
}
title += '</span>'
title += '<img style="display:none" alt="" src="get/cover/{0}" /></span>'.format(id);
title += '<div class="comments">{0}</div>'.format(comments)
// Render authors cell
@ -170,11 +204,15 @@ function fetch_library_books(start, num, timeout, sort, order, search) {
var cover = row.find('img').attr('src');
var collapsed = row.find('.comments').css('display') == 'none';
$("#book_list tbody tr * .comments").css('display', 'none');
$("#book_list tbody tr * .tagdata_short").css('display', 'inherit');
$("#book_list tbody tr * .tagdata_long").css('display', 'none');
$('#cover_pane').css('visibility', 'hidden');
if (collapsed) {
row.find('.comments').css('display', 'inherit');
$('#cover_pane img').attr('src', cover);
$('#cover_pane').css('visibility', 'visible');
row.find(".tagdata_short").css('display', 'none');
row.find(".tagdata_long").css('display', 'inherit');
}
});


@ -93,6 +93,37 @@ save_template_title_series_sorting = 'library_order'
auto_connect_to_folder = ''
# Specify renaming rules for sony collections. Collections on Sonys are named
# depending upon whether the field is standard or custom. A collection derived
# from a standard field is named for the value in that field. For example, if
# the standard 'series' column contains the name 'Darkover', then the series
# will be named 'Darkover'. A collection derived from a custom field will have
# the name of the field added to the value. For example, if a custom series
# column named 'My Series' contains the name 'Darkover', then the collection
# will be named 'Darkover (My Series)'. If two books have fields that generate
# the same collection name, then both books will be in that collection. This
# tweak lets you specify for a standard or custom field the value to be put
# inside the parentheses. You can use it to add a parenthetical description to a
# standard field, for example 'Foo (Tag)' instead of 'Foo'. You can also use
# it to force multiple fields to end up in the same collection. For example, you
# could force the values in 'series', '#my_series_1', and '#my_series_2' to
# appear in collections named 'some_value (Series)', thereby merging all of the
# fields into one set of collections. The syntax of this tweak is
# {'field_lookup_name':'name_to_use', 'lookup_name':'name', ...}
# Example 1: I want three series columns to be merged into one set of
# collections. If the column lookup names are 'series', '#series_1' and
# '#series_2', and if I want nothing in the parentheses, then the value to use
# in the tweak value would be:
# sony_collection_renaming_rules={'series':'', '#series_1':'', '#series_2':''}
# Example 2: I want the word '(Series)' to appear on collections made from
# series, and the word '(Tag)' to appear on collections made from tags. Use:
# sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}
# Example 3: I want 'series' and '#myseries' to be merged, and for the
# collection name to have '(Series)' appended. The renaming rule is:
# sony_collection_renaming_rules={'series':'Series', '#myseries':'Series'}
sony_collection_renaming_rules={}
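
A hypothetical sketch (not part of this commit, and not the actual Sony driver code) of how
such a renaming-rules dict maps a field value to a collection name, matching Example 3 above:

    # Illustration only: rules maps field lookup name -> parenthetical suffix
    def sony_collection_name(value, field, rules):
        suffix = rules.get(field)
        if not suffix:
            # No rule, or an empty rule: use the bare value. (The real driver
            # also appends the column name for custom fields that have no rule.)
            return value
        return '%s (%s)' % (value, suffix)

    rules = {'series': 'Series', '#myseries': 'Series'}
    sony_collection_name('Darkover', 'series', rules)     # -> 'Darkover (Series)'
    sony_collection_name('Darkover', '#myseries', rules)  # -> 'Darkover (Series)'
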
# Create search terms to apply a query across several built-in search terms.
# Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...}
# Example: create the term 'myseries' that when used as myseries:foo would
@ -114,6 +145,24 @@ add_new_book_tags_when_importing_books = False
# Set the maximum number of tags to show per book in the content server
max_content_server_tags_shown=5
# Set custom metadata fields that the content server will or will not display.
# content_server_will_display is a list of custom fields to be displayed.
# content_server_wont_display is a list of custom fields not to be displayed.
# wont_display has priority over will_display.
# The special value '*' means all custom fields.
# Defaults:
# content_server_will_display = ['*']
# content_server_wont_display = ['']
# Examples:
# To display only the custom fields #mytags and #genre:
# content_server_will_display = ['#mytags', '#genre']
# content_server_wont_display = ['']
# To display all fields except #mycomments:
# content_server_will_display = ['*']
# content_server_wont_display = ['#mycomments']
content_server_will_display = ['*']
content_server_wont_display = ['']
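
A hypothetical sketch (not code from this commit) of the filtering these two tweaks describe;
wont_display takes priority over will_display, and '*' matches every custom field:

    # Illustration only
    def custom_fields_to_show(all_fields, will_display, wont_display):
        shown = set(all_fields) if '*' in will_display else set(will_display)
        hidden = set(all_fields) if '*' in wont_display else set(wont_display)
        return sorted(f for f in shown if f not in hidden)

    custom_fields_to_show(['#mytags', '#genre', '#mycomments'], ['*'], ['#mycomments'])
    # -> ['#genre', '#mytags']
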
# Set the maximum number of sort 'levels' that calibre will use to resort the
# library after certain operations such as searches or device insertion. Each

Six binary image files changed (contents not shown).


@ -1,9 +1,22 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1.03'
__date__ = '27, September 2010'
__docformat__ = 'restructuredtext en'
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
now = datetime.datetime.now()
title = 'The AJC'
language = 'en'
timefmt = ' [%a,%d %B %Y %I:%M %p]'
__author__ = 'TonytheBookworm'
language = 'en'
description = 'News from Atlanta and USA'
publisher = 'The Atlanta Journal'
category = 'news, politics, USA'
@ -13,10 +26,14 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
h1.articleHeadline{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2.articleSubheadline{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p.byline{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
p.organization{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
p{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
@ -71,9 +88,11 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
('Music', 'http://www.accessatlanta.com/section-rss.do?source=music'),
]
def postprocess_html(self, soup, first):
for credit_tag in soup.findAll('span', attrs={'class':['imageCredit rightFloat']}):
credit_tag.name ='p'
credit_tag.extract()
return soup


@ -0,0 +1,30 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1.04'
__date__ = '27, September 2010'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'Nealz Nuze'
language = 'en'
__author__ = 'TonytheBookworm'
description = 'Neal Boortz Show Radio Notes'
publisher = 'Neal Boortz'
category = 'news, politics, USA, talkshow'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
use_embedded_content = True
masthead_url = 'http://boortz.com/images/nuze_logo.gif'
conversion_options = {'linearize_tables' : True}
feeds = [
('NUZE', 'http://boortz.com/nealz_nuze_rss/rss.xml')
]


@ -26,7 +26,7 @@ class AdvancedUserRecipe1278162597(BasicNewsRecipe):
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh-cn'
language = 'zh_CN'
encoding = 'gb2312'
conversion_options = {'linearize_tables':True}


@ -0,0 +1,40 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>'
'''
cinebel.be
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Cinebel(BasicNewsRecipe):
title = u'Cinebel'
__author__ = u'Lionel Bergeret'
description = u'Cinema news from Belgium in French'
publisher = u'cinebel.be'
category = 'news, cinema, movie, Belgium'
oldest_article = 3
encoding = 'utf8'
language = 'fr_BE'
max_articles_per_feed = 20
no_stylesheets = True
use_embedded_content = False
timefmt = ' [%d %b %Y]'
keep_only_tags = [
dict(name = 'span', attrs = {'class': 'movieMainTitle'})
,dict(name = 'div', attrs = {'id': 'filmPoster'})
,dict(name = 'div', attrs = {'id': 'filmDefinition'})
,dict(name = 'div', attrs = {'id': 'synopsis'})
]
feeds = [
(u'Les sorties de la semaine' , u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=0' )
,(u'Top 10' , u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=2' )
]
def get_cover_url(self):
cover_url = 'http://www.cinebel.be/portal/resources/common/logo_index.gif'
return cover_url


@ -51,8 +51,14 @@ class Danas(BasicNewsRecipe):
preprocess_regexps = [
(re.compile(u'\u0110'), lambda match: u'\u00D0')
,(re.compile(u'\u201c'), lambda match: '"')
,(re.compile(u'\u201e'), lambda match: '"')
,(re.compile(u'\u2018'), lambda match: '&lsquo;') # left single quotation mark
,(re.compile(u'\u2019'), lambda match: '&rsquo;') # right single quotation mark
,(re.compile(u'\u201a'), lambda match: '&lsquo;') # single low-9 quotation mark
,(re.compile(u'\u201b'), lambda match: '&rsquo;') # single high-reversed-9 quotation mark
,(re.compile(u'\u201c'), lambda match: '&ldquo;') # left double quotation mark
,(re.compile(u'\u201d'), lambda match: '&rdquo;') # right double quotation mark
,(re.compile(u'\u201e'), lambda match: '&ldquo;') # double low-9 quotation mark
,(re.compile(u'\u201f'), lambda match: '&rdquo;') # double high-reversed-9 quotation mark
]
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
@ -90,6 +96,8 @@ class Danas(BasicNewsRecipe):
,(u'Vostani Serbie' , u'http://www.danas.rs/rss/rss.asp?column_id=57')
,(u'Med&Jad-a' , u'http://www.danas.rs/rss/rss.asp?column_id=58')
,(u'Svetlosti pozornice' , u'http://www.danas.rs/rss/rss.asp?column_id=59')
,(u'Dva cvancika' , u'http://www.danas.rs/rss/rss.asp?column_id=65')
,(u'Iz kornera' , u'http://www.danas.rs/rss/rss.asp?column_id=64')
]
def preprocess_html(self, soup):


@ -0,0 +1,39 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>'
'''
dhnet.be
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class DHNetBe(BasicNewsRecipe):
title = u'La Derniere Heure'
__author__ = u'Lionel Bergeret'
description = u'News from Belgium in French'
publisher = u'dhnet.be'
category = 'news, Belgium'
oldest_article = 3
language = 'fr_BE'
max_articles_per_feed = 20
no_stylesheets = True
use_embedded_content = False
timefmt = ' [%d %b %Y]'
keep_only_tags = [
dict(name = 'div', attrs = {'id': 'articleText'})
,dict(name = 'div', attrs = {'id': 'articlePicureAndLinks'})
]
feeds = [
(u'La Une' , u'http://www.dhnet.be/rss' )
,(u'La Une Sports' , u'http://www.dhnet.be/rss/dhsports/' )
,(u'La Une Info' , u'http://www.dhnet.be/rss/dhinfos/' )
]
def get_cover_url(self):
cover_url = strftime('http://pdf-online.dhnet.be/pdfonline/image/%Y%m%d/dh_%Y%m%d_nam_infoge_001.pdf.L.jpg')
return cover_url


@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'How To Geek'
language = 'en'
__author__ = 'TonytheBookworm'
description = 'Daily Computer Tips and Tricks'
publisher = 'Howtogeek'
category = 'PC,tips,tricks'
oldest_article = 2
max_articles_per_feed = 100
linearize_tables = True
no_stylesheets = True
remove_javascript = True
masthead_url = 'http://blog.stackoverflow.com/wp-content/uploads/how-to-geek-logo.png'
remove_tags =[dict(name='a', attrs={'target':['_blank']}),
dict(name='table', attrs={'id':['articleTable']}),
dict(name='div', attrs={'class':['feedflare']}),
]
feeds = [
('Tips', 'http://feeds.howtogeek.com/howtogeek')
]


@ -11,6 +11,7 @@ class AdvancedUserRecipe1283666183(BasicNewsRecipe):
title = u'Journal Gazette Ft. Wayne IN'
__author__ = 'cynvision'
oldest_article = 1
language = 'en'
max_articles_per_feed = 8
no_stylesheets = True
remove_javascript = True


@ -0,0 +1,57 @@
from calibre.web.feeds.news import BasicNewsRecipe
class JerusalemPost(BasicNewsRecipe):
title = 'Jerusalem post'
language = 'fr'
__author__ = 'TonytheBookworm'
description = 'The Jerusalem Post (in French)'
publisher = 'jpost'
category = 'news'
oldest_article = 30
max_articles_per_feed = 100
linearize_tables = True
no_stylesheets = True
remove_javascript = True
masthead_url = 'http://static.jpost.com/JPSITES/images/JFrench/2008/site/jplogo.JFrench.gif'
remove_tags = [
dict(name='a', attrs={'href':['javascript:window.print()']}),
dict(name='div', attrs={'class':['bot']}),
]
feeds = [
('NEWS', 'http://fr.jpost.com/servlet/Satellite?collId=1216805762036&pagename=JFrench%2FPage%2FRSS'),
('JFrench En route vers la paix', 'http://fr.jpost.com/servlet/Satellite?collId=1216805762201&pagename=JFrench%2FPage%2FRSS'),
('JFrench Politique', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737334&pagename=JFrench%2FPage%2FRSS'),
('JFrench Securite', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737338&pagename=JFrench%2FPage%2FRSS'),
('JFrench Moyen Orient', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737342&pagename=JFrench%2FPage%2FRSS'),
('JFrench Diplomatie / Monde', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737346&pagename=JFrench%2FPage%2FRSS'),
('JFrench Economie / Sciences', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737358&pagename=JFrench%2FPage%2FRSS'),
('JFrench Societe', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737354&pagename=JFrench%2FPage%2FRSS'),
('JFrench Opinions', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737350&pagename=JFrench%2FPage%2FRSS'),
('JFrench Monde juif', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737366&pagename=JFrench%2FPage%2FRSS'),
('JFrench Culture / Sport', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737362&pagename=JFrench%2FPage%2FRSS')
]
def print_version(self, url):
split1 = url.split("cid=")
#for testing only -------
#print 'SPLIT IS: ', split1
#print 'ORG URL IS: ', url
#---------------------------
idnum = split1[1] # get the actual value of the id article
#for testing only --------------------
#print 'the idnum is: ', idnum
#--------------------------------------
print_url = 'http://fr.jpost.com/servlet/Satellite?cid=' + idnum + '&pagename=JFrench%2FJPArticle%2FPrinter'
#for testing only -------------------------
#print 'PRINT URL IS: ', print_url
#------------------------------------------
return print_url
#example of how links should be formatted
#--------------------------------------------------------------------------------------------------------------
#org version = http://fr.jpost.com/servlet/Satellite?pagename=JFrench/JPArticle/ShowFull&cid=1282804806075
#print version = http://fr.jpost.com/servlet/Satellite?cid=1282804806075&pagename=JFrench%2FJPArticle%2FPrinter
#------------------------------------------------------------------------------------------------------------------


@ -0,0 +1,43 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>'
'''
lalibre.be
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LaLibre(BasicNewsRecipe):
title = u'La Libre Belgique'
__author__ = u'Lionel Bergeret'
description = u'News from Belgium in French'
publisher = u'lalibre.be'
category = 'news, Belgium'
oldest_article = 3
language = 'fr_BE'
max_articles_per_feed = 20
no_stylesheets = True
use_embedded_content = False
timefmt = ' [%d %b %Y]'
keep_only_tags = [
dict(name = 'div', attrs = {'id': 'articleHat'})
,dict(name = 'p', attrs = {'id': 'publicationDate'})
,dict(name = 'div', attrs = {'id': 'articleText'})
]
feeds = [
(u'L\'actu' , u'http://www.lalibre.be/rss/?section=10' )
,(u'Culture' , u'http://www.lalibre.be/rss/?section=5' )
,(u'Economie' , u'http://www.lalibre.be/rss/?section=3' )
,(u'Libre Entreprise' , u'http://www.lalibre.be/rss/?section=904' )
,(u'Sports' , u'http://www.lalibre.be/rss/?section=2' )
,(u'Societe' , u'http://www.lalibre.be/rss/?section=12' )
]
def get_cover_url(self):
cover_url = strftime('http://pdf-online.lalibre.be/pdfonline/image/%Y%m%d/llb_%Y%m%d_nam_libre_001.pdf.L.jpg')
return cover_url


@ -0,0 +1,54 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>'
'''
lameuse.be
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LaMeuse(BasicNewsRecipe):
title = u'La Meuse'
__author__ = u'Lionel Bergeret'
description = u'News from Belgium in French'
publisher = u'lameuse.be'
category = 'news, Belgium'
oldest_article = 3
encoding = 'utf8'
language = 'fr_BE'
max_articles_per_feed = 20
no_stylesheets = True
use_embedded_content = False
timefmt = ' [%d %b %Y]'
keep_only_tags = [
dict(name = 'div', attrs = {'id': 'article'})
]
remove_tags = [
dict(name = 'div', attrs = {'class': 'sb-group'})
,dict(name = 'div', attrs = {'id': 'share'})
,dict(name = 'div', attrs = {'id': 'commentaires'})
]
feeds = [
(u'Actualite', u'http://www.lameuse.be/services/fils_rss/actualite/index.xml' )
,(u'Belgique', u'http://www.lameuse.be/services/fils_rss/actualite/belgique/index.xml' )
,(u'Monde', u'http://www.lameuse.be/services/fils_rss/actualite/monde/index.xml' )
,(u'Societe', u'http://www.lameuse.be/services/fils_rss/actualite/societe/index.xml' )
,(u'Faits Divers', u'http://www.lameuse.be/services/fils_rss/actualite/faits_divers/index.xml' )
,(u'Economie', u'http://www.lameuse.be/services/fils_rss/actualite/economie/index.xml' )
,(u'Science', u'http://www.lameuse.be/services/fils_rss/actualite/science/index.xml' )
,(u'Sante', u'http://www.lameuse.be/services/fils_rss/actualite/sante/index.xml' )
,(u'Insolite', u'http://www.lameuse.be/services/fils_rss/magazine/insolite/index.xml' )
,(u'Cinema', u'http://www.lameuse.be/services/fils_rss/culture/cinema/index.xml' )
,(u'Musique', u'http://www.lameuse.be/services/fils_rss/culture/musique/index.xml' )
,(u'Livres', u'http://www.lameuse.be/services/fils_rss/culture/livres/index.xml' )
]
def get_cover_url(self):
cover_url = strftime('http://pdf.lameuse.be/pdf/lameuse_%Y-%m-%d_LIEG_ACTUALITE_1.PDF')
return cover_url


@ -0,0 +1,40 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>'
'''
lavenir.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LAvenir(BasicNewsRecipe):
title = u'L\'Avenir'
__author__ = u'Lionel Bergeret'
description = u'News from Belgium in French'
publisher = u'lavenir.net'
category = 'news, Belgium'
oldest_article = 3
encoding = 'utf8'
language = 'fr_BE'
max_articles_per_feed = 20
no_stylesheets = True
use_embedded_content = False
timefmt = ' [%d %b %Y]'
keep_only_tags = [
dict(name = 'div', attrs = {'class': 'photo'})
,dict(name = 'p', attrs = {'class': 'intro'})
,dict(name = 'div', attrs = {'class': 'article-body'})
]
feeds = [
(u'Belgique' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1&section=info&info=df156511-c24f-4f21-81c3-a5d439a9cf4b' )
,(u'Monde' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1&section=info&info=1642237c-66b9-4e8a-a8c1-288d61fefe7e' )
,(u'Societe' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1&section=info&info=12e1a2f4-7e03-4cf1-afec-016869072317' )
]
def get_cover_url(self):
cover_url = 'http://www.lavenir.net/extra/Static/journal/Pdf/1/UNE_Nationale.PDF'
return cover_url


@ -0,0 +1,48 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>'
'''
lesoir.be
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LeSoirBe(BasicNewsRecipe):
title = u'Le Soir'
__author__ = u'Lionel Bergeret'
description = u'News from Belgium in French'
publisher = u'lesoir.be'
category = 'news, Belgium'
oldest_article = 3
language = 'fr_BE'
max_articles_per_feed = 20
no_stylesheets = True
use_embedded_content = False
timefmt = ' [%d %b %Y]'
keep_only_tags = [
dict(name = 'div', attrs = {'id': 'story_head'})
,dict(name = 'div', attrs = {'id': 'story_body'})
]
remove_tags = [
dict(name='form', attrs={'id':'story_actions'})
,dict(name='div', attrs={'id':'sb-share'})
,dict(name='div', attrs={'id':'sb-subscribe'})
]
feeds = [
(u'Belgique' , u'http://www.lesoir.be/actualite/belgique/rss.xml' )
,(u'France' , u'http://www.lesoir.be/actualite/france/rss.xml' )
,(u'Monde' , u'http://www.lesoir.be/actualite/monde/rss.xml' )
,(u'Regions' , u'http://www.lesoir.be/regions/rss.xml' )
,(u'Vie du Net' , u'http://www.lesoir.be/actualite/vie_du_net/rss.xml' )
,(u'Petite Gazette' , u'http://www.lesoir.be/actualite/sciences/rss.xml' )
]
def get_cover_url(self):
cover_url = strftime( 'http://pdf.lesoir.be/pdf/%Y-%m-%d_BRUX_UNE_1.PDF')
return cover_url


@ -12,6 +12,7 @@ class BBC(BasicNewsRecipe):
title = u'The Onion AV Club'
__author__ = 'Stephen Williams'
description = 'Film, Television and Music Reviews'
language = 'en'
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100


@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Larry Chan <larry1chan at gmail.com>'
'''
oriental daily
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class OrientalDaily(BasicNewsRecipe):
title = 'Oriental Daily'
__author__ = 'Larry Chan'
description = 'News from HK'
oldest_article = 2
max_articles_per_feed = 100
simultaneous_downloads = 5
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'Oriental Daily'
category = 'news, HK, world'
language = 'zh'
publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
remove_tags_after = dict(id='bottomNavCTN')
keep_only_tags = [
dict(name='div', attrs={'id':['leadin', 'contentCTN-right']})
]
remove_tags = [
dict(name='div', attrs={'class':['largeAdsCTN', 'contentCTN-left', 'textAdsCTN', 'footerAds clear']}),
dict(name='div', attrs={'id':['articleNav']})
]
remove_attributes = ['width','height','href']
feeds = [(u'Oriental Daily', u'http://orientaldaily.on.cc/rss/news.xml')]


@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe):
description = 'Economic commentary'
publisher = 'Euro Pacific capital'
category = 'news, politics, economy, USA'
oldest_article = 15
oldest_article = 25
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
encoding = 'utf8'
use_embedded_content = False
language = 'en'
country = 'US'
remove_empty_feeds = True
extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} '
extra_css = """
body{font-family: Verdana,Times,serif }
.field-field-commentary-writer-name{font-weight: bold}
.field-items{display: inline}
"""
conversion_options = {
'comment' : description
@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe):
, 'linearize_tables' : True
}
keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})]
keep_only_tags = [
dict(name='h2',attrs={'id':'page-title'})
,dict(name='div',attrs={'class':'node'})
]
remove_tags = [
dict(name=['meta','link','base','iframe','embed'])
,dict(attrs={'id':'text-zoom'})
]
remove_attributes=['track','linktype','lang']
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]


@ -1,5 +1,5 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'Popular Science'
@ -12,13 +12,11 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
use_embedded_content = True
masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'
remove_tags = [dict(name='div', attrs={'id':['toolbar','main_supplements']}),
dict(name='span', attrs={'class':['comments']}),
dict(name='div', attrs={'class':['relatedinfo related-right','node_navigation','content2']}),
dict(name='ul', attrs={'class':['item-list clear-block']})]
feeds = [
('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
@ -33,7 +31,6 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
#The following will get rid of the Gallery: links when found
def preprocess_html(self, soup) :
print 'SOUP IS: ', soup
weblinks = soup.findAll(['head','h2'])
if weblinks is not None:
for link in weblinks:


@ -0,0 +1,55 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rmf24.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RMF24_opinie(BasicNewsRecipe):
title = u'Rmf24.pl - Opinie'
description = u'Blogi, wywiady i komentarze ze strony rmf24.pl'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
__author__ = u'Tomasz D\u0142ugosz'
no_stylesheets = True
remove_javascript = True
feeds = [(u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'),
(u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'),
(u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'),
(u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed')]
keep_only_tags = [
dict(name='div', attrs={'class':'box articleSingle print'}),
dict(name='div', attrs={'class':'box articleSingle print singleCommentary'}),
dict(name='div', attrs={'class':'box articleSingle print blogSingleEntry'})]
remove_tags = [
dict(name='div', attrs={'class':'toTop'}),
dict(name='div', attrs={'class':'category'}),
dict(name='div', attrs={'class':'REMOVE'}),
dict(name='div', attrs={'class':'embed embedAd'})]
extra_css = '''
h1 { font-size: 1.2em; }
'''
# thanks to Kovid Goyal
def get_article_url(self, article):
link = article.get('link')
if 'audio' not in link:
return link
preprocess_regexps = [
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<h2>Zdj.cie</h2>', lambda match: ''),
(r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'),
(r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>', lambda match: '</div>')
]
]


@ -1,68 +1,53 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
sciam.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class ScientificAmerican(BasicNewsRecipe):
title = u'Scientific American'
description = u'Popular science. Monthly magazine.'
__author__ = 'Kovid Goyal and Sujata Raman'
description = u'Popular Science. Monthly magazine.'
category = 'science'
__author__ = 'Starson17'
no_stylesheets = True
use_embedded_content = False
language = 'en'
publisher = 'Nature Publishing Group'
remove_empty_feeds = True
remove_javascript = True
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
extra_css = '''
p{font-weight: normal; font-size:small}
li{font-weight: normal; font-size:small}
.headline p{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
h2{font-size:x-small;}
h3{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
'''
remove_tags_before = dict(name='div', attrs={'class':'headline'})
remove_tags_after = dict(id=['article'])
remove_tags = [
dict(id=['sharetools', 'reddit']),
#dict(name='script'),
{'class':['float_left', 'atools']},
{"class": re.compile(r'also-in-this')},
dict(name='a',title = ["Get the Rest of the Article","Subscribe","Buy this Issue"]),
dict(name = 'img',alt = ["Graphic - Get the Rest of the Article"]),
dict(name='div', attrs={'class':['commentbox']}),
dict(name='h2', attrs={'class':['discuss_h2']}),
conversion_options = {'linearize_tables' : True
, 'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(name='h2', attrs={'class':'articleTitle'})
,dict(name='p', attrs={'id':'articleDek'})
,dict(name='p', attrs={'class':'articleInfo'})
,dict(name='div', attrs={'id':['articleContent']})
,dict(name='img', attrs={'src':re.compile(r'/media/inline/blog/Image/', re.DOTALL|re.IGNORECASE)})
]
html2lrf_options = ['--base-font-size', '8']
recursions = 1
match_regexps = [r'article.cfm.id=\S+page=(2|3|4|5|6|7|8|9|10|11|12|13|14|15)']
remove_tags = [dict(name='a', attrs={'class':'tinyCommentCount'})]
def parse_index(self):
soup = self.index_to_soup('http://www.scientificamerican.com/sciammag/')
monthtag = soup.find('div',attrs={'id':'magazine-main_col2'})
month = self.tag_to_string(monthtag.contents[1])
self.timefmt = ' [%s]'%(self.tag_to_string(month))
issuetag = soup.find('p',attrs={'id':'articleDek'})
self.timefmt = ' [%s]'%(self.tag_to_string(issuetag))
img = soup.find('img', alt='Scientific American Magazine', src=True)
if img is not None:
self.cover_url = img['src']
features, feeds = [], []
for p in soup.find(id='magazine-main_col2').findAll('p') :
a = p.find('a', href=True)
for a in soup.find(attrs={'class':'primaryCol'}).findAll('a',attrs={'title':'Feature'}):
if a is None: continue
desc = ''
s = p.find('span', attrs={'class':"sub"})
s = a.parent.parent.find(attrs={'class':'dek'})
desc = self.tag_to_string(s)
article = {
'url' : a['href'],
'title' : self.tag_to_string(a),
@ -71,51 +56,36 @@ class ScientificAmerican(BasicNewsRecipe):
}
features.append(article)
feeds.append(('Features', features))
section = []
department = []
title = None
for x in soup.find(id='magazine-main_col1').findAll(['div', 'a']):
if x.name == 'div':
if section:
feeds.append((title, section))
title = self.tag_to_string(x)
section = []
else:
if 'article.cfm' in x['href']:
for li in soup.find(attrs={'class':'secondaryCol'}).findAll('li'):
if 'department.cfm' in li.a['href']:
if department:
feeds.append((title, department))
title = self.tag_to_string(li.a)
department = []
if 'article.cfm' in li.h3.a['href']:
article = {
'url' : x['href'],
'title' : self.tag_to_string(x),
'url' : li.h3.a['href'],
'title' : self.tag_to_string(li.h3.a),
'date': '',
'description': '',
'description': self.tag_to_string(li.p),
}
section.append(article)
if section:
feeds.append((title, section))
department.append(article)
if department:
feeds.append((title, department))
return feeds
def postprocess_html(self, soup, first_fetch):
if soup is not None:
for span in soup.findAll('span', attrs={'class':'pagination'}):
span.extract()
if not first_fetch:
div = soup.find('div', attrs={'class':'headline'})
if div:
div.extract()
for item in soup.findAll('a'):
if 'topic.cfm' in item['href']:
item.replaceWith(item.string)
return soup
preprocess_regexps = [
(re.compile(r'Already a Digital subscriber.*Now</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'If your institution has site license access, enter.*here</a>.', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'to subscribe to our.*;.*\}', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'\)\(jQuery\);.*-->', re.DOTALL|re.IGNORECASE), lambda match: ''),
]
extra_css = '''
p{font-weight: normal; font-size:small}
li{font-weight: normal; font-size:small}
.headline p{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
h2{font-size:large; font-family:Arial,Helvetica,sans-serif;}
h3{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
'''


@ -7,6 +7,7 @@ class TechnologyReview(BasicNewsRecipe):
description = 'MIT Technology Magazine'
publisher = 'Technology Review Inc.'
category = 'Technology, Innovation, R&D'
language = 'en'
oldest_article = 14
max_articles_per_feed = 100
No_stylesheets = True


@ -1,6 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
telegraph.co.uk
'''
@ -8,14 +7,16 @@ telegraph.co.uk
from calibre.web.feeds.news import BasicNewsRecipe
class TelegraphUK(BasicNewsRecipe):
title = u'Telegraph.co.uk'
title = 'Telegraph.co.uk'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'News from United Kingdom'
oldest_article = 7
oldest_article = 2
category = 'news, politics, UK'
publisher = 'Telegraph Media Group ltd.'
max_articles_per_feed = 100
no_stylesheets = True
language = 'en'
language = 'en_GB'
remove_empty_feeds = True
use_embedded_content = False
extra_css = '''
@ -27,13 +28,20 @@ class TelegraphUK(BasicNewsRecipe):
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(name='div', attrs={'class':'storyHead'})
,dict(name='div', attrs={'class':'story' })
#,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
dict(name='div', attrs={'class':['storyHead','byline']})
,dict(name='div', attrs={'id':'mainBodyArea' })
]
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']})
#,dict(name='div', attrs={'class':['toolshideoneQuarter']})
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide','related_links_video']})
,dict(name='ul' , attrs={'class':['shareThis shareBottom']})
,dict(name='span', attrs={'class':['num','placeComment']})
]
@ -51,24 +59,7 @@ class TelegraphUK(BasicNewsRecipe):
]
def get_article_url(self, article):
url = article.get('guid', None)
url = article.get('link', None)
if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
url = None
return url
def postprocess_html(self,soup,first):
for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}):
for pTag in bylineTag.findAll(name='p'):
if getattr(pTag.contents[0],"Comments",True):
pTag.extract()
return soup


@ -0,0 +1,40 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.thewaythefutureblogs.com
Frederik Pohl's Blog
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheWayTheFutureBlogs(BasicNewsRecipe):
title = 'The Way the Future Blogs'
__author__ = 'Darko Miletic'
description = "Frederik Pohl's blog"
publisher = 'Frederik Pohl'
category = 'news, SF, books'
oldest_article = 30
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
extra_css = ' body{font-family: Georgia,serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags =[dict(name=['meta','object','embed','iframe','base','link'])]
keep_only_tags=[dict(attrs={'class':['post','commentlist']})]
remove_attributes=['width','height','lang','border']
feeds = [(u'Posts', u'http://www.thewaythefutureblogs.com/feed/')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup


@ -1,5 +1,5 @@
" Project wide builtins
let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I"]
let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen"]
python << EOFPY
import os


@ -63,7 +63,7 @@ class Check(Command):
description = 'Check for errors in the calibre source code'
BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P']
BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen']
CACHE = '.check-cache.pickle'
def get_files(self, cache):


@ -123,7 +123,7 @@ class VMInstaller(Command):
subprocess.check_call(['scp',
self.VM_NAME+':build/calibre/'+installer, 'dist'])
if not os.path.exists(installer):
self.warn('Failed to download installer')
self.warn('Failed to download installer: '+installer)
raise SystemExit(1)
def clean(self):


@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.19'
__version__ = '0.7.20'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re


@ -218,7 +218,7 @@ class MetadataReaderPlugin(Plugin): # {{{
with the input data.
:param type: The type of file. Guaranteed to be one of the entries
in :attr:`file_types`.
:return: A :class:`calibre.ebooks.metadata.MetaInformation` object
:return: A :class:`calibre.ebooks.metadata.book.Metadata` object
'''
return None
# }}}
@ -248,7 +248,7 @@ class MetadataWriterPlugin(Plugin): # {{{
with the input data.
:param type: The type of file. Guaranteed to be one of the entries
in :attr:`file_types`.
:param mi: A :class:`calibre.ebooks.metadata.MetaInformation` object
:param mi: A :class:`calibre.ebooks.metadata.book.Metadata` object
'''
pass
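
For context, a minimal reader plugin written against the interface documented above might look
like the sketch below; the class name, file type, title-from-first-line logic and the exact
import path for Metadata are assumptions for illustration, not code from this commit:

    from calibre.customize import MetadataReaderPlugin
    from calibre.ebooks.metadata.book.base import Metadata

    class FirstLineTitleReader(MetadataReaderPlugin):
        # Hypothetical example: treat the first line of a .txt file as the title
        name = 'First line title reader'
        file_types = set(['txt'])
        description = 'Illustrative example only'

        def get_metadata(self, stream, type):
            first_line = stream.readline().decode('utf-8', 'replace').strip()
            return Metadata(first_line or 'Unknown', ['Unknown'])
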


@ -226,8 +226,7 @@ class OPFMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata import MetaInformation
return MetaInformation(OPF(stream, os.getcwd()))
return OPF(stream, os.getcwd()).to_book_metadata()
class PDBMetadataReader(MetadataReaderPlugin):
@ -447,7 +446,7 @@ from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
BOOQ, ELONEX, POCKETBOOK301, MENTOR
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK
from calibre.devices.prs505.driver import PRS505
@ -469,14 +468,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers
LibraryThingCovers, DoubanCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, LibraryThingCovers]
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers]
plugins += [
ComicInput,
EPUBInput,
@ -521,6 +520,7 @@ plugins += [
IREXDR1000,
IREXDR800,
JETBOOK,
JETBOOK_MINI,
MIBUK,
SHINEBOOK,
POCKETBOOK360,
@ -797,6 +797,17 @@ class Sending(PreferencesPlugin):
description = _('Control how calibre transfers files to your '
'ebook reader')
class Plugboard(PreferencesPlugin):
name = 'Plugboard'
icon = I('plugboard.png')
gui_name = _('Metadata plugboards')
category = 'Import/Export'
gui_category = _('Import/Export')
category_order = 3
name_order = 4
config_widget = 'calibre.gui2.preferences.plugboard'
description = _('Change metadata fields before saving/sending')
class Email(PreferencesPlugin):
name = 'Email'
icon = I('mail.png')
@ -857,8 +868,8 @@ class Misc(PreferencesPlugin):
description = _('Miscellaneous advanced configuration')
plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
CommonOptions, OutputOptions, Adding, Saving, Sending, Email, Server,
Plugins, Tweaks, Misc]
CommonOptions, OutputOptions, Adding, Saving, Sending, Plugboard,
Email, Server, Plugins, Tweaks, Misc]
#}}}

View File

@ -1,3 +1,4 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
@ -251,6 +252,9 @@ class OutputProfile(Plugin):
#: The character used to represent a star in ratings
ratings_char = u'*'
#: Unsupported unicode characters to be replaced during preprocessing
unsupported_unicode_chars = []
@classmethod
def tags_to_string(cls, tags):
return escape(', '.join(tags))
@ -422,6 +426,8 @@ class SonyReaderOutput(OutputProfile):
dpi = 168.451
fbase = 12
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
unsupported_unicode_chars = [u'\u201f', u'\u201b']
class KoboReaderOutput(OutputProfile):

View File

@ -67,10 +67,17 @@ def load_plugin(path_to_zip_file): # {{{
if name.lower().endswith('plugin.py'):
locals = {}
raw = zf.read(name)
match = re.search(r'coding[:=]\s*([-\w.]+)', raw[:300])
encoding = 'utf-8'
lines, encoding = raw.splitlines(), 'utf-8'
cr = re.compile(r'coding[:=]\s*([-\w.]+)')
raw = []
for l in lines[:2]:
match = cr.search(l)
if match is not None:
encoding = match.group(1)
else:
raw.append(l)
raw += lines[2:]
raw = '\n'.join(raw)
raw = raw.decode(encoding)
raw = re.sub('\r\n', '\n', raw)
exec raw in locals
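The loader above only honours a PEP 263 coding declaration if it sits in the first two lines of plugin.py, drops those cookie lines so they cannot affect the later exec, and decodes the remainder with the declared encoding (utf-8 by default). A minimal standalone sketch of that behaviour, with a hypothetical plugin source:

import re

def decode_plugin_source(raw):
    # raw is the byte string read from the zip file
    lines, encoding = raw.splitlines(), 'utf-8'
    cr = re.compile(r'coding[:=]\s*([-\w.]+)')
    kept = []
    for l in lines[:2]:
        match = cr.search(l)
        if match is not None:
            encoding = match.group(1)   # remember the declared encoding
        else:
            kept.append(l)              # keep non-cookie lines
    kept += lines[2:]
    return '\n'.join(kept).decode(encoding)

sample = '# -*- coding: latin-1 -*-\nname = "caf\xe9"\n'
print decode_plugin_source(sample)      # cookie line removed, body decoded as latin-1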
@ -113,7 +120,7 @@ def enable_plugin(plugin_or_name):
config['enabled_plugins'] = ep
default_disabled_plugins = set([
'Douban Books',
'Douban Books', 'Douban.com covers',
])
def is_disabled(plugin):

View File

@ -36,11 +36,17 @@ Run an embedded python interpreter.
'plugin code.')
parser.add_option('--reinitialize-db', default=None,
help='Re-initialize the sqlite calibre database at the '
'specified path. Useful to recover from db corruption.')
'specified path. Useful to recover from db corruption.'
' You can also specify the path to an SQL dump which '
'will be used instead of trying to dump the database.'
' This can be useful when dumping fails, but dumping '
'with sqlite3 works.')
parser.add_option('-p', '--py-console', help='Run python console',
default=False, action='store_true')
return parser
def reinit_db(dbpath, callback=None):
def reinit_db(dbpath, callback=None, sql_dump=None):
if not os.path.exists(dbpath):
raise ValueError(dbpath + ' does not exist')
from calibre.library.sqlite import connect
@ -50,13 +56,17 @@ def reinit_db(dbpath, callback=None):
uv = conn.get('PRAGMA user_version;', all=False)
conn.execute('PRAGMA writable_schema=ON')
conn.commit()
if sql_dump is None:
sql_lines = conn.dump()
else:
sql_lines = open(sql_dump, 'rb').read()
conn.close()
dest = dbpath + '.tmp'
try:
with closing(connect(dest, False)) as nconn:
nconn.execute('create temporary table temp_sequence(id INTEGER PRIMARY KEY AUTOINCREMENT)')
nconn.commit()
if sql_dump is None:
if callable(callback):
callback(len(sql_lines), True)
for i, line in enumerate(sql_lines):
@ -70,6 +80,8 @@ def reinit_db(dbpath, callback=None):
finally:
if callable(callback):
callback(i, False)
else:
nconn.executescript(sql_lines)
nconn.execute('pragma user_version=%d'%int(uv))
nconn.commit()
os.remove(dbpath)
@ -148,6 +160,9 @@ def main(args=sys.argv):
if len(args) > 1:
vargs.append(args[-1])
main(vargs)
elif opts.py_console:
from calibre.utils.pyconsole.main import main
main()
elif opts.command:
sys.argv = args[:1]
exec opts.command
@ -165,7 +180,10 @@ def main(args=sys.argv):
prints('CALIBRE_EXTENSIONS_PATH='+sys.extensions_location)
prints('CALIBRE_PYTHON_PATH='+os.pathsep.join(sys.path))
elif opts.reinitialize_db is not None:
reinit_db(opts.reinitialize_db)
sql_dump = None
if len(args) > 1 and os.access(args[-1], os.R_OK):
sql_dump = args[-1]
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
else:
from calibre import ipython
ipython()
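The new sql_dump parameter makes it possible to rebuild the library database from a dump produced outside calibre (for example with the sqlite3 command line tool) when conn.dump() itself fails on the corrupted file. A hedged usage sketch, assuming reinit_db is imported from this module (calibre.debug) and that the dump already exists; the paths are hypothetical:

from calibre.debug import reinit_db

# metadata.sql was produced beforehand, e.g. with the sqlite3 CLI's .dump command,
# because calibre's own dump step failed on this database.
reinit_db('/home/user/Calibre Library/metadata.db', sql_dump='/tmp/metadata.sql')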

View File

@ -56,6 +56,7 @@ def get_connected_device():
return dev
def debug(ioreg_to_tmp=False, buf=None):
import textwrap
from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner, win_pnp_drives
from calibre.constants import iswindows, isosx, __version__
@ -95,13 +96,19 @@ def debug(ioreg_to_tmp=False, buf=None):
ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
ioreg += Device.run_ioreg()
connected_devices = []
for dev in sorted(device_plugins(), cmp=lambda
x,y:cmp(x.__class__.__name__, y.__class__.__name__)):
out('Looking for', dev.__class__.__name__)
devplugins = list(sorted(device_plugins(), cmp=lambda
x,y:cmp(x.__class__.__name__, y.__class__.__name__)))
out('Available plugins:', textwrap.fill(' '.join([x.__class__.__name__ for x in
devplugins])))
out(' ')
out('Looking for devices...')
for dev in devplugins:
connected, det = s.is_device_connected(dev, debug=True)
if connected:
out('\t\tDetected possible device', dev.__class__.__name__)
connected_devices.append((dev, det))
out(' ')
errors = {}
success = False
out('Devices possibly connected:', end=' ')

View File

@ -34,7 +34,7 @@ class ANDROID(USBMS):
0x227]},
# Samsung
0x04e8 : { 0x681d : [0x0222, 0x0400],
0x04e8 : { 0x681d : [0x0222, 0x0224, 0x0400],
0x681c : [0x0222, 0x0224, 0x0400],
0x6640 : [0x0100],
},

View File

@ -13,7 +13,8 @@ from calibre.devices.errors import UserFeedback
from calibre.devices.usbms.deviceconfig import DeviceConfig
from calibre.devices.interface import DevicePlugin
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata import MetaInformation, authors_to_string
from calibre.ebooks.metadata import authors_to_string, MetaInformation
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.epub import set_metadata
from calibre.library.server.utils import strftime
from calibre.utils.config import config_dir
@ -871,7 +872,7 @@ class ITUNES(DriverBase):
once uploaded to the device. len(names) == len(files)
:return: A list of 3-element tuples. The list is meant to be passed
to L{add_books_to_metadata}.
:metadata: If not None, it is a list of :class:`MetaInformation` objects.
:metadata: If not None, it is a list of :class:`Metadata` objects.
The idea is to use the metadata to determine where on the device to
put the book. len(metadata) == len(files). Apart from the regular
cover (path to cover), there may also be a thumbnail attribute, which should
@ -3008,14 +3009,14 @@ class BookList(list):
'''
return {}
class Book(MetaInformation):
class Book(Metadata):
'''
A simple class describing a book in the iTunes Books Library.
- See ebooks.metadata.__init__ for all fields
'''
def __init__(self,title,author):
MetaInformation.__init__(self, title, authors=[author])
Metadata.__init__(self, title, authors=[author])
@dynamic_property
def title_sorter(self):

View File

@ -38,7 +38,7 @@ class FOLDER_DEVICE(USBMS):
THUMBNAIL_HEIGHT = 68 # Height for thumbnails on device
CAN_SET_METADATA = True
CAN_SET_METADATA = ['title', 'authors']
SUPPORTS_SUB_DIRS = True
#: Icon for this device

View File

@ -37,7 +37,7 @@ class DevicePlugin(Plugin):
THUMBNAIL_HEIGHT = 68
#: Whether the metadata on books can be set via the GUI.
CAN_SET_METADATA = True
CAN_SET_METADATA = ['title', 'authors', 'collections']
#: Path separator for paths to books on device
path_sep = os.sep
@ -316,7 +316,7 @@ class DevicePlugin(Plugin):
being uploaded to the device.
:param names: A list of file names that the books should have
once uploaded to the device. len(names) == len(files)
:param metadata: If not None, it is a list of :class:`MetaInformation` objects.
:param metadata: If not None, it is a list of :class:`Metadata` objects.
The idea is to use the metadata to determine where on the device to
put the book. len(metadata) == len(files). Apart from the regular
cover (path to cover), there may also be a thumbnail attribute, which should
@ -335,7 +335,7 @@ class DevicePlugin(Plugin):
the device.
:param locations: Result of a call to L{upload_books}
:param metadata: List of :class:`MetaInformation` objects, same as for
:param metadata: List of :class:`Metadata` objects, same as for
:meth:`upload_books`.
:param booklists: A tuple containing the result of calls to
(:meth:`books(oncard=None)`,

View File

@ -99,4 +99,30 @@ class MIBUK(USBMS):
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'WOLDERMIBUK'
class JETBOOK_MINI(USBMS):
'''
['0x4b8',
'0x507',
'0x100',
'ECTACO',
'ECTACO ATA/ATAPI Bridge (Bulk-Only)',
'Rev.0.20']
'''
FORMATS = ['fb2', 'txt']
gui_name = 'JetBook Mini'
name = 'JetBook Mini Device Interface'
description = _('Communicate with the JetBook Mini reader.')
author = 'Kovid Goyal'
VENDOR_ID = [0x4b8]
PRODUCT_ID = [0x507]
BCD = [0x100]
VENDOR_NAME = 'ECTACO'
WINDOWS_MAIN_MEM = '' # Matches PROD_
MAIN_MEMORY_VOLUME_LABEL = 'Jetbook Mini'
SUPPORTS_SUB_DIRS = True

View File

@ -4,37 +4,15 @@ __copyright__ = '2010, Timothy Legge <timlegge at gmail.com>'
'''
import os
import re
import time
from calibre.ebooks.metadata import MetaInformation
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre import isbytestring
from calibre.devices.usbms.books import Book as Book_
class Book(MetaInformation):
class Book(Book_):
BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']
JSON_ATTRS = [
'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
'title_sort', 'comments', 'category', 'publisher', 'series',
'series_index', 'rating', 'isbn', 'language', 'application_id',
'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
'uuid', 'device_collections',
]
def __init__(self, prefix, lpath, title, authors, mime, date, ContentType, thumbnail_name, size=None, other=None):
MetaInformation.__init__(self, '')
self.device_collections = []
self._new_book = False
self.path = os.path.join(prefix, lpath)
if os.sep == '\\':
self.path = self.path.replace('/', '\\')
self.lpath = lpath.replace('\\', '/')
else:
self.lpath = lpath
def __init__(self, prefix, lpath, title, authors, mime, date, ContentType,
thumbnail_name, size=None, other=None):
Book_.__init__(self, prefix, lpath)
self.title = title
if not authors:
@ -63,56 +41,6 @@ class Book(MetaInformation):
if other:
self.smart_update(other)
def __eq__(self, other):
return self.path == getattr(other, 'path', None)
@dynamic_property
def db_id(self):
doc = '''The database id in the application database that this file corresponds to'''
def fget(self):
match = re.search(r'_(\d+)$', self.lpath.rpartition('.')[0])
if match:
return int(match.group(1))
return None
return property(fget=fget, doc=doc)
@dynamic_property
def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
return property(doc=doc, fget=fget)
@dynamic_property
def thumbnail(self):
return None
def smart_update(self, other, replace_metadata=False):
'''
Merge the information in C{other} into self. In case of conflicts, the information
in C{other} takes precedence, unless the information in C{other} is NULL.
'''
MetaInformation.smart_update(self, other)
for attr in self.BOOK_ATTRS:
if hasattr(other, attr):
val = getattr(other, attr, None)
setattr(self, attr, val)
def to_json(self):
json = {}
for attr in self.JSON_ATTRS:
val = getattr(self, attr)
if isbytestring(val):
enc = filesystem_encoding if attr == 'lpath' else preferred_encoding
val = val.decode(enc, 'replace')
elif isinstance(val, (list, tuple)):
val = [x.decode(preferred_encoding, 'replace') if
isbytestring(x) else x for x in val]
json[attr] = val
return json
class ImageWrapper(object):
def __init__(self, image_path):
self.image_path = image_path

View File

@ -30,7 +30,7 @@ class KOBO(USBMS):
# Ordered list of supported formats
FORMATS = ['epub', 'pdf']
CAN_SET_METADATA = True
CAN_SET_METADATA = ['collections']
VENDOR_ID = [0x2237]
PRODUCT_ID = [0x4161]

View File

@ -27,7 +27,7 @@ class PRS505(USBMS):
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
CAN_SET_METADATA = True
CAN_SET_METADATA = ['title', 'authors', 'collections']
VENDOR_ID = [0x054c] #: SONY Vendor Id
PRODUCT_ID = [0x031e]

View File

@ -6,29 +6,18 @@ __docformat__ = 'restructuredtext en'
import os, re, time, sys
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.book.base import Metadata
from calibre.devices.mime import mime_type_ext
from calibre.devices.interface import BookList as _BookList
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre.constants import preferred_encoding
from calibre import isbytestring
from calibre.utils.config import prefs
class Book(MetaInformation):
BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']
JSON_ATTRS = [
'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
'title_sort', 'comments', 'category', 'publisher', 'series',
'series_index', 'rating', 'isbn', 'language', 'application_id',
'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
'uuid',
]
from calibre.utils.config import prefs, tweaks
class Book(Metadata):
def __init__(self, prefix, lpath, size=None, other=None):
from calibre.ebooks.metadata.meta import path_to_ext
MetaInformation.__init__(self, '')
Metadata.__init__(self, '')
self._new_book = False
self.device_collections = []
@ -72,32 +61,6 @@ class Book(MetaInformation):
def thumbnail(self):
return None
def smart_update(self, other, replace_metadata=False):
'''
Merge the information in C{other} into self. In case of conflicts, the information
in C{other} takes precedence, unless the information in C{other} is NULL.
'''
MetaInformation.smart_update(self, other, replace_metadata)
for attr in self.BOOK_ATTRS:
if hasattr(other, attr):
val = getattr(other, attr, None)
setattr(self, attr, val)
def to_json(self):
json = {}
for attr in self.JSON_ATTRS:
val = getattr(self, attr)
if isbytestring(val):
enc = filesystem_encoding if attr == 'lpath' else preferred_encoding
val = val.decode(enc, 'replace')
elif isinstance(val, (list, tuple)):
val = [x.decode(preferred_encoding, 'replace') if
isbytestring(x) else x for x in val]
json[attr] = val
return json
class BookList(_BookList):
def __init__(self, oncard, prefix, settings):
@ -131,11 +94,30 @@ class CollectionsBookList(BookList):
def supports_collections(self):
return True
def compute_category_name(self, attr, category, field_meta):
renames = tweaks['sony_collection_renaming_rules']
attr_name = renames.get(attr, None)
if attr_name is None:
if field_meta['is_custom']:
attr_name = '(%s)'%field_meta['name']
else:
attr_name = ''
elif attr_name != '':
attr_name = '(%s)'%attr_name
cat_name = '%s %s'%(category, attr_name)
return cat_name.strip()
def get_collections(self, collection_attributes):
from calibre.devices.usbms.driver import debug_print
debug_print('Starting get_collections:', prefs['manage_device_metadata'])
debug_print('Renaming rules:', tweaks['sony_collection_renaming_rules'])
# Complexity: we can use renaming rules only when using automatic
# management. Otherwise we don't always have the metadata to make the
# right decisions
use_renaming_rules = prefs['manage_device_metadata'] == 'on_connect'
collections = {}
series_categories = set([])
# This map of sets is used to avoid linear searches when testing for
# book equality
collections_lpaths = {}
@ -163,39 +145,72 @@ class CollectionsBookList(BookList):
attrs = collection_attributes
for attr in attrs:
attr = attr.strip()
val = getattr(book, attr, None)
# If attr is device_collections, then we cannot use
# format_field, because we don't know the fields where the
# values came from.
if attr == 'device_collections':
doing_dc = True
val = book.device_collections # is a list
else:
doing_dc = False
ign, val, orig_val, fm = book.format_field_extended(attr)
if not val: continue
if isbytestring(val):
val = val.decode(preferred_encoding, 'replace')
if isinstance(val, (list, tuple)):
val = list(val)
elif isinstance(val, unicode):
elif fm['datatype'] == 'series':
val = [orig_val]
elif fm['datatype'] == 'text' and fm['is_multiple']:
val = orig_val
else:
val = [val]
for category in val:
is_series = False
if doing_dc:
# Attempt to determine if this value is a series by
# comparing it to the series name.
if category == book.series:
is_series = True
elif fm['is_custom']: # is a custom field
if fm['datatype'] == 'text' and len(category) > 1 and \
category[0] == '[' and category[-1] == ']':
continue
if fm['datatype'] == 'series':
is_series = True
else: # is a standard field
if attr == 'tags' and len(category) > 1 and \
category[0] == '[' and category[-1] == ']':
continue
if category not in collections:
collections[category] = []
collections_lpaths[category] = set()
if lpath not in collections_lpaths[category]:
collections_lpaths[category].add(lpath)
collections[category].append(book)
if attr == 'series' or \
('series' in collection_attributes and
getattr(book, 'series', None) == category):
series_categories.add(category)
book.get('series', None) == category):
is_series = True
if use_renaming_rules:
cat_name = self.compute_category_name(attr, category, fm)
else:
cat_name = category
if cat_name not in collections:
collections[cat_name] = []
collections_lpaths[cat_name] = set()
if lpath in collections_lpaths[cat_name]:
continue
collections_lpaths[cat_name].add(lpath)
if is_series:
collections[cat_name].append(
(book, book.get(attr+'_index', sys.maxint)))
else:
collections[cat_name].append(
(book, book.get('title_sort', 'zzzz')))
# Sort collections
result = {}
for category, books in collections.items():
def tgetter(x):
return getattr(x, 'title_sort', 'zzzz')
books.sort(cmp=lambda x,y:cmp(tgetter(x), tgetter(y)))
if category in series_categories:
# Ensures books are sub sorted by title
def getter(x):
return getattr(x, 'series_index', sys.maxint)
books.sort(cmp=lambda x,y:cmp(getter(x), getter(y)))
return collections
books.sort(cmp=lambda x,y:cmp(x[1], y[1]))
result[category] = [x[0] for x in books]
return result
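compute_category_name is what the renaming rules feed into: an explicit rule wins, a custom column with no rule falls back to its display name in parentheses, and a standard field with no rule contributes nothing to the collection name. A standalone restatement with hypothetical tweak values and field metadata:

# Hypothetical sony_collection_renaming_rules value and field metadata dicts.
renames = {'series': 'Series'}

def compute_category_name(attr, category, field_meta):
    attr_name = renames.get(attr, None)
    if attr_name is None:
        if field_meta['is_custom']:
            attr_name = '(%s)' % field_meta['name']   # custom column: use its display name
        else:
            attr_name = ''                            # standard field: no suffix
    elif attr_name != '':
        attr_name = '(%s)' % attr_name                # explicit rename
    return ('%s %s' % (category, attr_name)).strip()

print compute_category_name('series', 'Foundation', {'is_custom': False})               # Foundation (Series)
print compute_category_name('tags', 'Fiction', {'is_custom': False})                    # Fiction
print compute_category_name('#genre', 'Mystery', {'is_custom': True, 'name': 'Genre'})  # Mystery (Genre)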
def rebuild_collections(self, booklist, oncard):
'''

View File

@ -829,12 +829,14 @@ class Device(DeviceConfig, DevicePlugin):
ext = os.path.splitext(fname)[1]
from calibre.library.save_to_disk import get_components
from calibre.library.save_to_disk import config
opts = config().parse()
if not isinstance(template, unicode):
template = template.decode('utf-8')
app_id = str(getattr(mdata, 'application_id', ''))
# The db id will be in the created filename
extra_components = get_components(template, mdata, fname,
length=250-len(app_id)-1)
timefmt=opts.send_timefmt, length=250-len(app_id)-1)
if not extra_components:
extra_components.append(sanitize(self.filename_callback(fname,
mdata)))

View File

@ -13,7 +13,6 @@ for a particular device.
import os
import re
import time
import json
from itertools import cycle
from calibre import prints, isbytestring
@ -21,6 +20,7 @@ from calibre.constants import filesystem_encoding, DEBUG
from calibre.devices.usbms.cli import CLI
from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book
from calibre.ebooks.metadata.book.json_codec import JsonCodec
BASE_TIME = None
def debug_print(*args):
@ -50,7 +50,7 @@ class USBMS(CLI, Device):
book_class = Book
FORMATS = []
CAN_SET_METADATA = False
CAN_SET_METADATA = []
METADATA_CACHE = 'metadata.calibre'
def get_device_information(self, end_session=True):
@ -288,6 +288,7 @@ class USBMS(CLI, Device):
# at the end just before the return
def sync_booklists(self, booklists, end_session=True):
debug_print('USBMS: starting sync_booklists')
json_codec = JsonCodec()
if not os.path.exists(self.normalize_path(self._main_prefix)):
os.makedirs(self.normalize_path(self._main_prefix))
@ -296,10 +297,8 @@ class USBMS(CLI, Device):
if prefix is not None and isinstance(booklists[listid], self.booklist_class):
if not os.path.exists(prefix):
os.makedirs(self.normalize_path(prefix))
js = [item.to_json() for item in booklists[listid] if
hasattr(item, 'to_json')]
with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f:
f.write(json.dumps(js, indent=2, encoding='utf-8'))
json_codec.encode_to_file(f, booklists[listid])
write_prefix(self._main_prefix, 0)
write_prefix(self._card_a_prefix, 1)
write_prefix(self._card_b_prefix, 2)
@ -345,19 +344,13 @@ class USBMS(CLI, Device):
@classmethod
def parse_metadata_cache(cls, bl, prefix, name):
# bl = cls.booklist_class()
js = []
json_codec = JsonCodec()
need_sync = False
cache_file = cls.normalize_path(os.path.join(prefix, name))
if os.access(cache_file, os.R_OK):
try:
with open(cache_file, 'rb') as f:
js = json.load(f, encoding='utf-8')
for item in js:
book = cls.book_class(prefix, item.get('lpath', None))
for key in item.keys():
setattr(book, key, item[key])
bl.append(book)
json_codec.decode_from_file(f, bl, cls.book_class, prefix)
except:
import traceback
traceback.print_exc()
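sync_booklists and parse_metadata_cache now hand the metadata.calibre cache to JsonCodec instead of building the JSON by hand. A hedged round-trip sketch using only the calls visible in this diff; the mount point, file path and book are hypothetical, and the BookList/Book constructors are assumed to be the ones shown elsewhere in this commit:

from calibre.ebooks.metadata.book.json_codec import JsonCodec
from calibre.devices.usbms.books import BookList, Book

prefix = '/media/READER/'
booklist = BookList(None, prefix, None)
booklist.append(Book(prefix, 'books/Example - Author.epub'))

json_codec = JsonCodec()
with open('/tmp/metadata.calibre', 'wb') as f:
    json_codec.encode_to_file(f, booklist)                   # serialize the booklist

restored = BookList(None, prefix, None)
with open('/tmp/metadata.calibre', 'rb') as f:
    json_codec.decode_from_file(f, restored, Book, prefix)   # rebuild Book objects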
@ -392,7 +385,7 @@ class USBMS(CLI, Device):
@classmethod
def book_from_path(cls, prefix, lpath):
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.book.base import Metadata
if cls.settings().read_metadata or cls.MUST_READ_METADATA:
mi = cls.metadata_from_path(cls.normalize_path(os.path.join(prefix, lpath)))
@ -401,7 +394,7 @@ class USBMS(CLI, Device):
mi = metadata_from_filename(cls.normalize_path(os.path.basename(lpath)),
cls.build_template_regexp())
if mi is None:
mi = MetaInformation(os.path.splitext(os.path.basename(lpath))[0],
mi = Metadata(os.path.splitext(os.path.basename(lpath))[0],
[_('Unknown')])
size = os.stat(cls.normalize_path(os.path.join(prefix, lpath))).st_size
book = cls.book_class(prefix, lpath, other=mi, size=size)

View File

@ -701,7 +701,7 @@ OptionRecommendation(name='timestamp',
self.opts.read_metadata_from_opf)
opf = OPF(open(self.opts.read_metadata_from_opf, 'rb'),
os.path.dirname(self.opts.read_metadata_from_opf))
mi = MetaInformation(opf)
mi = opf.to_book_metadata()
self.opts_to_mi(mi)
if mi.cover:
if mi.cover.startswith('http:') or mi.cover.startswith('https:'):

View File

@ -62,25 +62,34 @@ def wrap_lines(match):
else:
return ital+' '
def line_length(format, raw, percent):
class DocAnalysis(object):
'''
raw is the raw text to find the line length to use for wrapping.
Provides various text analysis functions to determine how the document is structured.
format is the type of document that analysis will be done against.
raw is the raw text to determine the line length to use for wrapping.
Blank lines are excluded from analysis
'''
def __init__(self, format='html', raw=''):
raw = raw.replace('&nbsp;', ' ')
if format == 'html':
linere = re.compile('(?<=<p)(?![^>]*>\s*</p>).*?(?=</p>)', re.DOTALL)
elif format == 'pdf':
linere = re.compile('(?<=<br>)(?!\s*<br>).*?(?=<br>)', re.DOTALL)
elif format == 'spanned_html':
linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
self.lines = linere.findall(raw)
def line_length(self, percent):
'''
Analyses the document to find the median line length.
percent is a decimal number, 0 - 1, which is used to determine
how far into the list of line lengths to look. The list of line lengths is
ordered smallest to largest and does not include duplicates. 0.5 is the
median value.
'''
raw = raw.replace('&nbsp;', ' ')
if format == 'html':
linere = re.compile('(?<=<p).*?(?=</p>)', re.DOTALL)
elif format == 'pdf':
linere = re.compile('(?<=<br>).*?(?=<br>)', re.DOTALL)
elif format == 'spanned_html':
linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
lines = linere.findall(raw)
lengths = []
for line in lines:
for line in self.lines:
if len(line) > 0:
lengths.append(len(line))
@ -106,6 +115,52 @@ def line_length(format, raw, percent):
return lengths[index]
def line_histogram(self, percent):
'''
Creates a broad histogram of the document to determine whether it incorporates hard
line breaks. Lines are sorted into 20 'buckets' based on length.
percent is the percentage of lines that should be in a single bucket to return true.
The majority of the lines will exist in 1-2 buckets in typical docs with hard line breaks
'''
minLineLength=20 # Ignore lines under 20 chars (typical of spaces)
maxLineLength=1900 # Discard larger than this to stay in range
buckets=20 # Each line is divided into a bucket based on length
#print "there are "+str(len(lines))+" lines"
#max = 0
#for line in self.lines:
# l = len(line)
# if l > max:
# max = l
#print "max line found is "+str(max)
# Build the line length histogram
hRaw = [ 0 for i in range(0,buckets) ]
for line in self.lines:
l = len(line)
if l > minLineLength and l < maxLineLength:
l = int(l/100)
#print "adding "+str(l)
hRaw[l]+=1
# Normalize the histogram into percents
totalLines = len(self.lines)
h = [ float(count)/totalLines for count in hRaw ]
#print "\nhRaw histogram lengths are: "+str(hRaw)
#print " percents are: "+str(h)+"\n"
# Find the biggest bucket
maxValue = 0
for i in range(0,len(h)):
if h[i] > maxValue:
maxValue = h[i]
if maxValue < percent:
#print "Line lengths are too variable. Not unwrapping."
return False
else:
#print str(maxValue)+" of the lines were in one bucket"
return True
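DocAnalysis folds the old line_length helper and the new line_histogram check into one object: the candidate lines are extracted once in __init__ and both methods reuse self.lines. A short usage sketch on hypothetical hard-wrapped HTML:

from calibre.ebooks.conversion.preprocess import DocAnalysis

# 200 short paragraphs of roughly equal length, i.e. a hard-wrapped document.
html = ''.join('<p>%s</p>' % ('word ' * 12) for i in range(200))

docanalysis = DocAnalysis('html', html)
length = docanalysis.line_length(0.4)        # character length at the 40th percentile
hardbreaks = docanalysis.line_histogram(.50) # True here: most lines fall into a single bucket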
class Dehyphenator(object):
'''
Analyzes words to determine whether hyphens should be retained/removed. Uses the document
@ -117,23 +172,41 @@ class Dehyphenator(object):
def __init__(self):
# Add common suffixes to the regex below to increase the likelihood of a match -
# don't add suffixes which are also complete words, such as 'able' or 'sex'
self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
# remove prefixes if the prefix was not already the point of hyphenation
self.prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE)
self.removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE)
self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)
def dehyphenate(self, match):
firsthalf = match.group('firstpart')
secondhalf = match.group('secondpart')
hyphenated = str(firsthalf) + "-" + str(secondhalf)
dehyphenated = str(firsthalf) + str(secondhalf)
try:
wraptags = match.group('wraptags')
except:
wraptags = ''
hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf)
dehyphenated = unicode(firsthalf) + unicode(secondhalf)
lookupword = self.removesuffixes.sub('', dehyphenated)
if self.prefixes.match(firsthalf) is None:
lookupword = self.removeprefix.sub('', lookupword)
booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
#print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
match = booklookup.search(self.html)
if match:
try:
searchresult = self.html.find(lookupword.lower())
except:
return hyphenated
if self.format == 'html_cleanup':
if self.html.find(lookupword) != -1 or searchresult != -1:
#print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
return dehyphenated
elif self.html.find(hyphenated) != -1:
#print "Cleanup:returned hyphenated word: " + str(hyphenated)
return hyphenated
else:
#print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)
return firsthalf+u'\u2014'+wraptags+secondhalf
else:
if self.html.find(lookupword) != -1 or searchresult != -1:
#print "returned dehyphenated word: " + str(dehyphenated)
return dehyphenated
else:
@ -142,17 +215,19 @@ class Dehyphenator(object):
def __call__(self, html, format, length=1):
self.html = html
self.format = format
if format == 'html':
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^"\s>]+)-\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)' % length)
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)-\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)' % length)
elif format == 'pdf':
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^"\s>]+)-\s*(<p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)-\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
elif format == 'individual_words':
intextmatch = re.compile('>[^<]*\b(?P<firstpart>[^"\s>]+)-(?P<secondpart)\w+)\b[^<]*<') # for later, not called anywhere yet
intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)-(?P<secondpart)\w+)\b[^<]*<') # for later, not called anywhere yet
elif format == 'html_cleanup':
intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)-\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
html = intextmatch.sub(self.dehyphenate, html)
return html
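The dehyphenation decision itself is a dictionary-free lookup: rejoin the two halves, strip common suffixes (and prefixes, unless the break was at the prefix), and only drop the hyphen if the resulting stem occurs elsewhere in the document. A standalone restatement of that rule; the regexes are copied from the diff, the sample text is hypothetical:

import re

removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)

def join_if_known(firsthalf, secondhalf, html):
    dehyphenated = firsthalf + secondhalf
    lookupword = removesuffixes.sub('', dehyphenated)
    if prefixes.match(firsthalf) is None:
        lookupword = removeprefix.sub('', lookupword)
    if html.lower().find(lookupword.lower()) != -1:
        return dehyphenated                  # stem found in the book, join the halves
    return firsthalf + '-' + secondhalf      # unknown stem, keep the hyphen

html = u'<p>Cooking was discussed at length in this chapter.</p>'
print join_if_known(u'cook', u'ing', html)   # cooking
print join_if_known(u'well', u'known', html) # well-known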
class CSSPreProcessor(object):
PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
@ -286,7 +361,7 @@ class HTMLPreProcessor(object):
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
# Detect Chapters to match default XPATH in GUI
(re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head),
(re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Kapitel|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head),
# Cover the case where every letter in a chapter title is separated by a space
(re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head),
@ -374,10 +449,8 @@ class HTMLPreProcessor(object):
print 'Failed to parse remove_footer regexp'
traceback.print_exc()
# unwrap em/en dashes, delete soft hyphens - moved here so it's executed after header/footer removal
# delete soft hyphens - moved here so it's executed after header/footer removal
if is_pdftohtml:
# unwrap em/en dashes
end_rules.append((re.compile(u'(?<=[–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens
end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens with formatting
@ -389,13 +462,17 @@ class HTMLPreProcessor(object):
if is_pdftohtml:
end_rules.append((re.compile(r'<p>\s*(?P<chap>(<[ibu]>){0,2}\s*([A-Z \'"!]{3,})\s*([\dA-Z:]+\s){0,4}\s*(</[ibu]>){0,2})\s*<p>\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<p>)?'), chap_head),)
length = -1
if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor'))
docanalysis = DocAnalysis('pdf', html)
length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
if length:
#print "The pdf line length returned is " + str(length)
# unwrap em/en dashes
end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
end_rules.append(
# Unwrap using punctuation
(re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
(re.compile(u'(?<=.{%i}([a-z,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + start_rules:
@ -425,7 +502,7 @@ class HTMLPreProcessor(object):
for rule in rules + end_rules:
html = rule[0].sub(rule[1], html)
if is_pdftohtml:
if is_pdftohtml and length > -1:
# Dehyphenate
dehyphenator = Dehyphenator()
html = dehyphenator(html,'pdf', length)
@ -453,6 +530,14 @@ class HTMLPreProcessor(object):
if getattr(self.extra_opts, 'smarten_punctuation', False):
html = self.smarten_punctuation(html)
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
if unsupported_unicode_chars:
from calibre.ebooks.unidecode.unidecoder import Unidecoder
unidecoder = Unidecoder()
for char in unsupported_unicode_chars:
asciichar = unidecoder.decode(char)
html = html.replace(char, asciichar)
return html
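The unsupported_unicode_chars hook ties the output profile to preprocessing: any character the device firmware cannot render is swapped for an ASCII approximation before conversion continues. The real code asks Unidecoder for the replacement; here is a standalone sketch with a hand-written mapping (the ASCII fallbacks chosen below are an assumption):

# Hypothetical ASCII fallbacks for the two characters SonyReaderOutput declares.
replacements = {u'\u201f': '"', u'\u201b': "'"}

def replace_unsupported(html, table):
    for char, asciichar in table.items():
        html = html.replace(char, asciichar)
    return html

print replace_unsupported(u'He said \u201fhello\u201f.', replacements)  # He said "hello".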
def smarten_punctuation(self, html):

View File

@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.utils.logging import default_log
class PreProcessor(object):
@ -22,18 +22,21 @@ class PreProcessor(object):
title = match.group('title')
if not title:
self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log("found " + str(self.html_preprocess_sections) + " chapters. - " + str(chap))
self.log("found " + unicode(self.html_preprocess_sections) +
" chapters. - " + unicode(chap))
return '<h2>'+chap+'</h2>\n'
else:
self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log("found " + str(self.html_preprocess_sections) + " chapters & titles. - " + str(chap) + ", " + str(title))
self.log("found " + unicode(self.html_preprocess_sections) +
" chapters & titles. - " + unicode(chap) + ", " + unicode(title))
return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n'
def chapter_break(self, match):
chap = match.group('section')
styles = match.group('styles')
self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log("marked " + str(self.html_preprocess_sections) + " section markers based on punctuation. - " + str(chap))
self.log("marked " + unicode(self.html_preprocess_sections) +
" section markers based on punctuation. - " + unicode(chap))
return '<'+styles+' style="page-break-before:always">'+chap
def insert_indent(self, match):
@ -63,7 +66,8 @@ class PreProcessor(object):
line_end = line_end_ere.findall(raw)
tot_htm_ends = len(htm_end)
tot_ln_fds = len(line_end)
self.log("There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked up endings")
self.log("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
unicode(tot_htm_ends) + " marked up endings")
if percent > 1:
percent = 1
@ -71,19 +75,24 @@ class PreProcessor(object):
percent = 0
min_lns = tot_ln_fds * percent
self.log("There must be fewer than " + str(min_lns) + " unmarked lines to add markup")
self.log("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
if min_lns > tot_htm_ends:
return True
def __call__(self, html):
self.log("********* Preprocessing HTML *********")
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
html = re.sub(r"\s*</p>", "</p>\n", html)
html = re.sub(r"\s*<p>\s*", "\n<p>", html)
###### Check Markup ######
#
# some lit files don't have any <p> tags or equivalent (generally just plain text between
# <pre> tags), check and mark up line endings if required before proceeding
if self.no_markup(html, 0.1):
self.log("not enough paragraph markers, adding now")
# check if content is in pre tags, use txt procesor to mark up if so
# check if content is in pre tags, use txt processor to mark up if so
pre = re.compile(r'<pre>', re.IGNORECASE)
if len(pre.findall(html)) == 1:
self.log("Running Text Processing")
@ -107,53 +116,84 @@ class PreProcessor(object):
txtindent = re.compile(ur'<p(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE)
html = txtindent.sub(self.insert_indent, html)
if self.found_indents > 1:
self.log("replaced "+str(self.found_indents)+ " nbsp indents with inline styles")
self.log("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
# remove remaining non-breaking spaces
html = re.sub(ur'\u00a0', ' ', html)
# Get rid of empty <o:p> tags to simplify other processing
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Get rid of empty span, bold, & italics tags
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
# If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
# If more than 40% of the lines are empty paragraphs and the user has enabled remove
# paragraph spacing then delete blank lines to clean up spacing
linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
blankreg = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
#multi_blank = re.compile(r'(\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>){2,}', re.IGNORECASE)
blanklines = blankreg.findall(html)
lines = linereg.findall(html)
blanks_between_paragraphs = False
if len(lines) > 1:
self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank")
self.log("There are " + unicode(len(blanklines)) + " blank lines. " +
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
if float(len(blanklines)) / float(len(lines)) > 0.40 and getattr(self.extra_opts,
'remove_paragraph_spacing', False):
self.log("deleting blank lines")
html = blankreg.sub('', html)
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
html = re.sub(r"\s*</p>", "</p>\n", html)
html = re.sub(r"\s*<p>\s*", "\n<p>", html)
elif float(len(blanklines)) / float(len(lines)) > 0.40:
blanks_between_paragraphs = True
#print "blanks between paragraphs is marked True"
else:
blanks_between_paragraphs = False
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
# detect chapters/sections to match xpath or splitting logic
#
# Build the Regular Expressions in pieces
lookahead = "(?=<(p|div))"
chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
chapter_header_open = r"(?P<chap>"
chapter_header_close = ")\s*"
chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)\s[^>]*>)?\s*</(?P=outer)>\s*"
if blanks_between_paragraphs:
blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*"
else:
blank_lines = ""
opt_title_open = "("
title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
title_header_open = "(?P<title>"
title_header_close = ")\s*"
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>"
opt_title_close = ")?"
default_title = r"(\s*[\w\'\"-]+){1,5}(?!<)"
typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}"
numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*"
uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*"
chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
#print chapter_marker
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings")
self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
#
# Start with most typical chapter headings, get more aggressive until one works
if self.html_preprocess_sections < 10:
chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
html = chapdetect.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters")
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying numeric chapters")
chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
html = chapdetect2.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words")
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying with uppercase words")
chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html)
###### Unwrap lines ######
#
self.log("Unwrapping Lines")
# Some OCR sourced files have line breaks in the html using a combination of span & p tags
# span are used for hard line breaks, p for new paragraphs. Determine which is used so
# that lines can be un-wrapped across page boundaries
@ -168,29 +208,45 @@ class PreProcessor(object):
format = 'html'
else:
format = 'html'
# Check Line histogram to determine if the document uses hard line breaks, If 50% or
# more of the lines break in the same region of the document then unwrapping is required
docanalysis = DocAnalysis(format, html)
hardbreaks = docanalysis.line_histogram(.50)
self.log("Hard line breaks check returned "+unicode(hardbreaks))
# Calculate Length
length = line_length(format, html, getattr(self.extra_opts,
'html_unwrap_factor', 0.4))
self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
max_length = length * 1.4
min_max = str("(?<=.{"+str(length)+"})(?<!.{"+str(max_length)+"})")
#
# Unwrap em/en dashes, delete soft-hyphens
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html)
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor)
self.log("*** Median line length is " + unicode(length) + ", calculated with " + format + " format ***")
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
if hardbreaks or unwrap_factor < 0.4:
self.log("Unwrapping required, unwrapping Lines")
# Unwrap em/en dashes
html = re.sub(u'(?<=.{%i}[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % length, '', html)
# Dehyphenate
self.log("Unwrapping/Removing hyphens")
dehyphenator = Dehyphenator()
html = dehyphenator(html,'html', length)
self.log("Done dehyphenating")
# Unwrap lines using punctuation and line length
unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
unwrap = re.compile(u"(?<=.{%i}([a-z,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
html = unwrap.sub(' ', html)
#check any remaining hyphens, but only unwrap if there is a match
dehyphenator = Dehyphenator()
html = dehyphenator(html,'html_cleanup', length)
else:
# dehyphenate in cleanup mode to fix anything previous conversions/editing missed
self.log("Cleaning up hyphenation")
dehyphenator = Dehyphenator()
html = dehyphenator(html,'html_cleanup', length)
self.log("Done dehyphenating")
# delete soft hyphens
html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
# If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < 10:
self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections))
self.log("Looking for more split points based on punctuation,"
" currently have " + unicode(self.html_preprocess_sections))
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html)
# search for places where a first or second level heading is immediately followed by another

View File

@ -43,7 +43,11 @@ class Epubcheck(ePubFixer):
default=default)
except:
raise InvalidEpub('Invalid date set in OPF', raw)
try:
sval = ts.strftime('%Y-%m-%d')
except:
from calibre import strftime
sval = strftime('%Y-%m-%d', ts.timetuple())
if sval != raw:
self.log.error(
'OPF contains date', raw, 'that epubcheck does not like')
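The try/except above exists because datetime.strftime on Python 2 raises ValueError for years before 1900, which placeholder dates in OPF files can trigger; the fallback routes through calibre's own strftime using the timetuple. A dependency-free sketch of the same guard for this ISO date case, with a hypothetical early date:

import datetime

def iso_date(ts):
    try:
        return ts.strftime('%Y-%m-%d')                  # fails on Python 2 if ts.year < 1900
    except ValueError:
        return '%04d-%02d-%02d' % (ts.year, ts.month, ts.day)

print iso_date(datetime.datetime(101, 1, 1))    # 0101-01-01
print iso_date(datetime.datetime(2010, 9, 24))  # 2010-09-24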

View File

@ -10,10 +10,9 @@ import os, mimetypes, sys, re
from urllib import unquote, quote
from urlparse import urlparse
from calibre import relpath, prints
from calibre import relpath
from calibre.utils.config import tweaks
from calibre.utils.date import isoformat
_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
def string_to_authors(raw):
@ -221,214 +220,18 @@ class ResourceCollection(object):
class MetaInformation(object):
'''Convenient encapsulation of book metadata'''
@staticmethod
def copy(mi):
ans = MetaInformation(mi.title, mi.authors)
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
'manifest', 'spine', 'toc', 'cover', 'language',
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
'author_sort_map',
'pubdate', 'rights', 'publication_type', 'uuid'):
if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr))
def __init__(self, title, authors=(_('Unknown'),)):
'''
def MetaInformation(title, authors=(_('Unknown'),)):
''' Convenient encapsulation of book metadata, needed for compatibility
@param title: title or ``_('Unknown')`` or a MetaInformation object
@param authors: List of strings or []
'''
from calibre.ebooks.metadata.book.base import Metadata
mi = None
if hasattr(title, 'title') and hasattr(title, 'authors'):
mi = title
title = mi.title
authors = mi.authors
self.title = title
self.author = list(authors) if authors else []# Needed for backward compatibility
#: List of strings or []
self.authors = list(authors) if authors else []
self.tags = getattr(mi, 'tags', [])
#: mi.cover_data = (ext, data)
self.cover_data = getattr(mi, 'cover_data', (None, None))
self.author_sort_map = getattr(mi, 'author_sort_map', {})
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
'series', 'series_index', 'rating', 'isbn', 'language',
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
'rights', 'publication_type', 'uuid',
):
setattr(self, x, getattr(mi, x, None))
def print_all_attributes(self):
for x in ('title','author', 'author_sort', 'title_sort', 'comments', 'category', 'publisher',
'series', 'series_index', 'tags', 'rating', 'isbn', 'language',
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
'rights', 'publication_type', 'uuid', 'author_sort_map'
):
prints(x, getattr(self, x, 'None'))
def smart_update(self, mi, replace_metadata=False):
'''
Merge the information in C{mi} into self. In case of conflicts, the
information in C{mi} takes precedence, unless the information in mi is
NULL. If replace_metadata is True, then the information in mi always
takes precedence.
'''
if mi.title and mi.title != _('Unknown'):
self.title = mi.title
if mi.authors and mi.authors[0] != _('Unknown'):
self.authors = mi.authors
for attr in ('author_sort', 'title_sort', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'guide', 'book_producer',
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
'publication_type', 'uuid'):
if replace_metadata:
setattr(self, attr, getattr(mi, attr, 1.0 if \
attr == 'series_index' else None))
elif hasattr(mi, attr):
val = getattr(mi, attr)
if val is not None:
setattr(self, attr, val)
if replace_metadata:
self.tags = mi.tags
elif mi.tags:
self.tags += mi.tags
self.tags = list(set(self.tags))
if mi.author_sort_map:
self.author_sort_map.update(mi.author_sort_map)
if getattr(mi, 'cover_data', False):
other_cover = mi.cover_data[-1]
self_cover = self.cover_data[-1] if self.cover_data else ''
if not self_cover: self_cover = ''
if not other_cover: other_cover = ''
if len(other_cover) > len(self_cover):
self.cover_data = mi.cover_data
if replace_metadata:
self.comments = getattr(mi, 'comments', '')
else:
my_comments = getattr(self, 'comments', '')
other_comments = getattr(mi, 'comments', '')
if not my_comments:
my_comments = ''
if not other_comments:
other_comments = ''
if len(other_comments.strip()) > len(my_comments.strip()):
self.comments = other_comments
other_lang = getattr(mi, 'language', None)
if other_lang and other_lang.lower() != 'und':
self.language = other_lang
def format_series_index(self):
try:
x = float(self.series_index)
except ValueError:
x = 1
return fmt_sidx(x)
def authors_from_string(self, raw):
self.authors = string_to_authors(raw)
def format_authors(self):
return authors_to_string(self.authors)
def format_tags(self):
return u', '.join([unicode(t) for t in self.tags])
def format_rating(self):
return unicode(self.rating)
def __unicode__(self):
ans = []
def fmt(x, y):
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
fmt('Title', self.title)
if self.title_sort:
fmt('Title sort', self.title_sort)
if self.authors:
fmt('Author(s)', authors_to_string(self.authors) + \
((' [' + self.author_sort + ']') if self.author_sort else ''))
if self.publisher:
fmt('Publisher', self.publisher)
if getattr(self, 'book_producer', False):
fmt('Book Producer', self.book_producer)
if self.category:
fmt('Category', self.category)
if self.comments:
fmt('Comments', self.comments)
if self.isbn:
fmt('ISBN', self.isbn)
if self.tags:
fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
if self.series:
fmt('Series', self.series + ' #%s'%self.format_series_index())
if self.language:
fmt('Language', self.language)
if self.rating is not None:
fmt('Rating', self.rating)
if self.timestamp is not None:
fmt('Timestamp', isoformat(self.timestamp))
if self.pubdate is not None:
fmt('Published', isoformat(self.pubdate))
if self.rights is not None:
fmt('Rights', unicode(self.rights))
if self.lccn:
fmt('LCCN', unicode(self.lccn))
if self.lcc:
fmt('LCC', unicode(self.lcc))
if self.ddc:
fmt('DDC', unicode(self.ddc))
return u'\n'.join(ans)
def to_html(self):
ans = [(_('Title'), unicode(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
ans += [(_('Publisher'), unicode(self.publisher))]
ans += [(_('Producer'), unicode(self.book_producer))]
ans += [(_('Comments'), unicode(self.comments))]
ans += [('ISBN', unicode(self.isbn))]
if self.lccn:
ans += [('LCCN', unicode(self.lccn))]
if self.lcc:
ans += [('LCC', unicode(self.lcc))]
if self.ddc:
ans += [('DDC', unicode(self.ddc))]
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
if self.series:
ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
ans += [(_('Language'), unicode(self.language))]
if self.timestamp is not None:
ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
if self.pubdate is not None:
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
if self.rights is not None:
ans += [(_('Rights'), unicode(self.rights))]
for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans)
def __str__(self):
return self.__unicode__().encode('utf-8')
def __nonzero__(self):
return bool(self.title or self.author or self.comments or self.tags)
return Metadata(title, authors, other=mi)
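MetaInformation is now only a compatibility shim: calling it constructs a calibre.ebooks.metadata.book.base.Metadata object, and passing an existing metadata object copies it through the other= path. A short sketch of what existing callers can still rely on:

from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.book.base import Metadata

mi = MetaInformation('Some Title', ['Some Author'])
assert isinstance(mi, Metadata)      # the shim returns a Metadata instance
copy = MetaInformation(mi)           # old copy-style call still works, via other=mi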
def check_isbn10(isbn):
try:

View File

@ -12,47 +12,44 @@ an empty list/dictionary for complex types and (None, None) for cover_data
SOCIAL_METADATA_FIELDS = frozenset([
'tags', # Ordered list
# A floating point number between 0 and 10
'rating',
# A simple HTML enabled string
'comments',
# A simple string
'series',
# A floating point number
'series_index',
'rating', # A floating point number between 0 and 10
'comments', # A simple HTML enabled string
'series', # A simple string
'series_index', # A floating point number
# Of the form { scheme1:value1, scheme2:value2}
# For example: {'isbn':'123456789', 'doi':'xxxx', ... }
'classifiers',
'isbn', # Pseudo field for convenience, should get/set isbn classifier
])
'''
The list of names that convert to classifiers when in get and set.
'''
TOP_LEVEL_CLASSIFIERS = frozenset([
'isbn',
])
PUBLICATION_METADATA_FIELDS = frozenset([
# title must never be None. Should be _('Unknown')
'title',
'title', # title must never be None. Should be _('Unknown')
# Pseudo field that can be set, but if not set is auto generated
# from title and languages
'title_sort',
# Ordered list of authors. Must never be None, can be [_('Unknown')]
'authors',
# Map of sort strings for each author
'author_sort_map',
'authors', # Ordered list. Must never be None, can be [_('Unknown')]
'author_sort_map', # Map of sort strings for each author
# Pseudo field that can be set, but if not set is auto generated
# from authors and languages
'author_sort',
'book_producer',
# Dates and times must be timezone aware
'timestamp',
'timestamp', # Dates and times must be timezone aware
'pubdate',
'rights',
# So far only known publication type is periodical:calibre
# If None, means book
'publication_type',
# A UUID usually of type 4
'uuid',
'uuid', # A UUID usually of type 4
'language', # the primary language of this book
'languages', # ordered list
# Simple string, no special semantics
'publisher',
'publisher', # Simple string, no special semantics
# Absolute path to image file encoded in filesystem_encoding
'cover',
# Of the form (format, data) where format is, for e.g. 'jpeg', 'png', 'gif'...
@ -69,33 +66,62 @@ BOOK_STRUCTURE_FIELDS = frozenset([
])
USER_METADATA_FIELDS = frozenset([
# A dict of a form to be specified
# A dict of dicts similar to field_metadata. Each field description dict
# also contains a value field with the key #value#.
'user_metadata',
])
DEVICE_METADATA_FIELDS = frozenset([
# Ordered list of strings
'device_collections',
'device_collections', # Ordered list of strings
'lpath', # Unicode, / separated
# In bytes
'size',
# Mimetype of the book file being represented
'mime',
'size', # In bytes
'mime', # Mimetype of the book file being represented
])
CALIBRE_METADATA_FIELDS = frozenset([
# An application id
# Semantics to be defined. Is it a db key? a db name + key? A uuid?
'application_id',
'application_id', # An application id, currently set to the db_id.
'db_id', # the calibre primary key of the item.
'formats', # list of formats (extensions) for this book
]
)
ALL_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
PUBLICATION_METADATA_FIELDS).union(
BOOK_STRUCTURE_FIELDS).union(
USER_METADATA_FIELDS).union(
DEVICE_METADATA_FIELDS).union(
CALIBRE_METADATA_FIELDS)
# All fields except custom fields
STANDARD_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
PUBLICATION_METADATA_FIELDS).union(
BOOK_STRUCTURE_FIELDS).union(
DEVICE_METADATA_FIELDS).union(
CALIBRE_METADATA_FIELDS)
# Metadata fields that smart update must do special processing to copy.
SC_FIELDS_NOT_COPIED = frozenset(['title', 'title_sort', 'authors',
'author_sort', 'author_sort_map',
'cover_data', 'tags', 'language'])
# Metadata fields that smart update should copy only if the source is not None
SC_FIELDS_COPY_NOT_NULL = frozenset(['lpath', 'size', 'comments', 'thumbnail'])
# Metadata fields that smart update should copy without special handling
SC_COPYABLE_FIELDS = SOCIAL_METADATA_FIELDS.union(
PUBLICATION_METADATA_FIELDS).union(
BOOK_STRUCTURE_FIELDS).union(
DEVICE_METADATA_FIELDS).union(
CALIBRE_METADATA_FIELDS) - \
SC_FIELDS_NOT_COPIED.union(
SC_FIELDS_COPY_NOT_NULL)
SERIALIZABLE_FIELDS = SOCIAL_METADATA_FIELDS.union(
USER_METADATA_FIELDS).union(
PUBLICATION_METADATA_FIELDS).union(
CALIBRE_METADATA_FIELDS).union(
frozenset(['lpath'])) # I don't think we need device_collections
# Serialization of covers/thumbnails will have to be handled carefully, maybe
# as an option to the serializer class
DEVICE_METADATA_FIELDS) - \
frozenset(['device_collections', 'formats'])
# these are rebuilt when needed
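Since all of these are plain frozensets, the copy rules above reduce to cheap membership tests. A quick illustration (assuming the calibre package is importable) of where a few familiar fields land:
from calibre.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
    SC_FIELDS_COPY_NOT_NULL, SC_FIELDS_NOT_COPIED, SERIALIZABLE_FIELDS)
assert 'publisher' in SC_COPYABLE_FIELDS          # copied by smart update when not None
assert 'comments' in SC_FIELDS_COPY_NOT_NULL      # copied only if the source has a value
assert 'authors' in SC_FIELDS_NOT_COPIED          # merged with special-case logic instead
assert 'authors' not in SC_COPYABLE_FIELDS
assert 'device_collections' not in SERIALIZABLE_FIELDS   # rebuilt when needed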

View File

@ -5,9 +5,18 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import copy
import copy, traceback
from calibre import prints
from calibre.ebooks.metadata.book import SC_COPYABLE_FIELDS
from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL
from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS
from calibre.ebooks.metadata.book import TOP_LEVEL_CLASSIFIERS
from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
from calibre.library.field_metadata import FieldMetadata
from calibre.utils.date import isoformat, format_date
from calibre.utils.formatter import TemplateFormatter
from calibre.ebooks.metadata.book import RESERVED_METADATA_FIELDS
NULL_VALUES = {
'user_metadata': {},
@ -19,103 +28,555 @@ NULL_VALUES = {
'author_sort_map': {},
'authors' : [_('Unknown')],
'title' : _('Unknown'),
'language' : 'und'
}
field_metadata = FieldMetadata()
class SafeFormat(TemplateFormatter):
def get_value(self, key, args, kwargs):
try:
b = self.book.get_user_metadata(key, False)
if b and b['datatype'] == 'int' and self.book.get(key, 0) == 0:
v = ''
elif b and b['datatype'] == 'float' and self.book.get(key, 0.0) == 0.0:
v = ''
else:
ign, v = self.book.format_field(key.lower(), series_with_index=False)
if v is None:
return ''
if v == '':
return ''
return v
except:
return key
composite_formatter = SafeFormat()
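SafeFormat exists so that composite custom columns (columns whose value is a template) can be rendered lazily against the book itself without template errors propagating. A minimal sketch, assuming calibre is importable and using the Metadata class defined just below; the column name '#display_title' is made up:
from calibre.ebooks.metadata.book.base import Metadata
mi = Metadata('Some Title', ['Some Author'])
# a composite column stores no value of its own, only a template in its 'display' dict
mi.set_user_metadata('#display_title', {'datatype': 'composite', 'is_multiple': None,
    'name': u'Display title', 'display': {'composite_template': '{title} by {authors}'}})
# the first access renders the template via composite_formatter and caches it in '#value#'
print mi.get('#display_title')   # -> Some Title by Some Author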
class Metadata(object):
'''
This class must expose a superset of the API of MetaInformation in terms
of attribute access and methods. Only the __init__ method is different.
MetaInformation will simply become a function that creates and fills in
the attributes of this class.
A class representing all the metadata for a book.
Please keep the method based API of this class to a minimum. Every method
becomes a reserved field name.
'''
def __init__(self):
object.__setattr__(self, '_data', copy.deepcopy(NULL_VALUES))
def __init__(self, title, authors=(_('Unknown'),), other=None):
'''
@param title: title or ``_('Unknown')``
@param authors: List of strings or []
@param other: None or a metadata object
'''
_data = copy.deepcopy(NULL_VALUES)
object.__setattr__(self, '_data', _data)
if other is not None:
self.smart_update(other)
else:
if title:
self.title = title
if authors:
#: List of strings or []
self.author = list(authors) if authors else []# Needed for backward compatibility
self.authors = list(authors) if authors else []
def is_null(self, field):
null_val = NULL_VALUES.get(field, None)
val = getattr(self, field, None)
return not val or val == null_val
def __getattribute__(self, field):
_data = object.__getattribute__(self, '_data')
if field in RESERVED_METADATA_FIELDS:
if field in TOP_LEVEL_CLASSIFIERS:
return _data.get('classifiers').get(field, None)
if field in STANDARD_METADATA_FIELDS:
return _data.get(field, None)
try:
return object.__getattribute__(self, field)
except AttributeError:
pass
if field in _data['user_metadata'].iterkeys():
# TODO: getting user metadata values
pass
d = _data['user_metadata'][field]
val = d['#value#']
if d['datatype'] != 'composite':
return val
if val is None:
d['#value#'] = 'RECURSIVE_COMPOSITE FIELD (Metadata) ' + field
val = d['#value#'] = composite_formatter.safe_format(
d['display']['composite_template'],
self,
_('TEMPLATE ERROR'),
self).strip()
return val
raise AttributeError(
'Metadata object has no attribute named: '+ repr(field))
def __setattr__(self, field, val):
def __setattr__(self, field, val, extra=None):
_data = object.__getattribute__(self, '_data')
if field in RESERVED_METADATA_FIELDS:
if field != 'user_metadata':
if not val:
val = NULL_VALUES[field]
if field in TOP_LEVEL_CLASSIFIERS:
_data['classifiers'].update({field: val})
elif field in STANDARD_METADATA_FIELDS:
if val is None:
val = NULL_VALUES.get(field, None)
_data[field] = val
else:
raise AttributeError('You cannot set user_metadata directly.')
elif field in _data['user_metadata'].iterkeys():
# TODO: Setting custom column values
pass
if _data['user_metadata'][field]['datatype'] == 'composite':
_data['user_metadata'][field]['#value#'] = None
else:
_data['user_metadata'][field]['#value#'] = val
_data['user_metadata'][field]['#extra#'] = extra
else:
# You are allowed to stick arbitrary attributes onto this object as
# long as they dont conflict with global or user metadata names
# long as they don't conflict with global or user metadata names
# Don't abuse this privilege
self.__dict__[field] = val
@property
def user_metadata_names(self):
'The set of user metadata names this object knows about'
def __iter__(self):
return object.__getattribute__(self, '_data').iterkeys()
def has_key(self, key):
return key in object.__getattribute__(self, '_data')
def deepcopy(self):
m = Metadata(None)
m.__dict__ = copy.deepcopy(self.__dict__)
object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))
return m
def get(self, field, default=None):
try:
return self.__getattribute__(field)
except AttributeError:
return default
def get_extra(self, field):
_data = object.__getattribute__(self, '_data')
return frozenset(_data['user_metadata'].iterkeys())
if field in _data['user_metadata'].iterkeys():
return _data['user_metadata'][field]['#extra#']
raise AttributeError(
'Metadata object has no attribute named: '+ repr(field))
# Old MetaInformation API {{{
def copy(self):
def set(self, field, val, extra=None):
self.__setattr__(field, val, extra)
# field-oriented interface. Intended to be the same as in LibraryDatabase
def standard_field_keys(self):
'''
return a list of all possible keys, even if this book doesn't have them
'''
return STANDARD_METADATA_FIELDS
def custom_field_keys(self):
'''
return a list of the custom fields in this book
'''
return object.__getattribute__(self, '_data')['user_metadata'].iterkeys()
def all_field_keys(self):
'''
All field keys known by this instance, even if their value is None
'''
_data = object.__getattribute__(self, '_data')
return frozenset(ALL_METADATA_FIELDS.union(_data['user_metadata'].iterkeys()))
def metadata_for_field(self, key):
'''
return metadata describing a standard or custom field.
'''
if key not in self.custom_field_keys():
return self.get_standard_metadata(key, make_copy=False)
return self.get_user_metadata(key, make_copy=False)
def all_non_none_fields(self):
'''
Return a dictionary containing all non-None metadata fields, including
the custom ones.
'''
result = {}
_data = object.__getattribute__(self, '_data')
for attr in STANDARD_METADATA_FIELDS:
v = _data.get(attr, None)
if v is not None:
result[attr] = v
for attr in _data['user_metadata'].iterkeys():
v = self.get(attr, None)
if v is not None:
result[attr] = v
if _data['user_metadata'][attr]['datatype'] == 'series':
result[attr+'_index'] = _data['user_metadata'][attr]['#extra#']
return result
# End of field-oriented interface
# Extended interfaces. These permit one to get copies of metadata dictionaries, and to
# get and set custom field metadata
def get_standard_metadata(self, field, make_copy):
'''
return field metadata from the field if it is there. Otherwise return
None. field is the key name, not the label. Return a copy if requested,
just in case the user wants to change values in the dict.
'''
if field in field_metadata and field_metadata[field]['kind'] == 'field':
if make_copy:
return copy.deepcopy(field_metadata[field])
return field_metadata[field]
return None
def get_all_standard_metadata(self, make_copy):
'''
return a dict containing all the standard field metadata associated with
the book.
'''
if not make_copy:
return field_metadata
res = {}
for k in field_metadata:
if field_metadata[k]['kind'] == 'field':
res[k] = copy.deepcopy(field_metadata[k])
return res
def get_all_user_metadata(self, make_copy):
'''
return a dict containing all the custom field metadata associated with
the book.
'''
_data = object.__getattribute__(self, '_data')
user_metadata = _data['user_metadata']
if not make_copy:
return user_metadata
res = {}
for k in user_metadata:
res[k] = copy.deepcopy(user_metadata[k])
return res
def get_user_metadata(self, field, make_copy):
'''
return field metadata from the object if it is there. Otherwise return
None. field is the key name, not the label. Return a copy if requested,
just in case the user wants to change values in the dict.
'''
_data = object.__getattribute__(self, '_data')
_data = _data['user_metadata']
if field in _data:
if make_copy:
return copy.deepcopy(_data[field])
return _data[field]
return None
def set_all_user_metadata(self, metadata):
'''
store custom field metadata into the object. Field is the key name
not the label
'''
if metadata is None:
traceback.print_stack()
else:
for key in metadata:
self.set_user_metadata(key, metadata[key])
def set_user_metadata(self, field, metadata):
'''
store custom field metadata for one column into the object. Field is
the key name not the label
'''
if field is not None:
if not field.startswith('#'):
raise AttributeError(
'Custom field name %s must begin with \'#\''%repr(field))
if metadata is None:
traceback.print_stack()
return
metadata = copy.deepcopy(metadata)
if '#value#' not in metadata:
if metadata['datatype'] == 'text' and metadata['is_multiple']:
metadata['#value#'] = []
else:
metadata['#value#'] = None
_data = object.__getattribute__(self, '_data')
_data['user_metadata'][field] = metadata
def template_to_attribute(self, other, ops):
'''
Takes a list [(src,dest), (src,dest)], evaluates the template in the
context of other, then copies the result to self[dest]. This is on a
best-efforts basis. Some assignments can make no sense.
'''
if not ops:
return
for op in ops:
try:
src = op[0]
dest = op[1]
val = composite_formatter.safe_format\
(src, other, 'PLUGBOARD TEMPLATE ERROR', other)
if dest == 'tags':
self.set(dest, [f.strip() for f in val.split(',') if f.strip()])
elif dest == 'authors':
self.set(dest, [f.strip() for f in val.split('&') if f.strip()])
else:
self.set(dest, val)
except:
traceback.print_exc()
pass
# Old Metadata API {{{
def print_all_attributes(self):
pass
for x in STANDARD_METADATA_FIELDS:
prints('%s:'%x, getattr(self, x, 'None'))
for x in self.custom_field_keys():
meta = self.get_user_metadata(x, make_copy=False)
if meta is not None:
prints(x, meta)
prints('--------------')
def smart_update(self, other, replace_metadata=False):
pass
'''
Merge the information in `other` into self. In case of conflicts, the information
in `other` takes precedence, unless the information in `other` is NULL.
'''
def copy_not_none(dest, src, attr):
v = getattr(src, attr, None)
if v not in (None, NULL_VALUES.get(attr, None)):
setattr(dest, attr, copy.deepcopy(v))
def format_series_index(self):
pass
if other.title and other.title != _('Unknown'):
self.title = other.title
if hasattr(other, 'title_sort'):
self.title_sort = other.title_sort
if other.authors and other.authors[0] != _('Unknown'):
self.authors = list(other.authors)
if hasattr(other, 'author_sort_map'):
self.author_sort_map = dict(other.author_sort_map)
if hasattr(other, 'author_sort'):
self.author_sort = other.author_sort
if replace_metadata:
# SPECIAL_FIELDS = frozenset(['lpath', 'size', 'comments', 'thumbnail'])
for attr in SC_COPYABLE_FIELDS:
setattr(self, attr, getattr(other, attr, 1.0 if \
attr == 'series_index' else None))
self.tags = other.tags
self.cover_data = getattr(other, 'cover_data',
NULL_VALUES['cover_data'])
self.set_all_user_metadata(other.get_all_user_metadata(make_copy=True))
for x in SC_FIELDS_COPY_NOT_NULL:
copy_not_none(self, other, x)
# language is handled below
else:
for attr in SC_COPYABLE_FIELDS:
copy_not_none(self, other, attr)
for x in SC_FIELDS_COPY_NOT_NULL:
copy_not_none(self, other, x)
if other.tags:
# Case-insensitive but case preserving merging
lotags = [t.lower() for t in other.tags]
lstags = [t.lower() for t in self.tags]
ot, st = map(frozenset, (lotags, lstags))
for t in st.intersection(ot):
sidx = lstags.index(t)
oidx = lotags.index(t)
self.tags[sidx] = other.tags[oidx]
self.tags += [t for t in other.tags if t.lower() in ot-st]
if getattr(other, 'cover_data', False):
other_cover = other.cover_data[-1]
self_cover = self.cover_data[-1] if self.cover_data else ''
if not self_cover: self_cover = ''
if not other_cover: other_cover = ''
if len(other_cover) > len(self_cover):
self.cover_data = other.cover_data
if callable(getattr(other, 'custom_field_keys', None)):
for x in other.custom_field_keys():
meta = other.get_user_metadata(x, make_copy=True)
if meta is not None:
self_tags = self.get(x, [])
self.set_user_metadata(x, meta) # get... did the deepcopy
other_tags = other.get(x, [])
if meta['is_multiple']:
# Case-insensitive but case preserving merging
lotags = [t.lower() for t in other_tags]
lstags = [t.lower() for t in self_tags]
ot, st = map(frozenset, (lotags, lstags))
for t in st.intersection(ot):
sidx = lstags.index(t)
oidx = lotags.index(t)
self_tags[sidx] = other_tags[oidx]
self_tags += [t for t in other_tags if t.lower() in ot-st]
setattr(self, x, self_tags)
my_comments = getattr(self, 'comments', '')
other_comments = getattr(other, 'comments', '')
if not my_comments:
my_comments = ''
if not other_comments:
other_comments = ''
if len(other_comments.strip()) > len(my_comments.strip()):
self.comments = other_comments
other_lang = getattr(other, 'language', None)
if other_lang and other_lang.lower() != 'und':
self.language = other_lang
def format_series_index(self, val=None):
from calibre.ebooks.metadata import fmt_sidx
v = self.series_index if val is None else val
try:
x = float(v)
except ValueError:
x = 1
return fmt_sidx(x)
def authors_from_string(self, raw):
pass
from calibre.ebooks.metadata import string_to_authors
self.authors = string_to_authors(raw)
def format_authors(self):
pass
from calibre.ebooks.metadata import authors_to_string
return authors_to_string(self.authors)
def format_tags(self):
pass
return u', '.join([unicode(t) for t in self.tags])
def format_rating(self):
return unicode(self.rating)
def format_field(self, key, series_with_index=True):
name, val, ign, ign = self.format_field_extended(key, series_with_index)
return (name, val)
def format_field_extended(self, key, series_with_index=True):
from calibre.ebooks.metadata import authors_to_string
'''
returns the tuple (field_name, formatted_value, original_value, field_metadata)
'''
if key in self.custom_field_keys():
res = self.get(key, None)
cmeta = self.get_user_metadata(key, make_copy=False)
name = unicode(cmeta['name'])
if cmeta['datatype'] != 'composite' and (res is None or res == ''):
return (name, res, None, None)
orig_res = res
cmeta = self.get_user_metadata(key, make_copy=False)
if res is None or res == '':
return (name, res, None, None)
orig_res = res
datatype = cmeta['datatype']
if datatype == 'text' and cmeta['is_multiple']:
res = u', '.join(res)
elif datatype == 'series' and series_with_index:
res = res + \
' [%s]'%self.format_series_index(val=self.get_extra(key))
elif datatype == 'datetime':
res = format_date(res, cmeta['display'].get('date_format','dd MMM yyyy'))
elif datatype == 'bool':
res = _('Yes') if res else _('No')
elif datatype == 'float' and key.endswith('_index'):
res = self.format_series_index(res)
return (name, unicode(res), orig_res, cmeta)
if key in field_metadata and field_metadata[key]['kind'] == 'field':
res = self.get(key, None)
fmeta = field_metadata[key]
name = unicode(fmeta['name'])
if res is None or res == '':
return (name, res, None, None)
orig_res = res
name = unicode(fmeta['name'])
datatype = fmeta['datatype']
if key == 'authors':
res = authors_to_string(res)
elif key == 'series_index':
res = self.format_series_index(res)
elif datatype == 'text' and fmeta['is_multiple']:
res = u', '.join(res)
elif datatype == 'series' and series_with_index:
res = res + ' [%s]'%self.format_series_index()
elif datatype == 'datetime':
res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
return (name, unicode(res), orig_res, fmeta)
return (None, None, None, None)
def __unicode__(self):
pass
from calibre.ebooks.metadata import authors_to_string
ans = []
def fmt(x, y):
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
fmt('Title', self.title)
if self.title_sort:
fmt('Title sort', self.title_sort)
if self.authors:
fmt('Author(s)', authors_to_string(self.authors) + \
((' [' + self.author_sort + ']') if self.author_sort else ''))
if self.publisher:
fmt('Publisher', self.publisher)
if getattr(self, 'book_producer', False):
fmt('Book Producer', self.book_producer)
if self.comments:
fmt('Comments', self.comments)
if self.isbn:
fmt('ISBN', self.isbn)
if self.tags:
fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
if self.series:
fmt('Series', self.series + ' #%s'%self.format_series_index())
if self.language:
fmt('Language', self.language)
if self.rating is not None:
fmt('Rating', self.rating)
if self.timestamp is not None:
fmt('Timestamp', isoformat(self.timestamp))
if self.pubdate is not None:
fmt('Published', isoformat(self.pubdate))
if self.rights is not None:
fmt('Rights', unicode(self.rights))
for key in self.custom_field_keys():
val = self.get(key, None)
if val:
(name, val) = self.format_field(key)
fmt(name, unicode(val))
return u'\n'.join(ans)
def to_html(self):
pass
from calibre.ebooks.metadata import authors_to_string
ans = [(_('Title'), unicode(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
ans += [(_('Publisher'), unicode(self.publisher))]
ans += [(_('Producer'), unicode(self.book_producer))]
ans += [(_('Comments'), unicode(self.comments))]
ans += [('ISBN', unicode(self.isbn))]
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
if self.series:
ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
ans += [(_('Language'), unicode(self.language))]
if self.timestamp is not None:
ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
if self.pubdate is not None:
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
if self.rights is not None:
ans += [(_('Rights'), unicode(self.rights))]
for key in self.custom_field_keys():
val = self.get(key, None)
if val:
(name, val) = self.format_field(key)
ans += [(name, val)]
for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans)
def __str__(self):
return self.__unicode__().encode('utf-8')
def __nonzero__(self):
return True
return bool(self.title or self.author or self.comments or self.tags)
# }}}
# We don't need reserved field names for this object any more. Lets just use a
# protocol like the last char of a user field label should be _ when using this
# object
# So mi.tags returns the builtin tags and mi.tags_ returns the user tags
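A short usage sketch of the merge semantics implemented above (assuming calibre is importable; all values are made up). Tags merge case-insensitively while keeping the incoming capitalization, unknown titles never overwrite real ones, and is_null() backs the "copy only when set" checks:
from calibre.ebooks.metadata.book.base import Metadata
a = Metadata('Dune', ['Frank Herbert'])
a.tags = ['Fiction', 'Classics']
b = Metadata(None)                 # no real title, so it cannot win the merge
b.tags = ['fiction', 'SF']
b.comments = 'A review.'
a.smart_update(b)
print a.title                  # Dune
print a.tags                   # ['fiction', 'Classics', 'SF'] -- capitalization from b on the overlap
print a.comments               # A review. -- copied because a had none
print a.is_null('publisher')   # True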

View File

@ -0,0 +1,143 @@
'''
Created on 4 Jun 2010
@author: charles
'''
from base64 import b64encode, b64decode
import json
import traceback
from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre.library.field_metadata import FieldMetadata
from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE
from calibre.utils.magick import Image
from calibre import isbytestring
# Translate datetimes to and from strings. The string form is the datetime in
# UTC. The returned date is also UTC
def string_to_datetime(src):
if src == "None":
return None
return parse_date(src)
def datetime_to_string(dateval):
if dateval is None or dateval == UNDEFINED_DATE:
return "None"
return isoformat(dateval)
def encode_thumbnail(thumbnail):
'''
Encode the image part of a thumbnail, then return the 3 part tuple
'''
if thumbnail is None:
return None
if not isinstance(thumbnail, (tuple, list)):
try:
img = Image()
img.load(thumbnail)
width, height = img.size
thumbnail = (width, height, thumbnail)
except:
return None
return (thumbnail[0], thumbnail[1], b64encode(str(thumbnail[2])))
def decode_thumbnail(tup):
'''
Decode an encoded thumbnail into its 3 component parts
'''
if tup is None:
return None
return (tup[0], tup[1], b64decode(tup[2]))
def object_to_unicode(obj, enc=preferred_encoding):
def dec(x):
return x.decode(enc, 'replace')
if isbytestring(obj):
return dec(obj)
if isinstance(obj, (list, tuple)):
return [dec(x) if isbytestring(x) else x for x in obj]
if isinstance(obj, dict):
ans = {}
for k, v in obj.items():
k = object_to_unicode(k)
v = object_to_unicode(v)
ans[k] = v
return ans
return obj
class JsonCodec(object):
def __init__(self):
self.field_metadata = FieldMetadata()
def encode_to_file(self, file, booklist):
file.write(json.dumps(self.encode_booklist_metadata(booklist),
indent=2, encoding='utf-8'))
def encode_booklist_metadata(self, booklist):
result = []
for book in booklist:
result.append(self.encode_book_metadata(book))
return result
def encode_book_metadata(self, book):
result = {}
for key in SERIALIZABLE_FIELDS:
result[key] = self.encode_metadata_attr(book, key)
return result
def encode_metadata_attr(self, book, key):
if key == 'user_metadata':
meta = book.get_all_user_metadata(make_copy=True)
for k in meta:
if meta[k]['datatype'] == 'datetime':
meta[k]['#value#'] = datetime_to_string(meta[k]['#value#'])
return meta
if key in self.field_metadata:
datatype = self.field_metadata[key]['datatype']
else:
datatype = None
value = book.get(key)
if key == 'thumbnail':
return encode_thumbnail(value)
elif isbytestring(value): # str includes bytes
enc = filesystem_encoding if key == 'lpath' else preferred_encoding
return object_to_unicode(value, enc=enc)
elif datatype == 'datetime':
return datetime_to_string(value)
else:
return object_to_unicode(value)
def decode_from_file(self, file, booklist, book_class, prefix):
js = []
try:
js = json.load(file, encoding='utf-8')
for item in js:
book = book_class(prefix, item.get('lpath', None))
for key in item.keys():
meta = self.decode_metadata(key, item[key])
if key == 'user_metadata':
book.set_all_user_metadata(meta)
else:
setattr(book, key, meta)
booklist.append(book)
except:
print 'exception during JSON decoding'
traceback.print_exc()
def decode_metadata(self, key, value):
if key == 'user_metadata':
for k in value:
if value[k]['datatype'] == 'datetime':
value[k]['#value#'] = string_to_datetime(value[k]['#value#'])
return value
elif key in self.field_metadata:
if self.field_metadata[key]['datatype'] == 'datetime':
return string_to_datetime(value)
if key == 'thumbnail':
return decode_thumbnail(value)
return value
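A minimal round-trip sketch for the codec (assuming calibre is importable): each book becomes one JSON object keyed by the SERIALIZABLE_FIELDS names, with datetimes flattened to UTC strings and custom columns carried under user_metadata. Decoding additionally needs a device Book class, so only the encode side is shown:
from cStringIO import StringIO
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.book.json_codec import JsonCodec
codec = JsonCodec()
buf = StringIO()
codec.encode_to_file(buf, [Metadata('A title', ['An Author'])])
print buf.getvalue()   # a JSON list with one object per book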

View File

@ -109,7 +109,7 @@ def do_set_metadata(opts, mi, stream, stream_type):
from_opf = getattr(opts, 'from_opf', None)
if from_opf is not None:
from calibre.ebooks.metadata.opf2 import OPF
opf_mi = MetaInformation(OPF(open(from_opf, 'rb')))
opf_mi = OPF(open(from_opf, 'rb')).to_book_metadata()
mi.smart_update(opf_mi)
for pref in config().option_set.preferences:

View File

@ -9,6 +9,7 @@ import traceback, socket, re, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty
from lxml import etree
import mechanize
@ -216,6 +217,68 @@ def download_covers(mi, result_queue, max_covers=50, timeout=5.): # {{{
# }}}
class DoubanCovers(CoverDownload): # {{{
'Download covers from Douban.com'
DOUBAN_ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
name = 'Douban.com covers'
description = _('Download covers from Douban.com')
author = 'Li Fanxi'
def get_cover_url(self, isbn, br, timeout=5.):
try:
url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY
src = br.open(url, timeout=timeout).read()
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = Exception(_('Douban.com API timed out. Try again later.'))
raise err
else:
feed = etree.fromstring(src)
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'db': 'http://www.douban.com/xmlns/'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entries = XPath('//atom:entry')(feed)
if len(entries) < 1:
return None
try:
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
u = cover_url(entries[0])[0].replace('/spic/', '/lpic/');
# If URL contains "book-default", the book doesn't have a cover
if u.find('book-default') != -1:
return None
except:
return None
return u
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
br = browser()
try:
if self.get_cover_url(mi.isbn, br, timeout=timeout) != None:
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception, e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
br = browser()
try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception, e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
# }}}
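For reference, the lookup above is a single GET against the Douban book API using calibre's key; a sketch of the URL it builds (the ISBN below is a made-up example, and the class is assumed importable from calibre.ebooks.metadata.covers):
isbn = '9780000000002'   # illustrative ISBN-13 only
url = DoubanCovers.DOUBAN_ISBN_URL + isbn + '?apikey=' + DoubanCovers.CALIBRE_DOUBAN_API_KEY
# -> http://api.douban.com/book/subject/isbn/9780000000002?apikey=0bd1672394eb1ebf2374356abec15c3d
# the href of link[@rel='image'] in the Atom response is then upgraded from /spic/ to /lpic/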
def download_cover(mi, timeout=5.): # {{{
results = Queue()
download_covers(mi, results, max_covers=1, timeout=timeout)

View File

@ -164,10 +164,10 @@ def get_cover(opf, opf_path, stream, reader=None):
return render_html_svg_workaround(cpage, default_log)
def get_metadata(stream, extract_cover=True):
""" Return metadata as a :class:`MetaInformation` object """
""" Return metadata as a :class:`Metadata` object """
stream.seek(0)
reader = OCFZipReader(stream)
mi = MetaInformation(reader.opf)
mi = reader.opf.to_book_metadata()
if extract_cover:
try:
cdata = get_cover(reader.opf, reader.opf_path, stream, reader=reader)

View File

@ -29,7 +29,7 @@ class MetadataSource(Plugin): # {{{
future use.
The fetch method must store the results in `self.results` as a list of
:class:`MetaInformation` objects. If there is an error, it should be stored
:class:`Metadata` objects. If there is an error, it should be stored
in `self.exception` and `self.tb` (for the traceback).
'''

View File

@ -8,7 +8,7 @@ import sys, re
from urllib import quote
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre import browser
@ -42,33 +42,47 @@ def fetch_metadata(url, max=100, timeout=5.):
return books
class ISBNDBMetadata(MetaInformation):
class ISBNDBMetadata(Metadata):
def __init__(self, book):
MetaInformation.__init__(self, None, [])
Metadata.__init__(self, None, [])
self.isbn = book.get('isbn13', book.get('isbn'))
self.title = book.find('titlelong').string
def tostring(e):
if not hasattr(e, 'string'):
return None
ans = e.string
if ans is not None:
ans = unicode(ans).strip()
if not ans:
ans = None
return ans
self.isbn = unicode(book.get('isbn13', book.get('isbn')))
self.title = tostring(book.find('titlelong'))
if not self.title:
self.title = book.find('title').string
self.title = tostring(book.find('title'))
if not self.title:
self.title = _('Unknown')
self.title = unicode(self.title).strip()
au = unicode(book.find('authorstext').string).strip()
temp = au.split(',')
self.authors = []
au = tostring(book.find('authorstext'))
if au:
au = au.strip()
temp = au.split(',')
for au in temp:
if not au: continue
self.authors.extend([a.strip() for a in au.split('&amp;')])
try:
self.author_sort = book.find('authors').find('person').string
self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except:
pass
self.publisher = book.find('publishertext').string
self.publisher = tostring(book.find('publishertext'))
summ = book.find('summary')
if summ and hasattr(summ, 'string') and summ.string:
summ = tostring(book.find('summary'))
if summ:
self.comments = 'SUMMARY:\n'+summ.string

View File

@ -12,6 +12,7 @@ import mechanize
from calibre import browser, prints
from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
@ -110,6 +111,8 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
+isbn).read()
if not raw:
return mi
raw = raw.decode('utf-8', 'replace')
raw = strip_encoding_declarations(raw)
root = html.fromstring(raw)
h1 = root.xpath('//div[@class="headsummary"]/h1')
if h1 and not mi.title:

View File

@ -6,7 +6,6 @@ Support for reading the metadata from a LIT file.
import cStringIO, os
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
def get_metadata(stream):
@ -16,7 +15,7 @@ def get_metadata(stream):
src = litfile.get_metadata().encode('utf-8')
litfile = litfile._litfile
opf = OPF(cStringIO.StringIO(src), os.getcwd())
mi = MetaInformation(opf)
mi = opf.to_book_metadata()
covers = []
for item in opf.iterguide():
if 'cover' not in item.get('type', '').lower():

View File

@ -181,7 +181,7 @@ def metadata_from_filename(name, pat=None):
mi.isbn = si
except (IndexError, ValueError):
pass
if not mi.title:
if mi.is_null('title'):
mi.title = name
return mi
@ -194,7 +194,7 @@ def opf_metadata(opfpath):
try:
opf = OPF(f, os.path.dirname(opfpath))
if opf.application_id is not None:
mi = MetaInformation(opf)
mi = opf.to_book_metadata()
if hasattr(opf, 'cover') and opf.cover:
cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
if os.access(cpath, os.R_OK):

View File

@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
lxml based OPF parser.
'''
import re, sys, unittest, functools, os, mimetypes, uuid, glob, cStringIO
import re, sys, unittest, functools, os, mimetypes, uuid, glob, cStringIO, json
from urllib import unquote
from urlparse import urlparse
@ -16,11 +16,13 @@ from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import __appname__, __version__, filesystem_encoding
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from calibre.ebooks.metadata import string_to_authors, MetaInformation
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, isoformat
from calibre.utils.localization import get_lang
from calibre import prints
class Resource(object):
class Resource(object): # {{{
'''
Represents a resource (usually a file on the filesystem or a URL pointing
to the web. Such resources are commonly referred to in OPF files.
@ -101,8 +103,9 @@ class Resource(object):
def __repr__(self):
return 'Resource(%s, %s)'%(repr(self.path), repr(self.href()))
# }}}
class ResourceCollection(object):
class ResourceCollection(object): # {{{
def __init__(self):
self._resources = []
@ -153,10 +156,9 @@ class ResourceCollection(object):
for res in self:
res.set_basedir(path)
# }}}
class ManifestItem(Resource):
class ManifestItem(Resource): # {{{
@staticmethod
def from_opf_manifest_item(item, basedir):
@ -194,8 +196,9 @@ class ManifestItem(Resource):
return self.media_type
raise IndexError('%d out of bounds.'%index)
# }}}
class Manifest(ResourceCollection):
class Manifest(ResourceCollection): # {{{
@staticmethod
def from_opf_manifest_element(items, dir):
@ -262,7 +265,9 @@ class Manifest(ResourceCollection):
if i.id == id:
return i.mime_type
class Spine(ResourceCollection):
# }}}
class Spine(ResourceCollection): # {{{
class Item(Resource):
@ -334,7 +339,9 @@ class Spine(ResourceCollection):
for i in self:
yield i.path
class Guide(ResourceCollection):
# }}}
class Guide(ResourceCollection): # {{{
class Reference(Resource):
@ -371,6 +378,7 @@ class Guide(ResourceCollection):
self[-1].type = type
self[-1].title = ''
# }}}
class MetadataField(object):
@ -412,7 +420,29 @@ class MetadataField(object):
elem = obj.create_metadata_element(self.name, is_dc=self.is_dc)
obj.set_text(elem, unicode(val))
class OPF(object):
def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)):
from calibre.utils.config import to_json
from calibre.ebooks.metadata.book.json_codec import object_to_unicode
for name, fm in all_user_metadata.items():
try:
fm = object_to_unicode(fm)
fm = json.dumps(fm, default=to_json, ensure_ascii=False)
except:
prints('Failed to write user metadata:', name)
import traceback
traceback.print_exc()
continue
meta = metadata_elem.makeelement('meta')
meta.set('name', 'calibre:user_metadata:'+name)
meta.set('content', fm)
meta.tail = tail
metadata_elem.append(meta)
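The net effect is one extra <meta> element per custom column inside the OPF <metadata> block, with the column's field description JSON-encoded in the content attribute. A quick way to see it (assuming calibre is importable; the #mytags column mirrors the one used by test_user_metadata() at the end of this file):
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
mi = Metadata('Test title', ['test author1'])
mi.set_user_metadata('#mytags', {'#value#': ['t1', 't2', 't3'], 'datatype': 'text',
    'is_multiple': '|', 'name': u'My Tags'})
print metadata_to_opf(mi)
# among the usual DC elements the output now contains something like
#   <meta name="calibre:user_metadata:#mytags" content='{"datatype": "text", ...}'/>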
class OPF(object): # {{{
MIMETYPE = 'application/oebps-package+xml'
PARSER = etree.XMLParser(recover=True)
NAMESPACES = {
@ -497,6 +527,43 @@ class OPF(object):
self.guide = Guide.from_opf_guide(guide, basedir) if guide else None
self.cover_data = (None, None)
self.find_toc()
self.read_user_metadata()
def read_user_metadata(self):
self._user_metadata_ = {}
temp = Metadata('x', ['x'])
from calibre.utils.config import from_json
elems = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
'"calibre:user_metadata:") and @content]')
for elem in elems:
name = elem.get('name')
name = ':'.join(name.split(':')[2:])
if not name or not name.startswith('#'):
continue
fm = elem.get('content')
try:
fm = json.loads(fm, object_hook=from_json)
temp.set_user_metadata(name, fm)
except:
prints('Failed to read user metadata:', name)
import traceback
traceback.print_exc()
continue
self._user_metadata_ = temp.get_all_user_metadata(True)
def to_book_metadata(self):
ans = MetaInformation(self)
for n, v in self._user_metadata_.items():
ans.set_user_metadata(n, v)
return ans
def write_user_metadata(self):
elems = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
'"calibre:user_metadata:") and @content]')
for elem in elems:
elem.getparent().remove(elem)
serialize_user_metadata(self.metadata,
self._user_metadata_)
def find_toc(self):
self.toc = None
@ -911,6 +978,7 @@ class OPF(object):
return elem
def render(self, encoding='utf-8'):
self.write_user_metadata()
raw = etree.tostring(self.root, encoding=encoding, pretty_print=True)
if not raw.lstrip().startswith('<?xml '):
raw = '<?xml version="1.0" encoding="%s"?>\n'%encoding.upper()+raw
@ -924,18 +992,22 @@ class OPF(object):
val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None):
setattr(self, attr, val)
temp = self.to_book_metadata()
temp.smart_update(mi, replace_metadata=replace_metadata)
self._user_metadata_ = temp.get_all_user_metadata(True)
# }}}
class OPFCreator(MetaInformation):
class OPFCreator(Metadata):
def __init__(self, base_path, *args, **kwargs):
def __init__(self, base_path, other):
'''
Initialize.
@param base_path: An absolute path to the directory in which this OPF file
will eventually be. This is used by the L{create_manifest} method
to convert paths to files into relative paths.
'''
MetaInformation.__init__(self, *args, **kwargs)
Metadata.__init__(self, title='', other=other)
self.base_path = os.path.abspath(base_path)
if self.application_id is None:
self.application_id = str(uuid.uuid4())
@ -1115,6 +1187,8 @@ class OPFCreator(MetaInformation):
item.set('title', ref.title)
guide.append(item)
serialize_user_metadata(metadata, self.get_all_user_metadata(False))
root = E.package(
metadata,
manifest,
@ -1188,7 +1262,7 @@ def metadata_to_opf(mi, as_string=True):
factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
if hasattr(mi.pubdate, 'isoformat'):
factory(DC('date'), isoformat(mi.pubdate))
if mi.category:
if hasattr(mi, 'category') and mi.category:
factory(DC('type'), mi.category)
if mi.comments:
factory(DC('description'), mi.comments)
@ -1217,6 +1291,8 @@ def metadata_to_opf(mi, as_string=True):
if mi.title_sort:
meta('title_sort', mi.title_sort)
serialize_user_metadata(metadata, mi.get_all_user_metadata(False))
metadata[-1].tail = '\n' +(' '*4)
if mi.cover:
@ -1334,5 +1410,30 @@ def suite():
def test():
unittest.TextTestRunner(verbosity=2).run(suite())
def test_user_metadata():
from cStringIO import StringIO
mi = Metadata('Test title', ['test author1', 'test author2'])
um = {
'#myseries': { '#value#': u'test series\xe4', 'datatype':'text',
'is_multiple': None, 'name': u'My Series'},
'#myseries_index': { '#value#': 2.45, 'datatype': 'float',
'is_multiple': None},
'#mytags': {'#value#':['t1','t2','t3'], 'datatype':'text',
'is_multiple': '|', 'name': u'My Tags'}
}
mi.set_all_user_metadata(um)
raw = metadata_to_opf(mi)
opfc = OPFCreator(os.getcwd(), other=mi)
out = StringIO()
opfc.render(out)
raw2 = out.getvalue()
f = StringIO(raw)
opf = OPF(f)
f2 = StringIO(raw2)
opf2 = OPF(f2)
assert um == opf._user_metadata_
assert um == opf2._user_metadata_
print opf.render()
if __name__ == '__main__':
test()
test_user_metadata()

View File

@ -125,7 +125,7 @@ def create_metadata(stream, options):
au = u', '.join(au)
author = au.encode('ascii', 'ignore')
md += r'{\author %s}'%(author,)
if options.category:
if options.get('category', None):
category = options.category.encode('ascii', 'ignore')
md += r'{\category %s}'%(category,)
comp = options.comment if hasattr(options, 'comment') else options.comments
@ -180,7 +180,7 @@ def set_metadata(stream, options):
src = pat.sub(r'{\\author ' + author + r'}', src)
else:
src = add_metadata_item(src, 'author', author)
category = options.category
category = options.get('category', None)
if category != None:
category = category.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)

View File

@ -441,7 +441,7 @@ class MobiReader(object):
html.tostring(elem, encoding='utf-8') + '</package>'
stream = cStringIO.StringIO(raw)
opf = OPF(stream)
self.embedded_mi = MetaInformation(opf)
self.embedded_mi = opf.to_book_metadata()
if guide is not None:
for ref in guide.xpath('descendant::reference'):
if 'cover' in ref.get('type', '').lower():

View File

@ -1696,11 +1696,12 @@ class MobiWriter(object):
header.write(pack('>I', 1))
# 0x1c - 0x1f : Text encoding ?
# GR: Language encoding for NCX entries (latin_1)
header.write(pack('>I', 0x4e4))
# header.write(pack('>I', 650001))
# GR: This needs to be either 0xFDE9 or 0x4E4
header.write(pack('>I', 0xFDE9))
# 0x20 - 0x23 : Mimicking kindleGen
header.write(pack('>I', 0xFFFFFFFF))
# 0x20 - 0x23 : Language code?
header.write(iana2mobi(str(self._oeb.metadata.language[0])))
# 0x24 - 0x27 : Number of TOC entries in INDX1
header.write(pack('>I', indxt_count + 1))
@ -1795,12 +1796,13 @@ class MobiWriter(object):
self._oeb.log.debug('Index records dumped to', t)
def _clean_text_value(self, text):
if not text:
text = u'(none)'
if text is not None and text.strip() :
text = text.strip()
if not isinstance(text, unicode):
text = text.decode('utf-8', 'replace')
text = text.encode('cp1252','replace')
text = text.encode('utf-8')
else :
text = "(none)".encode('utf-8')
return text
def _add_to_ctoc(self, ctoc_str, record_offset):
@ -2150,26 +2152,6 @@ class MobiWriter(object):
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
def _write_subchapter_node(self, indxt, indices, index, offset, length, count):
# This style works without a parent chapter, mimicking what KindleGen does,
# using a value of 0x0B for parentIndex
# Writes an INDX1 NCXEntry of entryType 0x1F - subchapter
if self.opts.verbose > 2:
# *** GR: Turn this off while I'm developing my code
#self._oeb.log.debug('Writing TOC node to IDXT:', node.title, 'href:', node.href)
pass
pos = 0xc0 + indxt.tell()
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
name = "%04X"%count
indxt.write(chr(len(name)) + name) # Write the name
indxt.write(INDXT['subchapter']) # entryType [0x0F | 0xDF | 0xFF | 0x3F]
indxt.write(decint(offset, DECINT_FORWARD)) # offset
indxt.write(decint(length, DECINT_FORWARD)) # length
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
indxt.write(decint(0xb, DECINT_FORWARD)) # parentIndex - null
def _compute_offset_length(self, i, node, entries) :
h = node.href
if h not in self._id_offsets:

View File

@ -15,7 +15,7 @@ from calibre.customize.ui import available_input_formats
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import safe_replace, ZipFile
from calibre.utils.zipfile import safe_replace
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import guess_type, prints
@ -294,12 +294,8 @@ class EbookIterator(object):
zf = open(self.pathtoebook, 'r+b')
except IOError:
return
zipf = ZipFile(zf, mode='a')
for name in zipf.namelist():
if name == 'META-INF/calibre_bookmarks.txt':
safe_replace(zf, 'META-INF/calibre_bookmarks.txt', StringIO(dat))
return
zipf.writestr('META-INF/calibre_bookmarks.txt', dat)
safe_replace(zf, 'META-INF/calibre_bookmarks.txt', StringIO(dat),
add_missing=True)
else:
self.config['bookmarks_'+self.pathtoebook] = dat

View File

@ -126,10 +126,9 @@ class OEBReader(object):
def _metadata_from_opf(self, opf):
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
stream = cStringIO.StringIO(etree.tostring(opf))
mi = MetaInformation(OPF(stream))
mi = OPF(stream).to_book_metadata()
if not mi.language:
mi.language = get_lang().replace('_', '-')
self.oeb.metadata.add('language', mi.language)

View File

@ -219,7 +219,10 @@ class CSSFlattener(object):
fnums = self.context.source.fnums
if size[0] in ('+', '-'):
# Oh, the warcrimes
try:
esize = 3 + force_int(size)
except:
esize = 3
if esize < 1:
esize = 1
if esize > 7:

View File

@ -12,33 +12,33 @@ from calibre import guess_type
def meta_info_to_oeb_metadata(mi, m, log):
from calibre.ebooks.oeb.base import OPF
if mi.title:
if not mi.is_null('title'):
m.clear('title')
m.add('title', mi.title)
if mi.title_sort:
if not m.title:
m.add('title', mi.title_sort)
m.title[0].file_as = mi.title_sort
if mi.authors:
if not mi.is_null('authors'):
m.filter('creator', lambda x : x.role.lower() in ['aut', ''])
for a in mi.authors:
attrib = {'role':'aut'}
if mi.author_sort:
attrib[OPF('file-as')] = mi.author_sort
m.add('creator', a, attrib=attrib)
if mi.book_producer:
if not mi.is_null('book_producer'):
m.filter('contributor', lambda x : x.role.lower() == 'bkp')
m.add('contributor', mi.book_producer, role='bkp')
if mi.comments:
if not mi.is_null('comments'):
m.clear('description')
m.add('description', mi.comments)
if mi.publisher:
if not mi.is_null('publisher'):
m.clear('publisher')
m.add('publisher', mi.publisher)
if mi.series:
if not mi.is_null('series'):
m.clear('series')
m.add('series', mi.series)
if mi.isbn:
if not mi.is_null('isbn'):
has = False
for x in m.identifier:
if x.scheme.lower() == 'isbn':
@ -46,29 +46,29 @@ def meta_info_to_oeb_metadata(mi, m, log):
has = True
if not has:
m.add('identifier', mi.isbn, scheme='ISBN')
if mi.language:
if not mi.is_null('language'):
m.clear('language')
m.add('language', mi.language)
if mi.series_index is not None:
if not mi.is_null('series_index'):
m.clear('series_index')
m.add('series_index', mi.format_series_index())
if mi.rating is not None:
if not mi.is_null('rating'):
m.clear('rating')
m.add('rating', '%.2f'%mi.rating)
if mi.tags:
if not mi.is_null('tags'):
m.clear('subject')
for t in mi.tags:
m.add('subject', t)
if mi.pubdate is not None:
if not mi.is_null('pubdate'):
m.clear('date')
m.add('date', isoformat(mi.pubdate))
if mi.timestamp is not None:
if not mi.is_null('timestamp'):
m.clear('timestamp')
m.add('timestamp', isoformat(mi.timestamp))
if mi.rights is not None:
if not mi.is_null('rights'):
m.clear('rights')
m.add('rights', mi.rights)
if mi.publication_type is not None:
if not mi.is_null('publication_type'):
m.clear('publication_type')
m.add('publication_type', mi.publication_type)
if not m.timestamp:

View File

@ -10,13 +10,6 @@ Transform OEB content into RTF markup
import os
import re
try:
from PIL import Image
Image
except ImportError:
import Image
import cStringIO
from lxml import etree
@ -26,6 +19,7 @@ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.filenames import ascii_text
from calibre.utils.magick.draw import save_cover_data_to, identify_data
TAGS = {
'b': '\\b',
@ -153,10 +147,8 @@ class RTFMLizer(object):
return text
def image_to_hexstring(self, data):
im = Image.open(cStringIO.StringIO(data))
data = cStringIO.StringIO()
im.convert('RGB').save(data, 'JPEG')
data = data.getvalue()
data = save_cover_data_to(data, 'cover.jpg', return_data=True)
width, height = identify_data(data)[:2]
raw_hex = ''
for char in data:
@ -173,7 +165,7 @@ class RTFMLizer(object):
col += 1
hex_string += char
return (hex_string, im.size[0], im.size[1])
return (hex_string, width, height)
def clean_text(self, text):
# Remove excess spaces at beginning and end of lines

View File

@ -1,7 +1,7 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" The GUI """
import os, sys, Queue
import os, sys, Queue, threading
from threading import RLock
from PyQt4.Qt import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, \
@ -311,11 +311,14 @@ class FunctionDispatcher(QObject):
if not queued:
typ = Qt.AutoConnection if queued is None else Qt.DirectConnection
self.dispatch_signal.connect(self.dispatch, type=typ)
self.q = Queue.Queue()
self.lock = threading.Lock()
def __call__(self, *args, **kwargs):
q = Queue.Queue()
self.dispatch_signal.emit(q, args, kwargs)
return q.get()
with self.lock:
self.dispatch_signal.emit(self.q, args, kwargs)
res = self.q.get()
return res
def dispatch(self, q, args, kwargs):
try:

View File

@ -230,9 +230,9 @@ class AddAction(InterfaceAction):
self._files_added(paths, names, infos, on_card=on_card)
# set the in-library flags, and as a consequence send the library's
# metadata for this book to the device. This sets the uuid to the
# correct value.
# correct value. Note that set_books_in_library might sync_booklists
self.gui.set_books_in_library(booklists=[model.db], reset=True)
model.reset()
self.gui.refresh_ondevice()
def add_books_from_device(self, view):
rows = view.selectionModel().selectedRows()

View File

@ -14,7 +14,7 @@ from calibre import isbytestring
from calibre.constants import filesystem_encoding
from calibre.utils.config import prefs
from calibre.gui2 import gprefs, warning_dialog, Dispatcher, error_dialog, \
question_dialog
question_dialog, info_dialog
from calibre.gui2.actions import InterfaceAction
class LibraryUsageStats(object):
@ -115,6 +115,14 @@ class ChooseLibraryAction(InterfaceAction):
type=Qt.QueuedConnection)
self.choose_menu.addAction(ac)
self.rename_separator = self.choose_menu.addSeparator()
self.create_action(spec=(_('Library backup status...'), 'lt.png', None,
None), attr='action_backup_status')
self.action_backup_status.triggered.connect(self.backup_status,
type=Qt.QueuedConnection)
self.choose_menu.addAction(self.action_backup_status)
def library_name(self):
db = self.gui.library_view.model().db
path = db.library_path
@ -206,6 +214,16 @@ class ChooseLibraryAction(InterfaceAction):
self.stats.remove(location)
self.build_menus()
def backup_status(self, location):
dirty_text = 'no'
try:
dirty_text = \
unicode(self.gui.library_view.model().db.dirty_queue_length())
except:
dirty_text = _('none')
info_dialog(self.gui, _('Backup status'), '<p>'+
_('Book metadata files remaining to be written: %s') % dirty_text,
show=True)
def switch_requested(self, location):
if not self.change_library_allowed():

View File

@ -8,15 +8,14 @@ __docformat__ = 'restructuredtext en'
import os
from functools import partial
from PyQt4.Qt import Qt, QTimer, QMenu
from PyQt4.Qt import Qt, QMenu
from calibre.gui2 import error_dialog, config, warning_dialog
from calibre.gui2 import error_dialog, config
from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.tag_list_editor import TagListEditor
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.progress import BlockingBusy
class EditMetadataAction(InterfaceAction):
@ -84,52 +83,33 @@ class EditMetadataAction(InterfaceAction):
def do_download_metadata(self, ids, covers=True, set_metadata=True,
set_social_metadata=None):
db = self.gui.library_view.model().db
m = self.gui.library_view.model()
db = m.db
if set_social_metadata is None:
get_social_metadata = config['get_social_metadata']
else:
get_social_metadata = set_social_metadata
from calibre.gui2.metadata import DownloadMetadata
self._download_book_metadata = DownloadMetadata(db, ids,
get_covers=covers, set_metadata=set_metadata,
get_social_metadata=get_social_metadata)
self._download_book_metadata.start()
from calibre.gui2.metadata import DoDownload
if set_social_metadata is not None and set_social_metadata:
x = _('social metadata')
else:
x = _('covers') if covers and not set_metadata else _('metadata')
self._book_metadata_download_check = QTimer(self.gui)
self._book_metadata_download_check.timeout.connect(self.book_metadata_download_check,
type=Qt.QueuedConnection)
self._book_metadata_download_check.start(100)
self._bb_dialog = BlockingBusy(_('Downloading %s for %d book(s)')%(x,
len(ids)), parent=self.gui)
self._bb_dialog.exec_()
def book_metadata_download_check(self):
if self._download_book_metadata.is_alive():
return
self._book_metadata_download_check.stop()
self._bb_dialog.accept()
title = _('Downloading %s for %d book(s)')%(x, len(ids))
self._download_book_metadata = DoDownload(self.gui, title, db, ids,
get_covers=covers, set_metadata=set_metadata,
get_social_metadata=get_social_metadata)
m.stop_metadata_backup()
try:
self._download_book_metadata.exec_()
finally:
m.start_metadata_backup()
cr = self.gui.library_view.currentIndex().row()
x = self._download_book_metadata
self._download_book_metadata = None
if x.exception is None:
if x.updated:
self.gui.library_view.model().refresh_ids(
x.updated, cr)
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
if x.failures:
details = ['%s: %s'%(title, reason) for title,
reason in x.failures.values()]
details = '%s\n'%('\n'.join(details))
warning_dialog(self.gui, _('Failed to download some metadata'),
_('Failed to download metadata for the following:'),
det_msg=details).exec_()
else:
err = _('Failed to download metadata:')
error_dialog(self.gui, _('Error'), err, det_msg=x.tb).exec_()
def edit_metadata(self, checked, bulk=None):
'''
@ -184,12 +164,13 @@ class EditMetadataAction(InterfaceAction):
self.gui.tags_view.blockSignals(True)
try:
changed = MetadataBulkDialog(self.gui, rows,
self.gui.library_view.model().db).changed
self.gui.library_view.model()).changed
finally:
self.gui.tags_view.blockSignals(False)
if changed:
self.gui.library_view.model().resort(reset=False)
self.gui.library_view.model().research()
m = self.gui.library_view.model()
m.resort(reset=False)
m.research()
self.gui.tags_view.recount()
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()

View File

@ -138,7 +138,7 @@ class DBAdder(Thread): # {{{
self.critical[name] = open(opf, 'rb').read().decode('utf-8', 'replace')
else:
try:
mi = MetaInformation(OPF(opf))
mi = OPF(opf).to_book_metadata()
except:
import traceback
mi = MetaInformation('', [_('Unknown')])
@ -152,7 +152,8 @@ class DBAdder(Thread): # {{{
mi.application_id = None
if self.db is not None:
if cover:
cover = open(cover, 'rb').read()
with open(cover, 'rb') as f:
cover = f.read()
orig_formats = formats
formats = [f for f in formats if not f.lower().endswith('.opf')]
if prefs['add_formats_to_existing']:
@ -381,11 +382,7 @@ class Adder(QObject): # {{{
# }}}
###############################################################################
############################## END ADDER ######################################
###############################################################################
class Saver(QObject):
class Saver(QObject): # {{{
def __init__(self, parent, db, callback, rows, path, opts,
spare_server=None):
@ -446,4 +443,5 @@ class Saver(QObject):
self.pd.set_msg(_('Saved')+' '+title)
if not ok:
self.failures.add((title, tb))
# }}}

View File

@ -28,6 +28,8 @@ WEIGHTS[_('Tags')] = 4
def render_rows(data):
keys = data.keys()
# First sort by name. The WEIGHTS sort will preserve this sub-order
keys.sort(cmp=lambda x, y: cmp(x.lower(), y.lower()))
keys.sort(cmp=lambda x, y: cmp(WEIGHTS[x], WEIGHTS[y]))
rows = []
for key in keys:

View File

@ -348,7 +348,11 @@ def populate_metadata_page(layout, db, book_id, bulk=False, two_column=False, pa
ans = []
column = row = comments_row = 0
for col in cols:
if not x[col]['editable']:
continue
dt = x[col]['datatype']
if dt == 'composite':
continue
if dt == 'comments':
continue
w = widget_factory(dt, col)
@ -448,9 +452,25 @@ class BulkSeries(BulkBase):
self.name_widget = w
self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent), w]
self.widgets.append(QLabel(_('Automatically number books in this series'), parent))
self.widgets.append(QLabel('', parent))
w = QWidget(parent)
layout = QHBoxLayout(w)
layout.setContentsMargins(0, 0, 0, 0)
self.remove_series = QCheckBox(parent)
self.remove_series.setText(_('Remove series'))
layout.addWidget(self.remove_series)
self.idx_widget = QCheckBox(parent)
self.widgets.append(self.idx_widget)
self.idx_widget.setText(_('Automatically number books'))
layout.addWidget(self.idx_widget)
self.force_number = QCheckBox(parent)
self.force_number.setText(_('Force numbers to start with '))
layout.addWidget(self.force_number)
self.series_start_number = QSpinBox(parent)
self.series_start_number.setMinimum(1)
self.series_start_number.setProperty("value", 1)
layout.addWidget(self.series_start_number)
layout.addItem(QSpacerItem(20, 10, QSizePolicy.Expanding, QSizePolicy.Minimum))
self.widgets.append(w)
def initialize(self, book_id):
self.idx_widget.setChecked(False)
@ -461,17 +481,26 @@ class BulkSeries(BulkBase):
def getter(self):
n = unicode(self.name_widget.currentText()).strip()
i = self.idx_widget.checkState()
return n, i
f = self.force_number.checkState()
s = self.series_start_number.value()
r = self.remove_series.checkState()
return n, i, f, s, r
def commit(self, book_ids, notify=False):
val, update_indices = self.gui_val
val = self.normalize_ui_val(val)
if val != '':
val, update_indices, force_start, at_value, clear = self.gui_val
val = '' if clear else self.normalize_ui_val(val)
if clear or val != '':
extras = []
next_index = self.db.get_next_cc_series_num_for(val, num=self.col_id)
for book_id in book_ids:
if clear:
extras.append(None)
continue
if update_indices:
if tweaks['series_index_auto_increment'] == 'next':
if force_start:
s_index = at_value
at_value += 1
elif tweaks['series_index_auto_increment'] == 'next':
s_index = next_index
next_index += 1
else:
@ -479,6 +508,8 @@ class BulkSeries(BulkBase):
else:
s_index = self.db.get_custom_extra(book_id, num=self.col_id,
index_is_id=True)
if s_index is None:
s_index = 1.0
extras.append(s_index)
self.db.set_custom_bulk(book_ids, val, extras=extras,
num=self.col_id, notify=notify)


@ -34,6 +34,8 @@ from calibre.ebooks.metadata.meta import set_metadata
from calibre.constants import DEBUG
from calibre.utils.config import prefs, tweaks
from calibre.utils.magick.draw import thumbnail
from calibre.library.save_to_disk import plugboard_any_device_value, \
plugboard_any_format_value
# }}}
class DeviceJob(BaseJob): # {{{
@ -317,19 +319,40 @@ class DeviceManager(Thread): # {{{
args=[booklist, on_card],
description=_('Send collections to device'))
def _upload_books(self, files, names, on_card=None, metadata=None):
def _upload_books(self, files, names, on_card=None, metadata=None, plugboards=None):
'''Upload books to device: '''
if metadata and files and len(metadata) == len(files):
for f, mi in zip(files, metadata):
if isinstance(f, unicode):
ext = f.rpartition('.')[-1].lower()
dev_name = self.connected_device.__class__.__name__
cpb = None
if ext in plugboards:
cpb = plugboards[ext]
elif plugboard_any_format_value in plugboards:
cpb = plugboards[plugboard_any_format_value]
if cpb is not None:
if dev_name in cpb:
cpb = cpb[dev_name]
elif plugboard_any_device_value in cpb:
cpb = cpb[plugboard_any_device_value]
else:
cpb = None
if DEBUG:
prints('Using plugboard', ext, dev_name, cpb)
if ext:
try:
if DEBUG:
prints('Setting metadata in:', mi.title, 'at:',
f, file=sys.__stdout__)
with open(f, 'r+b') as stream:
set_metadata(stream, mi, stream_type=ext)
if cpb:
newmi = mi.deepcopy()
newmi.template_to_attribute(mi, cpb)
else:
newmi = mi
set_metadata(stream, newmi, stream_type=ext)
except:
if DEBUG:
prints(traceback.format_exc(), file=sys.__stdout__)
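
The cascade above picks the most specific plugboard that applies: an entry keyed by the exact file extension wins over the 'any format' entry, and within the chosen entry a key for the connected device class wins over the 'any device' key; if neither device key is present the plugboard is ignored. A standalone sketch of that resolution order (the dict contents and the literal wildcard strings are placeholders standing in for the plugboard_any_format_value and plugboard_any_device_value constants imported above):

def resolve_plugboard(plugboards, ext, dev_name,
                      any_format='any format', any_device='any device'):
    # Prefer the exact extension, fall back to the any-format wildcard
    cpb = plugboards.get(ext, plugboards.get(any_format))
    if cpb is None:
        return None
    # Prefer the exact device class, fall back to the any-device wildcard
    return cpb.get(dev_name, cpb.get(any_device))

# Hypothetical plugboard: EPUBs sent to any device use one set of rules
plugboards = {'epub': {'any device': 'epub rules'}}
print(resolve_plugboard(plugboards, 'epub', 'KOBO'))   # -> 'epub rules'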
@ -338,12 +361,12 @@ class DeviceManager(Thread): # {{{
metadata=metadata, end_session=False)
def upload_books(self, done, files, names, on_card=None, titles=None,
metadata=None):
metadata=None, plugboards=None):
desc = _('Upload %d books to device')%len(names)
if titles:
desc += u':' + u', '.join(titles)
return self.create_job(self._upload_books, done, args=[files, names],
kwargs={'on_card':on_card,'metadata':metadata}, description=desc)
kwargs={'on_card':on_card,'metadata':metadata,'plugboards':plugboards}, description=desc)
def add_books_to_metadata(self, locations, metadata, booklists):
self.device.add_books_to_metadata(locations, metadata, booklists)
@ -721,14 +744,16 @@ class DeviceMixin(object): # {{{
self.device_manager.device.__class__.get_gui_name()+\
_(' detected.'), 3000)
self.device_connected = device_kind
self.refresh_ondevice_info (device_connected = True, reset_only = True)
self.library_view.set_device_connected(self.device_connected)
self.refresh_ondevice (reset_only = True)
else:
self.device_connected = None
self.status_bar.device_disconnected()
if self.current_view() != self.library_view:
self.book_details.reset_info()
self.location_manager.update_devices()
self.refresh_ondevice_info(device_connected=False)
self.library_view.set_device_connected(self.device_connected)
self.refresh_ondevice()
def info_read(self, job):
'''
@ -760,9 +785,9 @@ class DeviceMixin(object): # {{{
self.card_b_view.set_editable(self.device_manager.device.CAN_SET_METADATA)
self.sync_news()
self.sync_catalogs()
self.refresh_ondevice_info(device_connected = True)
self.refresh_ondevice()
def refresh_ondevice_info(self, device_connected, reset_only = False):
def refresh_ondevice(self, reset_only = False):
'''
Force the library view to refresh, taking into consideration new
device books information
@ -770,7 +795,7 @@ class DeviceMixin(object): # {{{
self.book_on_device(None, reset=True)
if reset_only:
return
self.library_view.set_device_connected(device_connected)
self.library_view.model().refresh_ondevice()
# }}}
@ -803,7 +828,7 @@ class DeviceMixin(object): # {{{
self.book_on_device(None, reset=True)
# We need to reset the ondevice flags in the library. Use a big hammer,
# so we don't need to worry about whether some succeeded or not.
self.refresh_ondevice_info(device_connected=True, reset_only=False)
self.refresh_ondevice(reset_only=False)
def dispatch_sync_event(self, dest, delete, specific):
rows = self.library_view.selectionModel().selectedRows()
@ -1255,10 +1280,11 @@ class DeviceMixin(object): # {{{
:param files: List of either paths to files or file like objects
'''
titles = [i.title for i in metadata]
plugboards = self.library_view.model().db.prefs.get('plugboards', {})
job = self.device_manager.upload_books(
Dispatcher(self.books_uploaded),
files, names, on_card=on_card,
metadata=metadata, titles=titles
metadata=metadata, titles=titles, plugboards=plugboards
)
self.upload_memory[job] = (metadata, on_card, memory, files)
@ -1300,7 +1326,7 @@ class DeviceMixin(object): # {{{
if not self.set_books_in_library(self.booklists(), reset=True):
self.upload_booklists()
self.book_on_device(None, reset=True)
self.refresh_ondevice_info(device_connected = True)
self.refresh_ondevice()
view = self.card_a_view if on_card == 'carda' else \
self.card_b_view if on_card == 'cardb' else self.memory_view


@ -6,7 +6,9 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QWidget, QListWidgetItem, Qt, QVariant, SIGNAL
from calibre.gui2 import error_dialog
from calibre.gui2.device_drivers.configwidget_ui import Ui_ConfigWidget
from calibre.utils.formatter import validation_formatter
class ConfigWidget(QWidget, Ui_ConfigWidget):
@ -77,3 +79,15 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
def use_author_sort(self):
return self.opt_use_author_sort.isChecked()
def validate(self):
tmpl = unicode(self.opt_save_template.text())
try:
validation_formatter.validate(tmpl)
return True
except Exception, err:
error_dialog(self, _('Invalid template'),
'<p>'+_('The template %s is invalid:')%tmpl + \
'<br>'+unicode(err), show=True)
return False


@ -6,6 +6,7 @@ __license__ = 'GPL v3'
from PyQt4.Qt import Qt, QDialog, QTableWidgetItem, QAbstractItemView
from calibre.ebooks.metadata import author_to_author_sort
from calibre.gui2 import error_dialog
from calibre.gui2.dialogs.edit_authors_dialog_ui import Ui_EditAuthorsDialog
class tableItem(QTableWidgetItem):
@ -109,6 +110,12 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
if col == 0:
item = self.table.item(row, 0)
aut = unicode(item.text()).strip()
amper = aut.find('&')
if amper >= 0:
error_dialog(self.parent(), _('Invalid author name'),
_('Author names cannot contain & characters.')).exec_()
aut = aut.replace('&', '%')
self.table.item(row, 0).setText(aut)
c = self.table.item(row, 1)
c.setText(author_to_author_sort(aut))
item = c


@ -3,57 +3,131 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Dialog to edit metadata in bulk'''
from threading import Thread
import re
from PyQt4.Qt import QDialog, QGridLayout
from PyQt4.Qt import Qt, QDialog, QGridLayout, QVBoxLayout, QFont, QLabel, \
pyqtSignal
from PyQt4 import QtGui
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.ebooks.metadata import string_to_authors, \
authors_to_string
from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2.dialogs.progress import BlockingBusy
from calibre.gui2 import error_dialog, Dispatcher
from calibre.gui2 import error_dialog
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.utils.config import dynamic
class Worker(Thread):
class MyBlockingBusy(QDialog):
do_one_signal = pyqtSignal()
phases = ['',
_('Title/Author'),
_('Standard metadata'),
_('Custom metadata'),
_('Search/Replace'),
]
def __init__(self, msg, args, db, ids, cc_widgets, s_r_func,
parent=None, window_title=_('Working')):
QDialog.__init__(self, parent)
self._layout = QVBoxLayout()
self.setLayout(self._layout)
self.msg_text = msg
self.msg = QLabel(msg+' ') # Ensure dialog is wide enough
#self.msg.setWordWrap(True)
self.font = QFont()
self.font.setPointSize(self.font.pointSize() + 8)
self.msg.setFont(self.font)
self.pi = ProgressIndicator(self)
self.pi.setDisplaySize(100)
self._layout.addWidget(self.pi, 0, Qt.AlignHCenter)
self._layout.addSpacing(15)
self._layout.addWidget(self.msg, 0, Qt.AlignHCenter)
self.setWindowTitle(window_title)
self.resize(self.sizeHint())
self.start()
def __init__(self, args, db, ids, cc_widgets, callback):
Thread.__init__(self)
self.args = args
self.db = db
self.ids = ids
self.error = None
self.callback = callback
self.cc_widgets = cc_widgets
self.s_r_func = s_r_func
self.do_one_signal.connect(self.do_one_safe, Qt.QueuedConnection)
def doit(self):
def start(self):
self.pi.startAnimation()
def stop(self):
self.pi.stopAnimation()
def accept(self):
self.stop()
return QDialog.accept(self)
def exec_(self):
self.current_index = 0
self.current_phase = 1
self.do_one_signal.emit()
return QDialog.exec_(self)
def do_one_safe(self):
try:
if self.current_index >= len(self.ids):
self.current_phase += 1
self.current_index = 0
if self.current_phase > 4:
self.db.commit()
return self.accept()
id = self.ids[self.current_index]
percent = int((self.current_index*100)/float(len(self.ids)))
self.msg.setText(self.msg_text.format(self.phases[self.current_phase],
percent))
self.do_one(id)
except Exception, err:
import traceback
try:
err = unicode(err)
except:
err = repr(err)
self.error = (err, traceback.format_exc())
return self.accept()
def do_one(self, id):
remove, add, au, aus, do_aus, rating, pub, do_series, \
do_autonumber, do_remove_format, remove_format, do_swap_ta, \
do_remove_conv, do_auto_author, series = self.args
do_remove_conv, do_auto_author, series, do_series_restart, \
series_start_value, do_title_case, clear_series = self.args
# first loop: do author and title. These will commit at the end of each
# operation, because each operation modifies the file system. We want to
# try hard to keep the DB and the file system in sync, even in the face
# of exceptions or forced exits.
for id in self.ids:
if self.current_phase == 1:
title_set = False
if do_swap_ta:
title = self.db.title(id, index_is_id=True)
aum = self.db.authors(id, index_is_id=True)
if aum:
aum = [a.strip().replace('|', ',') for a in aum.split(',')]
new_title = authors_to_string(aum)
if do_title_case:
new_title = new_title.title()
self.db.set_title(id, new_title, notify=False)
title_set = True
if title:
new_authors = string_to_authors(title)
self.db.set_authors(id, new_authors, notify=False)
if do_title_case and not title_set:
title = self.db.title(id, index_is_id=True)
self.db.set_title(id, title.title(), notify=False)
if au:
self.db.set_authors(id, string_to_authors(au), notify=False)
elif self.current_phase == 2:
# All of these just affect the DB, so we can tolerate a total rollback
for id in self.ids:
if do_auto_author:
x = self.db.author_sort_from_book(id, index_is_id=True)
if x:
@ -68,7 +142,14 @@ class Worker(Thread):
if pub:
self.db.set_publisher(id, pub, notify=False, commit=False)
if clear_series:
self.db.set_series(id, '', notify=False, commit=False)
if do_series:
if do_series_restart:
next = series_start_value
series_start_value += 1
else:
next = self.db.get_next_series_num_for(series)
self.db.set_series(id, series, notify=False, commit=False)
num = next if do_autonumber and series else 1.0
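
A simplified, standalone sketch of the numbering decision made just above, with invented values (the database lookup is stubbed out; in the dialog get_next_series_num_for supplies the next free number for the series):

def pick_series_index(do_autonumber, do_series_restart, start_value, next_in_db):
    if not do_autonumber:
        return 1.0                 # every selected book gets index 1
    if do_series_restart:
        return start_value         # "Force numbers to start with" value
    return next_in_db              # continue from the database

# With "Force numbers to start with" checked and the spin box at 5, three
# selected books get 5, 6 and 7, because series_start_value is bumped per book.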
@ -79,42 +160,44 @@ class Worker(Thread):
if do_remove_conv:
self.db.delete_conversion_options(id, 'PIPE', commit=False)
self.db.commit()
elif self.current_phase == 3:
# both of these are fast enough to just do them all
for w in self.cc_widgets:
w.commit(self.ids)
self.db.bulk_modify_tags(self.ids, add=add, remove=remove,
notify=False)
def run(self):
try:
self.doit()
except Exception, err:
import traceback
try:
err = unicode(err)
except:
err = repr(err)
self.error = (err, traceback.format_exc())
self.callback()
self.current_index = len(self.ids)
elif self.current_phase == 4:
self.s_r_func(id)
# do the next one
self.current_index += 1
self.do_one_signal.emit()
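
MyBlockingBusy above replaces the old worker thread with a queued-signal loop: every do_one_signal emission schedules the next book on a later pass of the Qt event loop, so the progress label repaints between books while all the work stays on the GUI thread. A minimal standalone sketch of that pattern (class and method names are invented; it assumes a running QApplication):

from PyQt4.Qt import QDialog, Qt, pyqtSignal

class OneAtATime(QDialog):
    step = pyqtSignal()

    def __init__(self, items, parent=None):
        QDialog.__init__(self, parent)
        self.items, self.pos = items, 0
        # Queued connection: the slot runs on a later event-loop pass,
        # which lets the dialog repaint between items
        self.step.connect(self.do_step, Qt.QueuedConnection)

    def exec_(self):
        self.step.emit()
        return QDialog.exec_(self)

    def do_step(self):
        if self.pos >= len(self.items):
            return self.accept()
        self.work(self.items[self.pos])
        self.pos += 1
        self.step.emit()           # schedule the next item

    def work(self, item):
        pass                       # stand-in for the real per-item processing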
class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
s_r_functions = {
'' : lambda x: x,
s_r_functions = { '' : lambda x: x,
_('Lower Case') : lambda x: x.lower(),
_('Upper Case') : lambda x: x.upper(),
_('Title Case') : lambda x: x.title(),
}
def __init__(self, window, rows, db):
s_r_match_modes = [ _('Character match'),
_('Regular Expression'),
]
s_r_replace_modes = [ _('Replace field'),
_('Prepend to field'),
_('Append to field'),
]
def __init__(self, window, rows, model):
QDialog.__init__(self, window)
Ui_MetadataBulkDialog.__init__(self)
self.setupUi(self)
self.db = db
self.ids = [db.id(r) for r in rows]
self.model = model
self.db = model.db
self.ids = [self.db.id(r) for r in rows]
self.box_title.setText('<p>' +
_('Editing meta information for <b>%d books</b>') %
len(rows))
@ -135,8 +218,9 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
self.series.currentIndexChanged[int].connect(self.series_changed)
self.series.editTextChanged.connect(self.series_changed)
self.tag_editor_button.clicked.connect(self.tag_editor)
self.autonumber_series.stateChanged[int].connect(self.auto_number_changed)
if len(db.custom_column_label_map) == 0:
if len(self.db.custom_field_keys(include_composites=False)) == 0:
self.central_widget.removeTab(1)
else:
self.create_custom_column_editors()
@ -148,86 +232,165 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
self.search_for.initialize('bulk_edit_search_for')
self.replace_with.initialize('bulk_edit_replace_with')
self.test_text.initialize('bulk_edit_test_test')
fields = ['']
self.all_fields = ['']
self.writable_fields = ['']
fm = self.db.field_metadata
for f in fm:
if (f in ['author_sort'] or (
fm[f]['datatype'] == 'text' or fm[f]['datatype'] == 'series')
fm[f]['datatype'] in ['text', 'series'])
and fm[f].get('search_terms', None)
and f not in ['formats', 'ondevice']):
fields.append(f)
fields.sort()
self.search_field.addItems(fields)
self.search_field.setMaxVisibleItems(min(len(fields), 20))
self.all_fields.append(f)
self.writable_fields.append(f)
if fm[f]['datatype'] == 'composite':
self.all_fields.append(f)
self.all_fields.sort()
self.writable_fields.sort()
self.search_field.setMaxVisibleItems(20)
self.destination_field.setMaxVisibleItems(20)
offset = 10
self.s_r_number_of_books = min(7, len(self.ids))
self.s_r_number_of_books = min(10, len(self.ids))
for i in range(1,self.s_r_number_of_books+1):
w = QtGui.QLabel(self.tabWidgetPage3)
w.setText(_('Book %d:')%i)
self.gridLayout1.addWidget(w, i+offset, 0, 1, 1)
self.testgrid.addWidget(w, i+offset, 0, 1, 1)
w = QtGui.QLineEdit(self.tabWidgetPage3)
w.setReadOnly(True)
name = 'book_%d_text'%i
setattr(self, name, w)
self.book_1_text.setObjectName(name)
self.gridLayout1.addWidget(w, i+offset, 1, 1, 1)
self.testgrid.addWidget(w, i+offset, 1, 1, 1)
w = QtGui.QLineEdit(self.tabWidgetPage3)
w.setReadOnly(True)
name = 'book_%d_result'%i
setattr(self, name, w)
self.book_1_text.setObjectName(name)
self.gridLayout1.addWidget(w, i+offset, 2, 1, 1)
self.testgrid.addWidget(w, i+offset, 2, 1, 1)
self.s_r_heading.setText('<p>'+
_('Search and replace in text fields using '
'regular expressions. The search text is an '
'arbitrary python-compatible regular expression. '
'The replacement text can contain backreferences '
'to parenthesized expressions in the pattern. '
'The search is not anchored, and can match and '
'replace multiple times on the same string. See '
'<a href="http://docs.python.org/library/re.html"> '
'this reference</a> '
'for more information, and in particular the \'sub\' '
'function.') + '<p>' + _(
'Note: <b>you can destroy your library</b> '
'using this feature. Changes are permanent. There '
'is no undo function. You are strongly encouraged '
'to back up your library before proceeding.'))
self.main_heading = _(
'<b>You can destroy your library using this feature.</b> '
'Changes are permanent. There is no undo function. '
' This feature is experimental, and there may be bugs. '
'You are strongly encouraged to back up your library '
'before proceeding.<p>'
'Search and replace in text fields using character matching '
'or regular expressions. ')
self.character_heading = _(
'In character mode, the field is searched for the entered '
'search text. The text is replaced by the specified replacement '
'text everywhere it is found in the specified field. After '
'replacement is finished, the text can be changed to '
'upper-case, lower-case, or title-case. If the case-sensitive '
'check box is checked, the search text must match exactly. If '
'it is unchecked, the search text will match both upper- and '
'lower-case letters.'
)
self.regexp_heading = _(
'In regular expression mode, the search text is an '
'arbitrary python-compatible regular expression. The '
'replacement text can contain backreferences to parenthesized '
'expressions in the pattern. The search is not anchored, '
'and can match and replace multiple times on the same string. '
'The modification functions (lower-case etc) are applied to the '
'matched text, not to the field as a whole. '
'The destination box specifies the field where the result after '
'matching and replacement is to be assigned. You can replace '
'the text in the field, or prepend or append the matched text. '
'See <a href="http://docs.python.org/library/re.html"> '
'this reference</a> for more information on python\'s regular '
'expressions, and in particular the \'sub\' function.'
)
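
As a plain illustration of the regular-expression mode that regexp_heading describes, here is a standalone sketch using Python's re module directly (the field value, pattern and replacement are invented; this is not the dialog's own code path):

import re

val = 'The Fellowship of the Ring: Book One'   # hypothetical field value
# re.I mirrors an unchecked "Case sensitive" box
pattern = re.compile(r'^(.*):.*$', re.I)

def repl(match):
    # Backreference to the parenthesized group, then the modification
    # function (here Title Case) applied to the matched text only
    return match.expand(r'\1').title()

print(pattern.sub(repl, val))   # -> 'The Fellowship Of The Ring'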
self.search_mode.addItems(self.s_r_match_modes)
self.search_mode.setCurrentIndex(dynamic.get('s_r_search_mode', 0))
self.replace_mode.addItems(self.s_r_replace_modes)
self.replace_mode.setCurrentIndex(0)
self.s_r_search_mode = 0
self.s_r_error = None
self.s_r_obj = None
self.replace_func.addItems(sorted(self.s_r_functions.keys()))
self.search_field.currentIndexChanged[str].connect(self.s_r_field_changed)
self.search_mode.currentIndexChanged[int].connect(self.s_r_search_mode_changed)
self.search_field.currentIndexChanged[int].connect(self.s_r_search_field_changed)
self.destination_field.currentIndexChanged[str].connect(self.s_r_destination_field_changed)
self.replace_mode.currentIndexChanged[int].connect(self.s_r_paint_results)
self.replace_func.currentIndexChanged[str].connect(self.s_r_paint_results)
self.search_for.editTextChanged[str].connect(self.s_r_paint_results)
self.replace_with.editTextChanged[str].connect(self.s_r_paint_results)
self.test_text.editTextChanged[str].connect(self.s_r_paint_results)
self.comma_separated.stateChanged.connect(self.s_r_paint_results)
self.case_sensitive.stateChanged.connect(self.s_r_paint_results)
self.central_widget.setCurrentIndex(0)
def s_r_field_changed(self, txt):
txt = unicode(txt)
for i in range(0, self.s_r_number_of_books):
if txt:
fm = self.db.field_metadata[txt]
id = self.ids[i]
val = self.db.get_property(id, index_is_id=True,
loc=fm['rec_index'])
self.search_for.completer().setCaseSensitivity(Qt.CaseSensitive)
self.replace_with.completer().setCaseSensitivity(Qt.CaseSensitive)
self.s_r_search_mode_changed(self.search_mode.currentIndex())
def s_r_get_field(self, mi, field):
if field:
fm = self.db.metadata_for_field(field)
val = mi.get(field, None)
if val is None:
val = ''
if fm['is_multiple']:
val = [t.strip() for t in val.split(fm['is_multiple']) if t.strip()]
if val:
val.sort(cmp=lambda x,y: cmp(x.lower(), y.lower()))
val = val[0]
if txt == 'authors':
val = val.replace('|', ',')
val = []
elif not fm['is_multiple']:
val = [val]
elif field == 'authors':
val = [v.replace(',', '|') for v in val]
else:
val = ''
else:
val = ''
val = []
return val
def s_r_search_field_changed(self, idx):
for i in range(0, self.s_r_number_of_books):
w = getattr(self, 'book_%d_text'%(i+1))
w.setText(val)
mi = self.db.get_metadata(self.ids[i], index_is_id=True)
src = unicode(self.search_field.currentText())
t = self.s_r_get_field(mi, src)
w.setText(''.join(t[0:1]))
if self.search_mode.currentIndex() == 0:
self.destination_field.setCurrentIndex(idx)
else:
self.s_r_paint_results(None)
def s_r_destination_field_changed(self, txt):
txt = unicode(txt)
self.comma_separated.setEnabled(True)
if txt:
fm = self.db.metadata_for_field(txt)
if fm['is_multiple']:
self.comma_separated.setEnabled(False)
self.comma_separated.setChecked(True)
self.s_r_paint_results(None)
def s_r_search_mode_changed(self, val):
self.search_field.clear()
self.destination_field.clear()
if val == 0:
self.search_field.addItems(self.writable_fields)
self.destination_field.addItems(self.writable_fields)
self.destination_field.setCurrentIndex(0)
self.destination_field.setVisible(False)
self.destination_field_label.setVisible(False)
self.replace_mode.setCurrentIndex(0)
self.replace_mode.setVisible(False)
self.replace_mode_label.setVisible(False)
self.comma_separated.setVisible(False)
self.s_r_heading.setText('<p>'+self.main_heading + self.character_heading)
else:
self.search_field.addItems(self.all_fields)
self.destination_field.addItems(self.writable_fields)
self.destination_field.setVisible(True)
self.destination_field_label.setVisible(True)
self.replace_mode.setVisible(True)
self.replace_mode_label.setVisible(True)
self.comma_separated.setVisible(True)
self.s_r_heading.setText('<p>'+self.main_heading + self.regexp_heading)
self.s_r_paint_results(None)
def s_r_set_colors(self):
@ -242,17 +405,75 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
getattr(self, 'book_%d_result'%(i+1)).setText('')
def s_r_func(self, match):
rf = self.s_r_functions[unicode(self.replace_func.currentText())]
rv = unicode(self.replace_with.text())
val = match.expand(rv)
return rf(val)
rfunc = self.s_r_functions[unicode(self.replace_func.currentText())]
rtext = unicode(self.replace_with.text())
rtext = match.expand(rtext)
return rfunc(rtext)
def s_r_do_regexp(self, mi):
src_field = unicode(self.search_field.currentText())
src = self.s_r_get_field(mi, src_field)
result = []
rfunc = self.s_r_functions[unicode(self.replace_func.currentText())]
for s in src:
t = self.s_r_obj.sub(self.s_r_func, s)
if self.search_mode.currentIndex() == 0:
t = rfunc(t)
result.append(t)
return result
def s_r_do_destination(self, mi, val):
src = unicode(self.search_field.currentText())
if src == '':
return ''
dest = unicode(self.destination_field.currentText())
if dest == '':
if self.db.metadata_for_field(src)['datatype'] == 'composite':
raise Exception(_('You must specify a destination when source is a composite field'))
dest = src
dest_mode = self.replace_mode.currentIndex()
if dest_mode != 0:
dest_val = mi.get(dest, '')
if dest_val is None:
dest_val = []
elif isinstance(dest_val, list):
if dest == 'authors':
dest_val = [v.replace(',', '|') for v in dest_val]
else:
dest_val = [dest_val]
else:
dest_val = []
if len(val) > 0:
if src == 'authors':
val = [v.replace(',', '|') for v in val]
if dest_mode == 1:
val.extend(dest_val)
elif dest_mode == 2:
val[0:0] = dest_val
return val
def s_r_replace_mode_separator(self):
if self.comma_separated.isChecked():
return ','
return ''
def s_r_paint_results(self, txt):
self.s_r_error = None
self.s_r_set_colors()
if self.case_sensitive.isChecked():
flags = 0
else:
flags = re.I
try:
self.s_r_obj = re.compile(unicode(self.search_for.text()))
except re.error as e:
if self.search_mode.currentIndex() == 0:
self.s_r_obj = re.compile(re.escape(unicode(self.search_for.text())), flags)
else:
self.s_r_obj = re.compile(unicode(self.search_for.text()), flags)
except Exception as e:
self.s_r_obj = None
self.s_r_error = e
self.s_r_set_colors()
@ -261,66 +482,72 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
try:
self.test_result.setText(self.s_r_obj.sub(self.s_r_func,
unicode(self.test_text.text())))
except re.error as e:
except Exception as e:
self.s_r_error = e
self.s_r_set_colors()
return
for i in range(0,self.s_r_number_of_books):
wt = getattr(self, 'book_%d_text'%(i+1))
mi = self.db.get_metadata(self.ids[i], index_is_id=True)
wr = getattr(self, 'book_%d_result'%(i+1))
try:
wr.setText(self.s_r_obj.sub(self.s_r_func, unicode(wt.text())))
except re.error as e:
result = self.s_r_do_regexp(mi)
t = self.s_r_do_destination(mi, result[0:1])
t = self.s_r_replace_mode_separator().join(t)
wr.setText(t)
except Exception as e:
self.s_r_error = e
self.s_r_set_colors()
break
def do_search_replace(self):
field = unicode(self.search_field.currentText())
if not field or not self.s_r_obj:
def do_search_replace(self, id):
source = unicode(self.search_field.currentText())
if not source or not self.s_r_obj:
return
dest = unicode(self.destination_field.currentText())
if not dest:
dest = source
dfm = self.db.field_metadata[dest]
fm = self.db.field_metadata[field]
def apply_pattern(val):
try:
return self.s_r_obj.sub(self.s_r_func, val)
except:
return val
for id in self.ids:
val = self.db.get_property(id, index_is_id=True,
loc=fm['rec_index'])
mi = self.db.get_metadata(id, index_is_id=True,)
val = mi.get(source)
if val is None:
continue
if fm['is_multiple']:
res = []
for val in [t.strip() for t in val.split(fm['is_multiple'])]:
v = apply_pattern(val).strip()
if v:
res.append(v)
val = res
if fm['is_custom']:
return
val = self.s_r_do_regexp(mi)
val = self.s_r_do_destination(mi, val)
if dfm['is_multiple']:
if dfm['is_custom']:
# The standard tags and authors values want to be lists.
# All custom columns are to be strings
val = fm['is_multiple'].join(val)
elif field == 'authors':
val = [v.replace('|', ',') for v in val]
val = dfm['is_multiple'].join(val)
if dest == 'authors' and len(val) == 0:
error_dialog(self, _('Search/replace invalid'),
_('Authors cannot be set to the empty string. '
'Book title %s not processed')%mi.title,
show=True)
return
else:
val = apply_pattern(val)
val = self.s_r_replace_mode_separator().join(val)
if dest == 'title' and len(val) == 0:
error_dialog(self, _('Search/replace invalid'),
_('Title cannot be set to the empty string. '
'Book title %s not processed')%mi.title,
show=True)
return
if fm['is_custom']:
extra = self.db.get_custom_extra(id, label=fm['label'], index_is_id=True)
self.db.set_custom(id, val, label=fm['label'], extra=extra,
if dfm['is_custom']:
extra = self.db.get_custom_extra(id, label=dfm['label'], index_is_id=True)
self.db.set_custom(id, val, label=dfm['label'], extra=extra,
commit=False)
else:
if field == 'comments':
if dest == 'comments':
setter = self.db.set_comment
else:
setter = getattr(self.db, 'set_'+field)
setter = getattr(self.db, 'set_'+dest)
if dest in ['title', 'authors']:
setter(id, val, notify=False)
else:
setter(id, val, notify=False, commit=False)
self.db.commit()
def create_custom_column_editors(self):
w = self.central_widget.widget(1)
@ -343,11 +570,11 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
def initalize_authors(self):
all_authors = self.db.all_authors()
all_authors.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_authors.sort(cmp=lambda x, y : cmp(x[1].lower(), y[1].lower()))
for i in all_authors:
id, name = i
name = authors_to_string([name.strip().replace('|', ',') for n in name.split(',')])
name = name.strip().replace('|', ',')
self.authors.addItem(name)
self.authors.setEditText('')
@ -378,6 +605,16 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
self.tags.update_tags_cache(self.db.all_tags())
self.remove_tags.update_tags_cache(self.db.all_tags())
def auto_number_changed(self, state):
if state:
self.series_numbering_restarts.setEnabled(True)
self.series_start_number.setEnabled(True)
else:
self.series_numbering_restarts.setEnabled(False)
self.series_numbering_restarts.setChecked(False)
self.series_start_number.setEnabled(False)
self.series_start_number.setValue(1)
def accept(self):
if len(self.ids) < 1:
return QDialog.accept(self)
@ -404,33 +641,42 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
rating = self.rating.value()
pub = unicode(self.publisher.text())
do_series = self.write_series
clear_series = self.clear_series.isChecked()
series = unicode(self.series.currentText()).strip()
do_autonumber = self.autonumber_series.isChecked()
do_series_restart = self.series_numbering_restarts.isChecked()
series_start_value = self.series_start_number.value()
do_remove_format = self.remove_format.currentIndex() > -1
remove_format = unicode(self.remove_format.currentText())
do_swap_ta = self.swap_title_and_author.isChecked()
do_remove_conv = self.remove_conversion_settings.isChecked()
do_auto_author = self.auto_author_sort.isChecked()
do_title_case = self.change_title_to_title_case.isChecked()
args = (remove, add, au, aus, do_aus, rating, pub, do_series,
do_autonumber, do_remove_format, remove_format, do_swap_ta,
do_remove_conv, do_auto_author, series)
do_remove_conv, do_auto_author, series, do_series_restart,
series_start_value, do_title_case, clear_series)
bb = BlockingBusy(_('Applying changes to %d books. This may take a while.')
%len(self.ids), parent=self)
self.worker = Worker(args, self.db, self.ids,
bb = MyBlockingBusy(_('Applying changes to %d books.\nPhase {0} {1}%%.')
%len(self.ids), args, self.db, self.ids,
getattr(self, 'custom_column_widgets', []),
Dispatcher(bb.accept, parent=bb))
self.worker.start()
bb.exec_()
self.do_search_replace, parent=self)
if self.worker.error is not None:
# The metadata backup thread causes database commits
# which can slow down bulk editing of large numbers of books
self.model.stop_metadata_backup()
try:
bb.exec_()
finally:
self.model.start_metadata_backup()
if bb.error is not None:
return error_dialog(self, _('Failed'),
self.worker.error[0], det_msg=self.worker.error[1],
bb.error[0], det_msg=bb.error[1],
show=True)
self.do_search_replace()
dynamic['s_r_search_mode'] = self.search_mode.currentIndex()
self.db.clean()
return QDialog.accept(self)


@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>679</width>
<height>685</height>
<width>752</width>
<height>715</height>
</rect>
</property>
<property name="windowTitle">
@ -225,6 +225,8 @@
</widget>
</item>
<item row="7" column="1">
<layout class="QHBoxLayout" name="HLayout_34">
<item>
<widget class="EnComboBox" name="series">
<property name="toolTip">
<string>List of known series. You can add new series.</string>
@ -243,6 +245,89 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="clear_series">
<property name="toolTip">
<string>If checked, the series will be cleared</string>
</property>
<property name="text">
<string>Clear series</string>
</property>
</widget>
</item>
<item>
<spacer name="HSpacer_344">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>00</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item row="8" column="1" colspan="2">
<layout class="QHBoxLayout" name="HLayout_3">
<item>
<widget class="QCheckBox" name="autonumber_series">
<property name="toolTip">
<string>If not checked, the series number for the books will be set to 1.
If checked, selected books will be automatically numbered, in the order
you selected them. So if you selected Book A and then Book B,
Book A will have series number 1 and Book B series number 2.</string>
</property>
<property name="text">
<string>Automatically number books in this series</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="series_numbering_restarts">
<property name="enabled">
<bool>false</bool>
</property>
<property name="toolTip">
<string>Series will normally be renumbered from the highest number in the database
for that series. Checking this box will tell calibre to start numbering
from the value in the box</string>
</property>
<property name="text">
<string>Force numbers to start with </string>
</property>
</widget>
</item>
<item>
<widget class="QSpinBox" name="series_start_number">
<property name="enabled">
<bool>false</bool>
</property>
<property name="minimum">
<number>1</number>
</property>
<property name="value">
<number>1</number>
</property>
</widget>
</item>
<item>
<spacer name="HSpacer_34">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>10</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item row="9" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
@ -270,16 +355,14 @@
</property>
</widget>
</item>
<item row="8" column="1">
<widget class="QCheckBox" name="autonumber_series">
<property name="toolTip">
<string>Selected books will be automatically numbered,
in the order you selected them.
So if you selected Book A and then Book B,
Book A will have series number 1 and Book B series number 2.</string>
</property>
<item row="12" column="0" colspan="2">
<widget class="QCheckBox" name="change_title_to_title_case">
<property name="text">
<string>Automatically number books in this series</string>
<string>Change title to title case</string>
</property>
<property name="toolTip">
<string>Force the title to be in title case. If both this and swap authors are checked,
title and author are swapped before the title case is set</string>
</property>
</widget>
</item>
@ -295,7 +378,7 @@ Future conversion of these books will use the default settings.</string>
</property>
</widget>
</item>
<item row="12" column="0" colspan="3">
<item row="15" column="0" colspan="3">
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -319,7 +402,7 @@ Future conversion of these books will use the default settings.</string>
<attribute name="title">
<string>&amp;Search and replace (experimental)</string>
</attribute>
<layout class="QGridLayout" name="gridLayout">
<layout class="QGridLayout" name="vargrid">
<property name="sizeConstraint">
<enum>QLayout::SetMinimumSize</enum>
</property>
@ -351,6 +434,47 @@ Future conversion of these books will use the default settings.</string>
</widget>
</item>
<item row="3" column="1">
<widget class="QComboBox" name="search_field">
<property name="toolTip">
<string>The name of the field that you want to search</string>
</property>
</widget>
</item>
<item row="3" column="2">
<layout class="QHBoxLayout" name="HLayout_3">
<item>
<widget class="QLabel" name="xlabel_24">
<property name="text">
<string>Search mode:</string>
</property>
<property name="buddy">
<cstring>search_mode</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="search_mode">
<property name="toolTip">
<string>Choose whether to use basic text matching or advanced regular expression matching</string>
</property>
</widget>
</item>
<item>
<spacer name="HSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>10</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item row="4" column="0">
<widget class="QLabel" name="xlabel_2">
<property name="text">
<string>&amp;Search for:</string>
@ -360,7 +484,33 @@ Future conversion of these books will use the default settings.</string>
</property>
</widget>
</item>
<item row="3" column="2">
<item row="4" column="1">
<widget class="HistoryLineEdit" name="search_for">
<property name="toolTip">
<string>Enter what you are looking for, either plain text or a regular expression, depending on the mode</string>
</property>
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>100</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
</widget>
</item>
<item row="4" column="2">
<widget class="QCheckBox" name="case_sensitive">
<property name="toolTip">
<string>Check this box if the search string must match upper and lower case exactly. Uncheck it if case is to be ignored</string>
</property>
<property name="text">
<string>Case sensitive</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QLabel" name="xlabel_4">
<property name="text">
<string>&amp;Replace with:</string>
@ -370,29 +520,114 @@ Future conversion of these books will use the default settings.</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QComboBox" name="search_field"/>
</item>
<item row="4" column="1">
<widget class="HistoryLineEdit" name="search_for"/>
</item>
<item row="4" column="2">
<widget class="HistoryLineEdit" name="replace_with"/>
</item>
<item row="5" column="1">
<widget class="HistoryLineEdit" name="replace_with">
<property name="toolTip">
<string>The replacement text. The matched search text will be replaced with this string</string>
</property>
</widget>
</item>
<item row="5" column="2">
<layout class="QHBoxLayout" name="verticalLayout">
<item>
<widget class="QLabel" name="label_41">
<property name="text">
<string>Apply function &amp;after replace:</string>
<string>Apply function after replace:</string>
</property>
<property name="buddy">
<cstring>replace_func</cstring>
</property>
</widget>
</item>
<item row="5" column="2">
<widget class="QComboBox" name="replace_func"/>
<item>
<widget class="QComboBox" name="replace_func">
<property name="toolTip">
<string>Specify how the text is to be processed after matching and replacement. In character mode, the entire
field is processed. In regular expression mode, only the matched text is processed</string>
</property>
</widget>
</item>
<item>
<spacer name="HSpacer_1">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>10</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item row="6" column="0">
<widget class="QLabel" name="destination_field_label">
<property name="text">
<string>&amp;Destination field:</string>
</property>
<property name="buddy">
<cstring>destination_field</cstring>
</property>
</widget>
</item>
<item row="6" column="1">
<widget class="QComboBox" name="destination_field">
<property name="toolTip">
<string>The field that the text will be put into after all replacements. If blank, the source field is used.</string>
</property>
</widget>
</item>
<item row="6" column="2">
<layout class="QHBoxLayout" name="verticalLayout">
<item>
<widget class="QLabel" name="replace_mode_label">
<property name="text">
<string>Mode:</string>
</property>
<property name="buddy">
<cstring>replace_mode</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="replace_mode">
<property name="toolTip">
<string>Specify how the text should be copied into the destination.</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="comma_separated">
<property name="toolTip">
<string>If the replace mode is prepend or append, then this box indicates whether a comma or
nothing should be put between the original text and the inserted text</string>
</property>
<property name="text">
<string>use comma</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<spacer name="zHSpacer_1">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>10</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item row="7" column="1">
<widget class="QLabel" name="xlabel_3">
<property name="text">
<string>Test &amp;text</string>
@ -402,8 +637,8 @@ Future conversion of these books will use the default settings.</string>
</property>
</widget>
</item>
<item row="6" column="2">
<widget class="QLabel" name="label_5">
<item row="7" column="2">
<widget class="QLabel" name="label_51">
<property name="text">
<string>Test re&amp;sult</string>
</property>
@ -412,19 +647,33 @@ Future conversion of these books will use the default settings.</string>
</property>
</widget>
</item>
<item row="7" column="0">
<item row="9" column="0" colspan="4">
<widget class="QScrollArea" name="scrollArea11">
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="widgetResizable">
<bool>true</bool>
</property>
<widget class="QWidget" name="gridLayoutWidget_2">
<layout class="QGridLayout" name="testgrid">
<item row="8" column="0">
<widget class="QLabel" name="label_31">
<property name="text">
<string>Your test:</string>
</property>
</widget>
</item>
<item row="7" column="1">
<item row="8" column="1">
<widget class="HistoryLineEdit" name="test_text"/>
</item>
<item row="7" column="2">
<item row="8" column="2">
<widget class="QLineEdit" name="test_result"/>
</item>
</layout>
</widget>
</widget>
</item>
<item row="20" column="1">
<spacer name="verticalSpacer">
<property name="orientation">
@ -433,7 +682,7 @@ Future conversion of these books will use the default settings.</string>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
<height>0</height>
</size>
</property>
</spacer>


@ -819,7 +819,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
fname = err.filename if err.filename else 'file'
return error_dialog(self, _('Permission denied'),
_('Could not open %s. Is it being used by another'
' program?')%fname, show=True)
' program?')%fname, det_msg=traceback.format_exc(),
show=True)
raise
self.save_state()
QDialog.accept(self)


@ -630,10 +630,16 @@ Using this button to create author sort will change author sort from red to gree
<property name="toolTip">
<string>Remove border (if any) from cover</string>
</property>
<property name="text">
<string>T&amp;rim</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/trim.png</normaloff>:/images/trim.png</iconset>
</property>
<property name="toolButtonStyle">
<enum>Qt::ToolButtonTextBesideIcon</enum>
</property>
</widget>
</item>
<item>


@ -57,6 +57,10 @@ class SchedulerDialog(QDialog, Ui_Dialog):
self.old_news.setValue(gconf['oldest_news'])
def keyPressEvent(self, ev):
if ev.key() not in (Qt.Key_Enter, Qt.Key_Return):
return QDialog.keyPressEvent(self, ev)
def break_cycles(self):
self.disconnect(self.recipe_model, SIGNAL('searched(PyQt_PyObject)'),
self.search_done)


@ -32,7 +32,7 @@
<string>&amp;Explode ePub</string>
</property>
<property name="icon">
<iconset>
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/wizard.png</normaloff>:/images/wizard.png</iconset>
</property>
</widget>
@ -49,7 +49,7 @@
<string>&amp;Rebuild ePub</string>
</property>
<property name="icon">
<iconset>
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset>
</property>
</widget>
@ -63,7 +63,7 @@
<string>&amp;Cancel</string>
</property>
<property name="icon">
<iconset>
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/window-close.png</normaloff>:/images/window-close.png</iconset>
</property>
</widget>
@ -71,7 +71,7 @@
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window. Rebuild the ePub, updating your calibre library.</string>
<string>&lt;p&gt;Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window &lt;b&gt;and the editor windows you used to edit files in the epub&lt;/b&gt;.&lt;/p&gt;&lt;p&gt;Rebuild the ePub, updating your calibre library.&lt;/p&gt;</string>
</property>
<property name="wordWrap">
<bool>true</bool>


@ -152,7 +152,7 @@ class SearchBar(QWidget): # {{{
l.addWidget(x)
x.setToolTip(_("Advanced search"))
self.label = x = QLabel('&Search:')
self.label = x = QLabel(_('&Search:'))
l.addWidget(self.label)
x.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Minimum)


@ -15,10 +15,11 @@ from PyQt4.Qt import QColor, Qt, QModelIndex, QSize, \
QStyledItemDelegate, QCompleter, \
QComboBox
from calibre.gui2 import UNDEFINED_QDATE
from calibre.gui2 import UNDEFINED_QDATE, error_dialog
from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
from calibre.utils.date import now, format_date
from calibre.utils.config import tweaks
from calibre.utils.formatter import validation_formatter
from calibre.gui2.dialogs.comments_dialog import CommentsDialog
class RatingDelegate(QStyledItemDelegate): # {{{
@ -303,6 +304,33 @@ class CcBoolDelegate(QStyledItemDelegate): # {{{
val = 2 if val is None else 1 if not val else 0
editor.setCurrentIndex(val)
# }}}
class CcTemplateDelegate(QStyledItemDelegate): # {{{
def __init__(self, parent):
'''
Delegate for custom columns with a composite (template) datatype.
'''
QStyledItemDelegate.__init__(self, parent)
def createEditor(self, parent, option, index):
return EnLineEdit(parent)
def setModelData(self, editor, model, index):
val = unicode(editor.text())
try:
validation_formatter.validate(val)
except Exception, err:
error_dialog(self.parent(), _('Invalid template'),
'<p>'+_('The template %s is invalid:')%val + \
'<br>'+str(err), show=True)
model.setData(index, QVariant(val), Qt.EditRole)
def setEditorData(self, editor, index):
m = index.model()
val = m.custom_columns[m.column_map[index.column()]]['display']['composite_template']
editor.setText(val)
# }}}


@ -21,7 +21,7 @@ from calibre.utils.date import dt_factory, qt_to_dt, isoformat
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
REGEXP_MATCH, CoverCache
REGEXP_MATCH, CoverCache, MetadataBackup
from calibre.library.cli import parse_series_string
from calibre import strftime, isbytestring, prepare_string_for_xml
from calibre.constants import filesystem_encoding
@ -89,6 +89,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.alignment_map = {}
self.buffer_size = buffer
self.cover_cache = None
self.metadata_backup = None
self.bool_yes_icon = QIcon(I('ok.png'))
self.bool_no_icon = QIcon(I('list_remove.png'))
self.bool_blank_icon = QIcon(I('blank.png'))
@ -120,6 +121,9 @@ class BooksModel(QAbstractTableModel): # {{{
def set_device_connected(self, is_connected):
self.device_connected = is_connected
self.refresh_ondevice()
def refresh_ondevice(self):
self.db.refresh_ondevice()
self.refresh() # does a resort()
self.research()
@ -129,7 +133,7 @@ class BooksModel(QAbstractTableModel): # {{{
def set_database(self, db):
self.db = db
self.custom_columns = self.db.field_metadata.get_custom_field_metadata()
self.custom_columns = self.db.field_metadata.custom_field_metadata()
self.column_map = list(self.orig_headers.keys()) + \
list(self.custom_columns)
def col_idx(name):
@ -151,13 +155,28 @@ class BooksModel(QAbstractTableModel): # {{{
self.database_changed.emit(db)
if self.cover_cache is not None:
self.cover_cache.stop()
# Would like to do a join here, but the thread might be waiting to
# do something on the GUI thread. Deadlock.
self.cover_cache = CoverCache(db, FunctionDispatcher(self.db.cover))
self.cover_cache.start()
self.stop_metadata_backup()
self.start_metadata_backup()
def refresh_cover(event, ids):
if event == 'cover' and self.cover_cache is not None:
self.cover_cache.refresh(ids)
db.add_listener(refresh_cover)
def start_metadata_backup(self):
self.metadata_backup = MetadataBackup(self.db)
self.metadata_backup.start()
def stop_metadata_backup(self):
if getattr(self, 'metadata_backup', None) is not None:
self.metadata_backup.stop()
# Would like to do a join here, but the thread might be waiting to
# do something on the GUI thread. Deadlock.
def refresh_ids(self, ids, current_row=-1):
rows = self.db.refresh_ids(ids)
if rows:
@ -318,7 +337,11 @@ class BooksModel(QAbstractTableModel): # {{{
data[_('Series')] = \
_('Book <font face="serif">%s</font> of %s.')%\
(sidx, prepare_string_for_xml(series))
mi = self.db.get_metadata(idx)
for key in mi.custom_field_keys():
name, val = mi.format_field(key)
if val:
data[name] = val
return data
def set_cache(self, idx):
@ -367,7 +390,6 @@ class BooksModel(QAbstractTableModel): # {{{
return ans
def get_metadata(self, rows, rows_are_ids=False, full_metadata=False):
# Should this add the custom columns? It doesn't at the moment
metadata, _full_metadata = [], []
if not rows_are_ids:
rows = [self.db.id(row.row()) for row in rows]
@ -616,8 +638,9 @@ class BooksModel(QAbstractTableModel): # {{{
for col in self.custom_columns:
idx = self.custom_columns[col]['rec_index']
datatype = self.custom_columns[col]['datatype']
if datatype in ('text', 'comments'):
self.dc[col] = functools.partial(text_type, idx=idx, mult=self.custom_columns[col]['is_multiple'])
if datatype in ('text', 'comments', 'composite'):
self.dc[col] = functools.partial(text_type, idx=idx,
mult=self.custom_columns[col]['is_multiple'])
elif datatype in ('int', 'float'):
self.dc[col] = functools.partial(number_type, idx=idx)
elif datatype == 'datetime':
@ -692,7 +715,8 @@ class BooksModel(QAbstractTableModel): # {{{
return flags
def set_custom_column_data(self, row, colhead, value):
typ = self.custom_columns[colhead]['datatype']
cc = self.custom_columns[colhead]
typ = cc['datatype']
label=self.db.field_metadata.key_to_label(colhead)
s_index = None
if typ in ('text', 'comments'):
@ -718,8 +742,18 @@ class BooksModel(QAbstractTableModel): # {{{
val = qt_to_dt(val, as_utc=False)
elif typ == 'series':
val, s_index = parse_series_string(self.db, label, value.toString())
self.db.set_custom(self.db.id(row), val, extra=s_index,
elif typ == 'composite':
tmpl = unicode(value.toString()).strip()
disp = cc['display']
disp['composite_template'] = tmpl
self.db.set_custom_column_metadata(cc['colnum'], display = disp)
self.refresh(reset=True)
return True
id = self.db.id(row)
self.db.set_custom(id, val, extra=s_index,
label=label, num=None, append=False, notify=True)
self.refresh_ids([id], current_row=row)
return True
def setData(self, index, value, role):
@ -764,6 +798,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.db.set_pubdate(id, qt_to_dt(val, as_utc=False))
else:
self.db.set(row, column, val)
self.refresh_rows([row], row)
self.dataChanged.emit(index, index)
return True
@ -887,7 +922,7 @@ class DeviceBooksModel(BooksModel): # {{{
}
self.marked_for_deletion = {}
self.search_engine = OnDeviceSearch(self)
self.editable = True
self.editable = ['title', 'authors', 'collections']
self.book_in_library = None
def mark_for_deletion(self, job, rows, rows_are_ids=False):
@ -933,13 +968,13 @@ class DeviceBooksModel(BooksModel): # {{{
if self.map[index.row()] in self.indices_to_be_deleted():
return Qt.ItemIsUserCheckable # Can't figure out how to get the disabled flag in python
flags = QAbstractTableModel.flags(self, index)
if index.isValid() and self.editable:
if index.isValid():
cname = self.column_map[index.column()]
if cname in ('title', 'authors') or \
(cname == 'collections' and \
callable(getattr(self.db, 'supports_collections', None)) and \
if cname in self.editable and \
(cname != 'collections' or \
(callable(getattr(self.db, 'supports_collections', None)) and \
self.db.supports_collections() and \
prefs['manage_device_metadata']=='manual'):
prefs['manage_device_metadata']=='manual')):
flags |= Qt.ItemIsEditable
return flags
@ -1054,6 +1089,9 @@ class DeviceBooksModel(BooksModel): # {{{
img = QImage()
if hasattr(cdata, 'image_path'):
img.load(cdata.image_path)
elif cdata:
if isinstance(cdata, (tuple, list)):
img.loadFromData(cdata[-1])
else:
img.loadFromData(cdata)
if img.isNull():
@ -1220,7 +1258,14 @@ class DeviceBooksModel(BooksModel): # {{{
def set_editable(self, editable):
# Cannot edit if metadata is sent on connect. Reason: changes will
# revert to what is in the library on next connect.
self.editable = editable and prefs['manage_device_metadata']!='on_connect'
if isinstance(editable, list):
self.editable = editable
elif editable:
self.editable = ['title', 'authors', 'collections']
else:
self.editable = []
if prefs['manage_device_metadata']=='on_connect':
self.editable = []
def set_search_restriction(self, s):
pass


@ -9,11 +9,11 @@ import os
from functools import partial
from PyQt4.Qt import QTableView, Qt, QAbstractItemView, QMenu, pyqtSignal, \
QModelIndex, QIcon
QModelIndex, QIcon, QItemSelection
from calibre.gui2.library.delegates import RatingDelegate, PubDateDelegate, \
TextDelegate, DateDelegate, TagsDelegate, CcTextDelegate, \
CcBoolDelegate, CcCommentsDelegate, CcDateDelegate
CcBoolDelegate, CcCommentsDelegate, CcDateDelegate, CcTemplateDelegate
from calibre.gui2.library.models import BooksModel, DeviceBooksModel
from calibre.utils.config import tweaks, prefs
from calibre.gui2 import error_dialog, gprefs
@ -47,6 +47,7 @@ class BooksView(QTableView): # {{{
self.cc_text_delegate = CcTextDelegate(self)
self.cc_bool_delegate = CcBoolDelegate(self)
self.cc_comments_delegate = CcCommentsDelegate(self)
self.cc_template_delegate = CcTemplateDelegate(self)
self.display_parent = parent
self._model = modelcls(self)
self.setModel(self._model)
@ -391,6 +392,8 @@ class BooksView(QTableView): # {{{
self.setItemDelegateForColumn(cm.index(colhead), self.cc_bool_delegate)
elif cc['datatype'] == 'rating':
self.setItemDelegateForColumn(cm.index(colhead), self.rating_delegate)
elif cc['datatype'] == 'composite':
self.setItemDelegateForColumn(cm.index(colhead), self.cc_template_delegate)
else:
dattr = colhead+'_delegate'
delegate = colhead if hasattr(self, dattr) else 'text'
@ -485,29 +488,29 @@ class BooksView(QTableView): # {{{
Select rows identified by identifiers. identifiers can be a set of ids,
row numbers or QModelIndexes.
'''
selmode = self.selectionMode()
self.setSelectionMode(QAbstractItemView.MultiSelection)
try:
rows = set([x.row() if hasattr(x, 'row') else x for x in
identifiers])
if using_ids:
rows = set([])
identifiers = set(identifiers)
m = self.model()
for row in range(m.rowCount(QModelIndex())):
for row in xrange(m.rowCount(QModelIndex())):
if m.id(row) in identifiers:
rows.add(row)
rows = list(sorted(rows))
if rows:
row = list(sorted(rows))[0]
row = rows[0]
if change_current:
self.set_current_row(row, select=False)
if scroll:
self.scroll_to_row(row)
self.clearSelection()
for r in rows:
self.selectRow(r)
finally:
self.setSelectionMode(selmode)
sm = self.selectionModel()
sel = QItemSelection()
m = self.model()
max_col = m.columnCount(QModelIndex()) - 1
for row in rows:
sel.select(m.index(row, 0), m.index(row, max_col))
sm.select(sel, sm.ClearAndSelect)
def close(self):
self._model.close()

Some files were not shown because too many files have changed in this diff.