[Sync] Sync with trunk. Revision 10065.
Changelog.yaml (189 changed lines)
@@ -19,6 +19,191 @@
# new recipes:
# - title:

- version: 0.8.12
  date: 2011-07-29

  new features:
    - title: "Content server: Return the correct last modified date when serving ebook files. Also allow getting of book metadata as /get/opf/<book_id>"

    - title: "Driver for the COBY MP977"

    - title: "Get Books: Remove epub bud store. Add Ozon.ru and e-knigni.net stores. Fix broken amazon UK and DE stores."
      tickets: [816091]

    - title: "Add a new tweak to Preferences->Tweaks that allows auto generation of series numbers when importing books with a series name, but no number"
      tickets: [815573]

  bug fixes:
    - title: "Fix a regression in 0.8.11 that broke calibre on linux systems that use a file system encoding that cannot support cyrillic characters"
      tickets: [815224]

    - title: "Fix long titles not wrapping in cover browser"
      tickets: [816595]

    - title: "When adding books, handle the case of files without read permission more gracefully."
      tickets: [814771]

    - title: "When changing metadata in EPUB files do not use the opf: namespace prefix on newly created elements. Apparently, FBReaderJ doesn't understand XML namespaces."
      tickets: [814722]

    - title: "Prevent metadata download from returning published dates earlier than 101 A.D."

    - title: "Fix a bug where dates before 101 A.D. in the database could cause errors"
      tickets: [814964]

    - title: "Fix an error in the book details panel if the user sets the default author link to blank"

  improved recipes:
    - The Economist
    - Instapaper
    - Corren

  new recipes:
    - title: Counterpunch
      author: O. Emmerson

    - title: National Geographic (PL)
      author: Marcin Urban

    - title: Caros Amigos
      author: Pablo Aldama

    - title: Aksiyon Dergisi
      author: thomass

    - title: Dnevnik (MK) and +Info
      author: Darko Spasovski

    - title: Dagens Industri
      author: Jonas Svensson


- version: 0.8.11
  date: 2011-07-22

  new features:
    - title: "When doing a conversion from some format to the same format, save the original file"
      description: "When calibre does a conversion from the same format to the same format, for
      example, from EPUB to EPUB, the original file is saved as original_epub, so that in case the
      conversion is poor, you can change the settings and run it again. The original is automatically used
      every time you run a conversion with that format as input. If you want to disable this,
      there is a tweak that prevents calibre from saving the originals in Preferences->Tweaks. You can
      easily replace the converted version with the original in the Edit metadata dialog by right
      clicking on the list of formats in the top right corner."
      type: major

    - title: "Conversion pipeline: Add an option to control the height of the blank lines inserted by calibre"

    - title: "Drivers for bq DaVinci, Samsung Galaxy ACE GT-S5830 and Medion e-reader"

    - title: "Get Books: Add stores Chitanka and Bookoteka. Remove epubbuy.de at store's request"

    - title: "Content server: Add a link at the bottom of the mobile interface to switch to the full interface."
      tickets: [812525]

    - title: "Update the kindle icon shown when a Kindle is connected to use a picture of the Kindle 3"
      tickets: [810852]

    - title: "MOBI Output: When converting epub documents that have a start element in their guide, use it to mark the starting position at which the MOBI file will be opened."
      tickets: [804755]

    - title: "News download: Add a default Accept header to all requests"

  bug fixes:
    - title: "Fix regression that broke loading translations from .po files in the working directory"

    - title: "Fix conversion dialog not allowing series numbers larger than 9999"
      tickets: [813281]

    - title: "Conversion pipeline: When adding/removing entries to the manifest, ignore unparseable URLs instead of erroring out on them"

    - title: "SD Card in Azbooka not being detected"
      tickets: [812750]

    - title: "Conversion pipeline: Strip out large blocks of contiguous space (more than 10000 contiguous blanks) as these slow down the conversion process and are almost always indicative of an error in the input document."

    - title: "ebook-convert: Abort if a keyboard interrupt is raised during parsing"

    - title: "Regex builder: Show a nicer error message when the user has the file open in another program on windows."
      tickets: [811641]

    - title: "When converting in the GUI, set all identifiers present in the book's metadata in the output file, if the output format supports them."

  improved recipes:
    - NBOnline
    - JBPress
    - Instapaper
    - Die Zeit
    - Wired (UK)

  new recipes:
    - title: Utrinski Vesnik
      author: Darko Spasovski

    - title: IDG.se
      author: zapt0

    - title: Los Andes
      author: Darko Miletic

    - title: De Luns a Venres
      author: Susana Sotelo Docío

    - title: "Nikkei News subscription version"
      author: Ado Nishimura

- version: 0.8.10
  date: 2011-07-15

  new features:
    - title: "Add a right click menu to the cover browser. It allows you to view a book, edit metadata etc. from within the cover browser. The menu can be customized in Preferences->Toolbars"

    - title: "Allow selecting and stopping multiple jobs at once in the jobs window"
      tickets: [810349]

    - title: "When editing metadata directly in the book list, have a little pop up menu so that all existing values can be accessed by mouse only. For example, when you edit authors, you can use the mouse to select an existing author."

    - title: "Get Books: Add ebook.nl and fix price parsing for the legimi store"

    - title: "Drivers for Samsung Infuse and Motorola XPERT"

- title: "Tag Browser: Make hierarchical items work in group searched terms."

  bug fixes:
    - title: "Allow setting numbers larger than 99 in custom series columns"

- title: "Fix a bug that caused the same news download sent via a USB connection to the device on two different days resulting in a duplicate on the device"

    - title: "Ensure English in the list of interface languages in Preferences is always listed in English, so that it does not become hard to find"

    - title: "SNB Output: Fix bug in handling unicode file names"

    - title: "Fix sorting problem in manage categories. Fix poor performance problem when dropping multiple books onto a user category."

    - title: "Remove 'empty field' error dialogs in bulk search/replace, instead setting the fields to their default value."

    - title: "Fix regression that broke communicating with Kobo devices using outdated firmware"
      tickets: [807832]

    - title: "LRF Input: Fix conversion of LRF files with non ascii titles on some windows systems"
      tickets: [807641]

  improved recipes:
    - Time
    - Freakonomics Blog
    - io9
    - "Computer Act!ve"

  new recipes:
    - title: Techcrunch and Pecat
      author: Darko Miletic

    - title: Vio Mundo, IDG Now and Tojolaco
      author: Diniz Bortoletto

    - title: Geek and Poke, Automatiseringgids IT
      author: DrMerry

- version: 0.8.9
  date: 2011-07-08

@@ -32,7 +217,7 @@
    - title: "Conversion pipeline: Add option to control if duplicate entries are allowed when generating the Table of Contents from links."
      tickets: [806095]

    - title: "Metadata download: When merging results, if the query to the xisbn service hangs, wait no more than 10 seconds. Also try harder to preserve the month when downlaoding published date. Do not throw away isbnless results if there are some sources that return isbns and some that do not."
    - title: "Metadata download: When merging results, if the query to the xisbn service hangs, wait no more than 10 seconds. Also try harder to preserve the month when downloading published date. Do not throw away isbnless results if there are some sources that return isbns and some that do not."
      tickets: [798309]

    - title: "Get Books: Remove OpenLibrary since it has the same files as archive.org. Allow direct downloading from Project Gutenberg."

@@ -617,7 +802,7 @@

- version: 0.8.0
  date: 2010-05-06
  date: 2011-05-06

  new features:
    - title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0"
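As an aside, the /get/opf/<book_id> URL added to the content server in the 0.8.12 entry above can be exercised with a few lines of Python. This is only a sketch, assuming a content server running on localhost at its default port 8080 and a book with id 1 (both are assumptions, not part of this commit):

    # Fetch a book's metadata as an OPF (XML) document from a running
    # calibre content server. Port 8080 is the server's default; the
    # book id 1 is a hypothetical example.
    import urllib2
    opf_xml = urllib2.urlopen('http://localhost:8080/get/opf/1').read()
    print opf_xml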
imgsrc/random.svg (new file, 758 lines)
@@ -0,0 +1,758 @@
[758 lines of SVG source: a 128x128 icon created with Inkscape (document name pointer.svgz, from the Oxygen icon sources), consisting of XML/RDF boilerplate, a long series of linearGradient/radialGradient, clipPath and Gaussian-blur filter definitions, and four <path> elements that draw the icon. Rendered size: 25 KiB.]
recipes/aksiyon_derigisi.recipe (new file, 53 lines)
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe

class Aksiyon (BasicNewsRecipe):

    title = u'Aksiyon Dergisi'
    __author__ = u'thomass'
    description = 'Haftalık haber dergisi '
    oldest_article = 13
    max_articles_per_feed = 100
    no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    encoding = 'utf-8'
    publisher = 'Aksiyon'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    publication_type = 'magazine'
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
    remove_tags = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ]

    cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
    masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
    remove_empty_feeds = True
    remove_attributes = ['width','height']

    feeds = [
        ( u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
        ( u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
        ( u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
        ( u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
        ( u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
        ( u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'),
        ( u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'),
        ( u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
        ( u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
        ( u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
        ( u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
        ( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
        ( u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
        ( u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
        ( u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
        ( u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
        ( u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
    ]

    def print_version(self, url):
        return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
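For reference, a new recipe file like the one above can be test-built without a full download by pointing calibre's conversion tool at it; the recipe input's --test flag restricts the fetch to a couple of feeds and articles. A sketch, assuming the calibre command-line tools are installed and you are in a source checkout:

    ebook-convert recipes/aksiyon_derigisi.recipe test.epub --test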
recipes/caros_amigos.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
__copyright__ = '2011, Pablo Aldama <pabloaldama at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1311839910(BasicNewsRecipe):
    title = u'Caros Amigos'
    oldest_article = 20
    max_articles_per_feed = 100
    language = 'pt_BR'
    __author__ = 'Pablo Aldama'

    feeds = [(u'Caros Amigos', u'http://carosamigos.terra.com.br/index/index.php?format=feed&type=rss')]
    keep_only_tags = [dict(name='div', attrs={'class':['blog']})
        ,dict(name='div', attrs={'class':['blogcontent']})
        ]
    remove_tags = [dict(name='div', attrs={'class':'addtoany'})]
recipes/carta_capital.recipe (new file, 23 lines)
@@ -0,0 +1,23 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1312361378(BasicNewsRecipe):
    title = u'Carta capital'
    __author__ = 'Pablo Aldama'
    language = 'pt_BR'
    oldest_article = 9
    max_articles_per_feed = 100

    feeds = [(u'Politica', u'http://www.cartacapital.com.br/category/politica/feed')
        ,(u'Economia', u'http://www.cartacapital.com.br/category/economia/feed')
        ,(u'Cultura', u'http://www.cartacapital.com.br/category/cultura/feed')
        ,(u'Internacional', u'http://www.cartacapital.com.br/category/internacional/feed')
        ,(u'Saude', u'http://www.cartacapital.com.br/category/saude/feed')
        ,(u'Sociedade', u'http://www.cartacapital.com.br/category/sociedade/feed')
        ,(u'Tecnologia', u'http://www.cartacapital.com.br/category/tecnologia/feed')
        ,(u'Carta na escola', u'http://www.cartacapital.com.br/category/carta-na-escola/feed')
        ,(u'Carta fundamental', u'http://www.cartacapital.com.br/category/carta-fundamental/feed')
        ,(u'Carta verde', u'http://www.cartacapital.com.br/category/carta-verde/feed')
        ]

    def print_version(self, url):
        return url + '/print'
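The print_version hook defined here (and in several recipes below) is called by calibre on every article URL found in a feed, and the URL it returns is what actually gets downloaded. A sketch of the mapping for this recipe, using a hypothetical article path:

    # Illustration only; the article path is made up.
    url = 'http://www.cartacapital.com.br/politica/algum-artigo'
    print url + '/print'
    # -> http://www.cartacapital.com.br/politica/algum-artigo/print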
recipes/corren.recipe
@@ -1,39 +1,34 @@
# -*- coding: utf-8 -*-

__license__ = 'GPLv3'

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1255797795(BasicNewsRecipe):
    title = u'Corren'
    language = 'sv'
class AdvancedUserRecipe1311446032(BasicNewsRecipe):
    title = 'Corren'
    __author__ = 'Jonas Svensson'
    simultaneous_downloads = 1
    no_stylesheets = True
    oldest_article = 7
    description = 'News from Sweden'
    publisher = 'Corren'
    category = 'news, politics, Sweden'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 100
    remove_attributes = ['onload']
    timefmt = ''
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'iso-8859-1'
    language = 'sv'

    feeds = [
        (u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'),
        (u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'),
        (u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'),
        (u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'),
        (u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'),
        (u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'),
        (u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'),
        (u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'),
        (u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'),
        (u'Motala', u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/')
        (u'Toppnyheter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/')
        ,(u'Ekonomi', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/')
        ,(u'Link\xf6ping', u'http://www.corren.se/inc/RssHandler.ashx?id=4122234')
        ,(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223,4122224,4122226,4122227,4122228,4122229,4122230')
        ]

    def print_version(self, url):
        url = url.replace("ekonomi/artikel.aspx", "Print.aspx")
        url = url.replace("bostad/artikel.aspx", "Print.aspx")
        url = url.replace("kultur/artikel.aspx", "Print.aspx")
        url = url.replace("motor/artikel.aspx", "Print.aspx")
        url = url.replace("mat-dryck/artikel.aspx", "Print.aspx")
        url = url.replace("sport/artikel.aspx", "Print.aspx")
        url = url.replace("asikter/artikel.aspx", "Print.aspx")
        url = url.replace("mat-dryck/artikel.aspx", "Print.aspx")
        url = url.replace("ostergotland/mjolby/artikel.aspx", "Print.aspx")
        url = url.replace("ostergotland/motala/artikel.aspx", "Print.aspx")
        return url.replace("nyheter/artikel.aspx", "Print.aspx")

    keep_only_tags = [dict(name='div', attrs={'id':'article'}),dict(name='div', attrs={'class':'body'})]
    remove_tags = [
        dict(name='ul',attrs={'class':'functions'})
        ,dict(name='a',attrs={'href':'javascript*'})
        ,dict(name='div',attrs={'class':'box'})
        ,dict(name='div',attrs={'class':'functionsbottom'})
        ]
recipes/counterpunch.recipe (new file, 40 lines)
@@ -0,0 +1,40 @@
import re
from lxml.html import parse
from calibre.web.feeds.news import BasicNewsRecipe

class Counterpunch(BasicNewsRecipe):
    '''
    Parses counterpunch.com for articles
    '''
    title = 'Counterpunch'
    description = 'Daily political opinion from www.Counterpunch.com'
    language = 'en'
    __author__ = 'O. Emmerson'
    keep_only_tags = [dict(name='td', attrs={'width': '522'})]
    max_articles_per_feed = 10

    def parse_index(self):
        feeds = []
        title, url = 'Counterpunch', 'http://www.counterpunch.com'
        articles = self.parse_page(url)
        if articles:
            feeds.append((title, articles))
        return feeds

    def parse_page(self, url):
        parsed_page = parse(url).getroot()
        articles = []
        unwanted_text = re.compile('Website\ of\ the|I\ urge\ you|Subscribe\ now|DONATE|\@asis\.com|donation\ button|click\ over\ to\ our')
        parsed_articles = [a for a in parsed_page.cssselect("html>body>table tr>td>p[class='style2']") if not unwanted_text.search(a.text_content())]
        for art in parsed_articles:
            try:
                author = art.text
                title = art.cssselect("a")[0].text + ' by {0}'.format(author)
                art_url = 'http://www.counterpunch.com/' + art.cssselect("a")[0].attrib['href']
                articles.append({'title': title, 'url': art_url})
            except Exception as e:
                # Skip entries that fail to parse; uncomment to debug:
                #print('Handler Error: ', e, 'title :', art.text_content())
                pass
        return articles
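Unlike the feed-based recipes in this commit, Counterpunch scrapes its index page directly, so it implements parse_index(). calibre expects parse_index() to return a list of (section title, article list) tuples, where each article is a dict. A minimal sketch of that shape, with made-up values:

    # The return shape parse_index() must produce (values are illustrative):
    feeds = [
        ('Counterpunch', [
            {'title': 'Some headline by Some Author',
             'url': 'http://www.counterpunch.com/some-article.html'},
        ]),
    ]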
recipes/dagens_industri.recipe (new file, 32 lines)
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-

__license__ = 'GPLv3'

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1311450855(BasicNewsRecipe):
    title = u'Dagens Industri'
    __author__ = 'Jonas Svensson'
    description = 'Economy news from Sweden'
    publisher = 'DI'
    category = 'news, politics, Sweden'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'sv'

    feeds = [(u'DI', u'http://di.se/rss')]

    keep_only_tags = [dict(name='h1', attrs={'id':'ctl00_ExtraWideContentRegion_WideContentRegion_MainRegion_MainContentRegion_MainBodyRegion_headlineNormal'}),dict(name='div', attrs={'id':'articleBody'})]

    remove_tags = [
        dict(name='div',attrs={'class':'article-actions clear'})
        ,dict(name='div',attrs={'class':'article-action-popup'})
        ,dict(name='div',attrs={'class':'header'})
        ,dict(name='div',attrs={'class':'content clear'})
        ,dict(name='div',attrs={'id':'articleAdvertisementDiv'})
        ,dict(name='ul',attrs={'class':'action-list'})
        ]
recipes/dnevnik_mk.recipe (new file, 98 lines)
@@ -0,0 +1,98 @@
#!/usr/bin/env python

__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
dnevnik.com.mk
'''

import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class Dnevnik(BasicNewsRecipe):

    INDEX = 'http://www.dnevnik.com.mk'
    __author__ = 'Darko Spasovski'
    title = 'Dnevnik - mk'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.dnevnik.com.mk/images/re-logo.gif'
    language = 'mk'
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            ## Remove anything before the start of the article.
            (r'<body.*?<\?xml version=\"1.0\"\?><!--Article start-->', lambda match: '<body>'),

            ## Remove anything after the end of the article.
            (r'<!--Article end.*?</body>', lambda match : '</body>'),
        ]
    ]

    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        .WB_DNEVNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
    """

    conversion_options = {
        'comment'          : description,
        'tags'             : category,
        'language'         : language,
        'linearize_tables' : True
    }

    def parse_index(self):
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        soup = self.index_to_soup(self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum)
        feeds = []
        for section in soup.findAll('td', attrs={'class':'WB_DNEVNIK_ArhivaFormTitle'}):
            sectionTitle = section.contents[0].string
            if sectionTitle.lower().startswith('online'):
                # Skip online articles
                continue
            containerTable = section.findPrevious(name='table').findNextSibling(name='table')
            if containerTable==None:
                print 'No container table found - page layout may have been changed.'
                continue
            articles = []
            for article in containerTable.findAll('a', attrs={'class': 'WB_DNEVNIK_ArhivaFormText'}):
                title = self.tag_to_string(article, use_alt=True).strip()
                articles.append({'title': title, 'url':'http://www.dnevnik.com.mk/' + article['href'], 'description':'', 'date':''})
            if articles:
                feeds.append((sectionTitle, articles))
        return sorted(feeds, key=lambda section: self.get_weight(section))

    def get_weight(self, section):
        """
        Returns 'weight' of a section.
        Used for sorting the sections based on their 'natural' order in the printed edition.
        """
        natural_order = { u'во фокусот': 1, u'актуелно': 2, u'економија': 3,
            u'отворена': 4, u'свет': 5, u'интервју': 6, u'џубокс': 7,
            u'репортажа': 8, u'наш туризам': 9, u'живот': 10,
            u'автомобилизам': 11, u'спорт': 12, u'омнибус': 13 }
        if section[0].string.lower() in natural_order:
            return natural_order[section[0].string.lower()]
        else:
            return 999 # section names not on the list go to the bottom

    def get_cover_url(self):
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        soup = self.index_to_soup(self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum)
        anchor = soup.find('a', attrs={'class': 'WB_DNEVNIK_MoreLink'})
        if anchor != None:
            raw = browser().open_novisit(self.INDEX + '/' + anchor['href']).read()
            cover_soup = BeautifulSoup(raw)
            url = cover_soup.find('div', attrs={'class':'WB_DNEVNIK_Datum2'}).findNext('img')['src']
            return self.INDEX + '/' + url
        return ''
recipes/economist.recipe
@@ -6,10 +6,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
economist.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
from collections import OrderedDict

import string, time, re
import time, re

class Economist(BasicNewsRecipe):

@@ -22,10 +22,12 @@ class Economist(BasicNewsRecipe):
    ' perspective. Best downloaded on Friday mornings (GMT)')
    extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
    oldest_article = 7.0
    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
    remove_tags = [
        dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
        dict(attrs={'class':['dblClkTrk', 'ec-article-info', 'share_inline_header']}),
        dict(attrs={'class':['dblClkTrk', 'ec-article-info',
            'share_inline_header', 'related-items']}),
        {'class': lambda x: x and 'share-links-header' in x},
    ]
    keep_only_tags = [dict(id='ec-article-body')]
@@ -67,52 +69,54 @@
        return self.economist_parse_index()

    def economist_parse_index(self):
        soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
                             convertEntities=BeautifulSoup.HTML_ENTITIES)
        index_started = False
        feeds = {}
        ans = []
        key = None
        for tag in soup.findAll(['h1', 'h2']):
            text = ''.join(tag.findAll(text=True))
            if tag.name in ('h1', 'h2') and 'Classified ads' in text:
                break
            if tag.name == 'h1':
                if 'The world this week' in text or 'The world this year' in text:
                    index_started = True
                if not index_started:
        soup = self.index_to_soup(self.INDEX)
        feeds = OrderedDict()
        for section in soup.findAll(attrs={'class':lambda x: x and 'section' in
                x}):
            h4 = section.find('h4')
            if h4 is None:
                continue
                text = string.capwords(text)
                if text not in feeds.keys():
                    feeds[text] = []
                if text not in ans:
                    ans.append(text)
                key = text
            section_title = self.tag_to_string(h4).strip()
            if not section_title:
                continue
            if key is None:
            self.log('Found section: %s'%section_title)
            articles = []
            for h5 in section.findAll('h5'):
                article_title = self.tag_to_string(h5).strip()
                if not article_title:
                    continue
                a = tag.find('a', href=True)
                data = h5.findNextSibling(attrs={'class':'article'})
                if data is None: continue
                a = data.find('a', href=True)
                if a is None: continue
                url = a['href']
                if url.startswith('/'): url = 'http://www.economist.com'+url
                url += '/print'
                article_title += ': %s'%self.tag_to_string(a).strip()
                articles.append({'title':article_title, 'url':url,
                    'description':'', 'date':''})
            if not articles:
                # We have last or first section
                for art in section.findAll(attrs={'class':'article'}):
                    a = art.find('a', href=True)
                    if a is not None:
                        url=a['href']
                id_ = re.search(r'story_id=(\d+)', url).group(1)
                url = 'http://www.economist.com/node/%s/print'%id_
                if url.startswith('Printer'):
                    url = '/'+url
                if url.startswith('/'):
                    url = 'http://www.economist.com' + url
                try:
                    subtitle = tag.previousSibling.contents[0].contents[0]
                    text = subtitle + ': ' + text
                except:
                    pass
                article = dict(title=text,
                    url = url,
                    description='', content='', date='')
                feeds[key].append(article)
                        url = a['href']
                        if url.startswith('/'): url = 'http://www.economist.com'+url
                        url += '/print'
                        title = self.tag_to_string(a)
                        if title:
                            articles.append({'title':title, 'url':url,
                                'description':'', 'date':''})

        ans = [(key, feeds[key]) for key in ans if feeds.has_key(key)]
            if articles:
                feeds[section_title] = articles

        ans = [(key, val) for key, val in feeds.iteritems()]
        if not ans:
            raise Exception('Could not find any articles. Has your subscription expired?')
            raise Exception('Could not find any articles, either the '
                'economist.com server is having trouble and you should '
                'try later or the website format has changed and the '
                'recipe needs to be updated.')
        return ans

    def eco_find_image_tables(self, soup):
recipes/economist_free.recipe
@@ -16,11 +16,12 @@ class Economist(BasicNewsRecipe):
    ' Much slower than the print edition based version.')
    extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
    oldest_article = 7.0
    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
    remove_tags = [
        dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
        dict(attrs={'class':['dblClkTrk', 'ec-article-info',
            'share_inline_header']}),
            'share_inline_header', 'related-items']}),
        {'class': lambda x: x and 'share-links-header' in x},
    ]
    keep_only_tags = [dict(id='ec-article-body')]
recipes/el_colombiano.recipe (new file, 58 lines)
@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1311790237(BasicNewsRecipe):
    title = u'Periódico El Colombiano'
    language = 'es_CO'
    __author__ = 'BIGO-CAVA'
    cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after = dict(id='enviaTips')
    remove_tags_after = dict(id='zonaPata')
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
    publication_type = 'newspaper'

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'),
        (u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'),
        (u'Colombia', u'http://www.elcolombiano.com/rss/Colombia.xml'),
        (u'Economia', u'http://www.elcolombiano.com/rss/Economia.xml'),
        (u'Internacional', u'http://www.elcolombiano.com/rss/Internacional.xml'),
        (u'Politica', u'http://www.elcolombiano.com/rss/Politica.xml'),
        (u'Cultura', u'http://www.elcolombiano.com/rss/Cultura.xml'),
        (u'Entretenimiento', u'http://www.elcolombiano.com/rss/Farandula.xml'),
        (u'Tecnologia', u'http://www.elcolombiano.com/rss/Tecnologia.xml'),
        (u'Television', u'http://www.elcolombiano.com/rss/Television.xml'),
        (u'Vida y Sociedad', u'http://www.elcolombiano.com/rss/Vida.xml'),
        (u'Turismo', u'http://www.elcolombiano.com/rss/Turismo.xml'),
        (u'Salud', u'http://www.elcolombiano.com/rss/Salud.xml'),
        (u'Ciencia', u'http://www.elcolombiano.com/rss/Ciencia.xml')]

    remove_tags = [dict(name='div', attrs={'class':'objetosRelacionados'}),
        dict(name='div', attrs={'class':'notasRelacionadas contenedor'}),
        dict(name='div', attrs={'class':'comentarios'}),
        dict(name='div', attrs={'class':'mapaDelSitio'}),
        dict(name='div', attrs={'class':'creditos'}),
        dict(name='div', attrs={'class':'votos'}),
        dict(name='div', attrs={'class':'divopt2'}),
        dict(name='div', attrs={'class':'comentarios'}),
        dict(name='div', attrs={'class':'pestanasLateral'}),
        dict(name='div', attrs={'class':'resumenSeccion'}),
        dict(name='div', attrs={'class':'zonaComercial'}),
        dict(name='div', attrs={'id':'zonaPata'})]
recipes/el_tiempo.recipe (new file)
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe


class ColombiaElTiempo02(BasicNewsRecipe):
    title = u'Periódico el Tiempo'
    language = 'es_CO'
    __author__ = 'BIGO-CAVA'
    cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    remove_tags_before = dict(id='fb-root')
    remove_tags_after = [dict(name='div', attrs={'class':'modulo reporte'})]
    keep_only_tags = [dict(name='div', id='contenidoArt')]
    remove_tags = [dict(name='div', attrs={'class':'social-media'}),
                   dict(name='div', attrs={'class':'caja-facebook'}),
                   dict(name='div', attrs={'class':'caja-twitter'}),
                   dict(name='div', attrs={'class':'caja-buzz'}),
                   dict(name='div', attrs={'class':'ico-mail2'}),
                   dict(name='div', attrs={'id':'caja-instapaper'}),
                   dict(name='div', attrs={'class':'modulo herramientas'})]
    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    publication_type = 'newspaper'

    extra_css = """
                p{text-align: justify; font-size: 100%}
                body{ text-align: left; font-size:100% }
                h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
                h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
                """

    feeds = [(u'Colombia', u'http://www.eltiempo.com/colombia/rss.xml'),
             (u'Medellin', u'http://www.eltiempo.com/colombia/medellin/rss.xml'),
             (u'Economia', u'http://www.eltiempo.com/economia/rss.xml'),
             (u'Deportes', u'http://www.eltiempo.com/deportes/rss.xml'),
             (u'Mundo', u'http://www.eltiempo.com/mundo/rss.xml'),
             (u'Gente', u'http://www.eltiempo.com/gente/rss.xml'),
             (u'Vida de Hoy', u'http://www.eltiempo.com/vida-de-hoy/rss.xml'),
             (u'EEUU', u'http://www.eltiempo.com/mundo/estados-unidos/rss.xml'),
             (u'LatinoAmerica', u'http://www.eltiempo.com/mundo/latinoamerica/rss.xml'),
             (u'Europa', u'http://www.eltiempo.com/mundo/europa/rss.xml'),
             (u'Medio Oriente', u'http://www.eltiempo.com/mundo/medio-oriente/rss.xml'),
             (u'Vive in Medellin', u'http://medellin.vive.in/medellin/rss.xml'),
             (u'Don Juan', u'http://www.revistadonjuan.com/feedrss/'),
             (u'Alo', u'http://www.eltiempo.com/alo/rss.xml')]
@@ -1,25 +1,29 @@
#!/usr/bin/env python
__license__   = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__copyright__ = '2011, Starson17'
__docformat__ = 'restructuredtext en'

from calibre.web.feeds.news import BasicNewsRecipe

class Freakonomics(BasicNewsRecipe):

    title = 'Freakonomics Blog'
    description = 'The Hidden side of everything'
    __author__ = 'Starson17'
    __version__ = '1.02'
    __date__ = '11 July 2011'
    language = 'en'
    cover_url = 'http://ilkerugur.files.wordpress.com/2009/04/freakonomics.jpg'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 30
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 50

    feeds = [('Blog', 'http://feeds.feedburner.com/freakonomicsblog')]

    keep_only_tags = [dict(name='div', attrs={'id':'header'}),
                      dict(name='h1'),
                      dict(name='h2'),
                      dict(name='div', attrs={'class':'entry-content'}),
                      ]
    feeds = [(u'Freakonomics Blog', u'http://www.freakonomics.com/feed/')]
    keep_only_tags = [dict(name='div', attrs={'id':['content']})]
    remove_tags_after = [dict(name='div', attrs={'class':['simple_socialmedia']})]
    remove_tags = [dict(name='div', attrs={'class':['simple_socialmedia','single-fb-share','wp-polls']})]
    extra_css = '''
                h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import Feed

@@ -46,4 +45,3 @@ class GC_gl(BasicNewsRecipe):
                }
                newArticles.append(newArt)
            masterFeed.append((feed.title,newArticles))
@@ -12,7 +12,7 @@ from datetime import date

class Guardian(BasicNewsRecipe):

    title = u'The Guardian / The Observer'
    title = u'The Guardian and The Observer'
    if date.today().weekday() == 6:
        base_url = "http://www.guardian.co.uk/theobserver"
    else:
recipes/icons/losandes.png (new binary file, 285 B)
recipes/icons/national_geographic_pl.png (new binary file, 894 B)
recipes/icons/techcrunch.png (new binary file, 119 B)
recipes/idg_se.recipe (new file)
@@ -0,0 +1,33 @@
__license__ = 'GPLv3'

from calibre.web.feeds.news import BasicNewsRecipe

class IDGse(BasicNewsRecipe):
    title = 'IDG'
    description = 'IDG.se'
    language = 'se'
    __author__ = 'zapt0'
    oldest_article = 1
    max_articles_per_feed = 40
    no_stylesheets = True
    encoding = 'ISO-8859-1'
    remove_javascript = True

    feeds = [(u'Senaste nytt',u'http://feeds.idg.se/idg/vzzs')]

    def print_version(self, url):
        return url + '?articleRenderMode=print&m=print'

    def get_cover_url(self):
        return 'http://idgmedia.idg.se/polopoly_fs/2.3275!images/idgmedia_logo_75.jpg'

    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class':['divColumn1Article']}),
    ]
    # remove ads
    remove_tags = [
        dict(name='div', attrs={'id':['preamble_ad']}),
        dict(name='ul', attrs={'class':['share']})
    ]
@@ -1,4 +1,3 @@
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1299694372(BasicNewsRecipe):
@@ -9,14 +8,24 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    oldest_article = 365
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    remove_tags = [
        dict(name='div', attrs={'id':'text_controls_toggle'})
        ,dict(name='script')
        ,dict(name='div', attrs={'id':'text_controls'})
        ,dict(name='div', attrs={'id':'editing_controls'})
        ,dict(name='div', attrs={'class':'bar bottom'})
    ]
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://www.instapaper.com'
    LOGIN = INDEX + u'/user/login'

    feeds = [(u'Instapaper Unread', u'http://www.instapaper.com/u'), (u'Instapaper Starred', u'http://www.instapaper.com/starred')]
    feeds = [
        (u'Instapaper Unread', u'http://www.instapaper.com/u'),
        (u'Instapaper Starred', u'http://www.instapaper.com/starred')
    ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@@ -34,21 +43,28 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('div', attrs={'class':'titleRow'}):
                description = self.tag_to_string(item.div)
            for item in soup.findAll('div', attrs={'class':'cornerControls'}):
                #description = self.tag_to_string(item.div)
                atag = item.a
                if atag and atag.has_key('href'):
                    url = atag['href']
                    title = self.tag_to_string(atag)
                    date = strftime(self.timefmt)
                    articles.append({
                        'title'       :title
                        ,'date'       :date
                        ,'url'        :url
                        ,'description':description
                        'url'         :url
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

    def print_version(self, url):
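        # The hrefs harvested in parse_index above are site-relative, so the
        # host is prefixed here; the resulting page is assumed to be
        # Instapaper's text view of the saved article.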
        return 'http://www.instapaper.com' + url

    def populate_article_metadata(self, article, soup, first):
        article.title = soup.find('title').contents[0].strip()

    def postprocess_html(self, soup, first_fetch):
        for link_tag in soup.findAll(attrs={"id" : "story"}):
            link_tag.insert(0, '<h1>'+soup.find('title').contents[0].strip()+'</h1>')

        return soup
@@ -16,16 +16,14 @@ class i09(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    use_embedded_content = True
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/io9.com/img/logo.png'
    extra_css = '''
        body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
        img{margin-bottom: 1em}
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
        '''

    conversion_options = {
        'comment'   : description
        , 'tags'    : category
@@ -33,13 +31,11 @@ class i09(BasicNewsRecipe):
        , 'language' : language
    }

    remove_attributes = ['width','height']
    keep_only_tags = [dict(attrs={'class':'content permalink'})]
    remove_tags_before = dict(name='h1')
    remove_tags = [dict(attrs={'class':'contactinfo'})]
    remove_tags_after = dict(attrs={'class':'contactinfo'})
    feeds = [(u'Articles', u'http://feeds.gawker.com/io9/vip?format=xml')]

    feeds = [(u'Articles', u'http://feeds.gawker.com/io9/full')]
    remove_tags = [
        {'class': 'feedflare'},
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -18,6 +18,7 @@ class IrishTimes(BasicNewsRecipe):
    oldest_article = 1.0
    max_articles_per_feed = 100
    no_stylesheets = True
    simultaneous_downloads = 5

    r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
    remove_tags = [dict(name='div', attrs={'class':'footer'})]
@@ -25,17 +26,17 @@ class IrishTimes(BasicNewsRecipe):

    feeds = [
        ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
        ('Ireland', 'http://rss.feedsportal.com/c/851/f/10845/index.rss'),
        ('World', 'http://rss.feedsportal.com/c/851/f/10846/index.rss'),
        ('Finance', 'http://rss.feedsportal.com/c/851/f/10847/index.rss'),
        ('Features', 'http://rss.feedsportal.com/c/851/f/10848/index.rss'),
        ('Sport', 'http://rss.feedsportal.com/c/851/f/10849/index.rss'),
        ('Opinion', 'http://rss.feedsportal.com/c/851/f/10850/index.rss'),
        ('Letters', 'http://rss.feedsportal.com/c/851/f/10851/index.rss'),
        ('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
        ('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
        ('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
        ('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
        ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
        ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
        ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
        ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
        ('Health', 'http://rss.feedsportal.com/c/851/f/10852/index.rss'),
        ('Education & Parenting', 'http://rss.feedsportal.com/c/851/f/10853/index.rss'),
        ('Motors', 'http://rss.feedsportal.com/c/851/f/10854/index.rss'),
        ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
        ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
        ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
        ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
        ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
        ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
@@ -49,10 +50,16 @@ class IrishTimes(BasicNewsRecipe):

    def print_version(self, url):
        if url.count('rss.feedsportal.com'):
            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
            #u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
            u = url.find('irishtimes')
            u = 'http://www.irishtimes.com' + url[u + 12:]
            u = u.replace('0C', '/')
            u = u.replace('A', '')
            u = u.replace('0Bhtml/story01.htm', '_pf.html')
        else:
            u = url.replace('.html','_pf.html')
        return u

    def get_article_url(self, article):
        return article.link
@@ -1,4 +1,4 @@
import urllib2
import urllib2, re
from calibre.web.feeds.news import BasicNewsRecipe

class JBPress(BasicNewsRecipe):
@@ -40,3 +40,12 @@ class JBPress(BasicNewsRecipe):
    def print_version(self, url):
        url = urllib2.urlopen(url).geturl() # resolve redirect.
        return url.replace('/-/', '/print/')

    def preprocess_html(self, soup):
        # remove breadcrumb
        h3s = soup.findAll('h3')
        for h3 in h3s:
            if re.compile('^JBpress>').match(h3.string):
                h3.extract()
        return soup
recipes/losandes.recipe (new file)
@@ -0,0 +1,78 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.losandes.com.ar
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class LosAndes(BasicNewsRecipe):
    title = 'Los Andes'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Mendoza, Argentina y el resto del mundo'
    publisher = 'Los Andes'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es_AR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.losandes.com.ar/graficos/losandes.png'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        h1,h2{font-family: "Times New Roman",Times,serif}
        .fechaNota{font-weight: bold; color: gray}
    """

    conversion_options = {
        'comment'     : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [
        dict(name=['meta','link'])
        ,dict(attrs={'class':['cabecera', 'url']})
    ]
    remove_tags_before = dict(attrs={'class':'cabecera'})
    remove_tags_after = dict(attrs={'class':'url'})

    feeds = [
        (u'Ultimas Noticias'        , u'http://www.losandes.com.ar/servicios/rss.asp?r=78' )
        ,(u'Politica'               , u'http://www.losandes.com.ar/servicios/rss.asp?r=68' )
        ,(u'Economia nacional'      , u'http://www.losandes.com.ar/servicios/rss.asp?r=65' )
        ,(u'Economia internacional' , u'http://www.losandes.com.ar/servicios/rss.asp?r=505')
        ,(u'Internacionales'        , u'http://www.losandes.com.ar/servicios/rss.asp?r=66' )
        ,(u'Turismo'                , u'http://www.losandes.com.ar/servicios/rss.asp?r=502')
        ,(u'Fincas'                 , u'http://www.losandes.com.ar/servicios/rss.asp?r=504')
        ,(u'Isha nos habla'         , u'http://www.losandes.com.ar/servicios/rss.asp?r=562')
        ,(u'Estilo'                 , u'http://www.losandes.com.ar/servicios/rss.asp?r=81' )
        ,(u'Cultura'                , u'http://www.losandes.com.ar/servicios/rss.asp?r=503')
        ,(u'Policiales'             , u'http://www.losandes.com.ar/servicios/rss.asp?r=70' )
        ,(u'Deportes'               , u'http://www.losandes.com.ar/servicios/rss.asp?r=69' )
        ,(u'Sociedad'               , u'http://www.losandes.com.ar/servicios/rss.asp?r=67' )
        ,(u'Opinion'                , u'http://www.losandes.com.ar/servicios/rss.asp?r=80' )
        ,(u'Editorial'              , u'http://www.losandes.com.ar/servicios/rss.asp?r=76' )
        ,(u'Mirador'                , u'http://www.losandes.com.ar/servicios/rss.asp?r=79' )
    ]

    def print_version(self, url):
        artid = url.rpartition('.')[0].rpartition('-')[2]
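        # The article id is the text between the final '-' and the trailing
        # extension, e.g. a URL ending '...-123456.asp' yields '123456'
        # (URL shape assumed for illustration).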
        return "http://www.losandes.com.ar/includes/modulos/imprimir.asp?tipo=noticia&id=" + artid

    def get_cover_url(self):
        month = strftime("%m").lstrip('0')
        day = strftime("%d").lstrip('0')
        year = strftime("%Y")
        return "http://www.losandes.com.ar/fotografias/fotosnoticias/" + year + "/" + month + "/" + day + "/th_tapa.jpg"

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
recipes/luns_a_venres.recipe (new file)
@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class LV_gl(BasicNewsRecipe):
    title = u'De Luns a Venres (RSS)'
    __author__ = u'Susana Sotelo Docío'
    description = u'O gratuíto galego'
    publisher = u'Galiciaé'
    category = u'news'
    encoding = 'utf-8'
    language = 'gl'
    direction = 'ltr'
    cover_url = 'http://lv.galiciae.com/new_estilos/lv/logo.gif'
    oldest_article = 2
    max_articles_per_feed = 200
    center_navbar = False

    feeds = [
        (u'Galicia', u'http://lv.galiciae.com/cache/rss/sec_galicia_gl.rss'),
        (u'Cultura', u'http://lv.galiciae.com/cache/rss/sec_cultura_gl.rss'),
        (u'Mundo', u'http://lv.galiciae.com/cache/rss/sec_mundo_gl.rss'),
        (u'Cidadanía', u'http://lv.galiciae.com/cache/rss/sec_ciudadania_gl.rss'),
        (u'Tecnoloxía', u'http://lv.galiciae.com/cache/rss/sec_tecnologia_gl.rss'),
        (u'España', u'http://lv.galiciae.com/cache/rss/sec_espana_gl.rss'),
        (u'Deportes', u'http://lv.galiciae.com/cache/rss/sec_deportes_gl.rss'),
        (u'Economía', u'http://lv.galiciae.com/cache/rss/sec_economia_gl.rss'),
        (u'Lercheo', u'http://lv.galiciae.com/cache/rss/sec_gente_gl.rss'),
        (u'Medio ambiente', u'http://lv.galiciae.com/cache/rss/sec_medioambiente_gl.rss'),
        (u'España/Mundo', u'http://lv.galiciae.com/cache/rss/sec_espanamundo_gl.rss'),
        (u'Sociedade', u'http://lv.galiciae.com/cache/rss/sec_sociedad_gl.rss'),
        (u'Ciencia', u'http://lv.galiciae.com/cache/rss/sec_ciencia_gl.rss'),
        (u'Motor', u'http://lv.galiciae.com/cache/rss/sec_motor_gl.rss'),
        (u'Coches', u'http://lv.galiciae.com/cache/rss/sec_coches_gl.rss'),
        (u'Motos', u'http://lv.galiciae.com/cache/rss/sec_motos_gl.rss'),
        (u'Industriais', u'http://lv.galiciae.com/cache/rss/sec_industriales_gl.rss')
    ]

    extra_css = u' p{text-align:left} '
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\nencoding="' + encoding + '"\ntags="' + category + '"\noverride_css=" p {text-align:left; text-indent: 0cm} "'

    def print_version(self, url):
        url += '?imprimir&lang=gl'
        return url
recipes/national_geographic_pl.recipe (new file)
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = 'Marcin Urban 2011'

import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class recipeMagic(BasicNewsRecipe):
    title = 'National Geographic PL'
    __author__ = 'Marcin Urban 2011'
    description = 'legenda wśród magazynów z historią sięgającą 120 lat'
    cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'G+J Gruner+Jahr Polska'
    category = 'news, PL,'
    language = 'pl'
    publication_type = 'newsportal'
    extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
        h1{text-align: center;}
        h2{font-size: medium; font-weight: bold;}
        .authordate {font-size: small; color: #696969;}
        p.lead {font-weight: bold; text-align: center;}
        .fot{font-size: x-small; color: #666666;} '''

    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]

    conversion_options = {
        'comments'          : description
        ,'tags'             : category
        ,'language'         : language
        ,'publisher'        : publisher
        ,'linearize_tables' : True
    }

    remove_tags = [
        dict(name='div', attrs={'class':'add_inf'}),
        dict(name='div', attrs={'class':'add_f'}),
    ]

    remove_attributes = ['width','height']

    feeds = [
        ('National Geographic PL', 'http://www.national-geographic.pl/rss/'),
    ]

    def print_version(self, url):
        return url.replace('artykuly0Cpokaz', 'drukuj-artykul')
@@ -1,11 +1,10 @@
EMAILADDRESS = 'hoge@foobar.co.jp'
from calibre.web.feeds.news import BasicNewsRecipe


class NBOnline(BasicNewsRecipe):
    title = u'Nikkei Business Online'
    language = 'ja'
    description = u'Nikkei Business Online new articles. PLEASE NOTE: You need to edit the EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address, which is needed when logging in. (The file is in the "Calibre2/resources/recipes" directory.)'
    description = u'Nikkei Business Online.\u6CE8\uFF1A\u30E6\u30FC\u30B6\u30FC\u540D\u306Bemail\u30A2\u30C9\u30EC\u30B9\u3068\u30E6\u30FC\u30B6\u30FC\u540D\u3092\u30BB\u30DF\u30B3\u30ED\u30F3\u3067\u533A\u5207\u3063\u3066\u5165\u308C\u3066\u304F\u3060\u3055\u3044\u3002\u4F8B\uFF1Aemail@address.jp;username . PLEASE NOTE: You need to put your email address and username into the username field separated by ; (semi-colon).'
    __author__ = 'Ado Nishimura'
    needs_subscription = True
    oldest_article = 7
@@ -23,8 +22,8 @@ class NBOnline(BasicNewsRecipe):
        if self.username is not None and self.password is not None:
            br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
            br.select_form(name='loginActionForm')
            br['email'] = EMAILADDRESS
            br['userId'] = self.username
            br['email'] = self.username.split(';')[0]
            br['userId'] = self.username.split(';')[1]
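            # The calibre username field carries both values separated by a
            # semicolon, e.g. 'email@address.jp;username', as described in
            # the recipe description above.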
            br['password'] = self.password
            br.submit()
        return br
recipes/nikkei_news.recipe (new file)
@@ -0,0 +1,88 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import re

#import pprint, sys
#pp = pprint.PrettyPrinter(indent=4)

class NikkeiNet_paper_subscription(BasicNewsRecipe):
    title = u'\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\uFF08\u671D\u520A\u30FB\u5915\u520A\uFF09'
    __author__ = 'Ado Nishimura'
    description = u'\u65E5\u7D4C\u96FB\u5B50\u7248\u306B\u3088\u308B\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\u3002\u671D\u520A\u30FB\u5915\u520A\u306F\u53D6\u5F97\u6642\u9593\u306B\u3088\u308A\u5207\u308A\u66FF\u308F\u308A\u307E\u3059\u3002\u8981\u8CFC\u8AAD'
    needs_subscription = True
    oldest_article = 1
    max_articles_per_feed = 30
    language = 'ja'
    no_stylesheets = True
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'

    remove_tags_before = {'class':"cmn-indent"}
    remove_tags = [
        # {'class':"cmn-article_move"},
        # {'class':"cmn-pr_list"},
        # {'class':"cmnc-zoom"},
        {'class':"cmn-hide"},
        {'name':'form'},
    ]
    remove_tags_after = {'class':"cmn-indent"}

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()

        #pp.pprint(self.parse_index())
        #exit(1)

        #br.set_debug_http(True)
        #br.set_debug_redirects(True)
        #br.set_debug_responses(True)

        if self.username is not None and self.password is not None:
            print "----------------------------open top page----------------------------------------"
            br.open('http://www.nikkei.com/')
            print "----------------------------open first login form--------------------------------"
            link = br.links(url_regex="www.nikkei.com/etc/accounts/login").next()
            br.follow_link(link)
            #response = br.response()
            #print response.get_data()
            print "----------------------------JS redirect(send autoPostForm)-----------------------"
            br.select_form(name='autoPostForm')
            br.submit()
            #response = br.response()
            print "----------------------------got login form---------------------------------------"
            br.select_form(name='LA0210Form01')
            br['LA0210Form01:LA0210Email']    = self.username
            br['LA0210Form01:LA0210Password'] = self.password
            br.submit()
            #response = br.response()
            print "----------------------------JS redirect------------------------------------------"
            br.select_form(nr=0)
            br.submit()
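            # mechanize cannot execute JavaScript, so each auto-submitting
            # redirect form in nikkei.com's login flow is replayed by hand
            # with select_form()/submit() above.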

        #br.set_debug_http(False)
        #br.set_debug_redirects(False)
        #br.set_debug_responses(False)
        return br

    def cleanup(self):
        print "----------------------------logout-----------------------------------------------"
        self.browser.open('https://regist.nikkei.com/ds/etc/accounts/logout')

    def parse_index(self):
        print "----------------------------get index of paper-----------------------------------"
        result = []
        soup = self.index_to_soup('http://www.nikkei.com/paper/')
        #soup = self.index_to_soup(self.test_data())
        for sect in soup.findAll('div', 'cmn-section kn-special JSID_baseSection'):
            sect_title = sect.find('h3', 'cmnc-title').string
            sect_result = []
            for elem in sect.findAll(attrs={'class':['cmn-article_title']}):
                url = 'http://www.nikkei.com' + elem.span.a['href']
                url = re.sub("/article/", "/print-article/", url) # print version.
                span = elem.span.a.span
                if ((span is not None) and (len(span.contents) > 1)):
                    title = span.contents[1].string
                    sect_result.append(dict(title=title, url=url, date='',
                                            description='', content=''))
            result.append([sect_title, sect_result])
        #pp.pprint(result)
recipes/plus_info.recipe (new file)
@@ -0,0 +1,47 @@
#!/usr/bin/env python

__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'

'''
www.plusinfo.mk
'''

from calibre.web.feeds.news import BasicNewsRecipe

class PlusInfo(BasicNewsRecipe):

    INDEX = 'www.plusinfo.mk'
    title = u'+info'
    __author__ = 'Darko Spasovski'
    description = 'Macedonian news portal'
    publication_type = 'newsportal'
    category = 'news, Macedonia'
    language = 'mk'
    masthead_url = 'http://www.plusinfo.mk/style/images/logo.jpg'
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100

    keep_only_tags = [dict(name='div', attrs={'class': 'vest'})]
    remove_tags = [dict(name='div', attrs={'class':['komentari_holder', 'objava']})]

    feeds = [(u'Македонија', u'http://www.plusinfo.mk/rss/makedonija'),
             (u'Бизнис', u'http://www.plusinfo.mk/rss/biznis'),
             (u'Скопје', u'http://www.plusinfo.mk/rss/skopje'),
             (u'Култура', u'http://www.plusinfo.mk/rss/kultura'),
             (u'Свет', u'http://www.plusinfo.mk/rss/svet'),
             (u'Сцена', u'http://www.plusinfo.mk/rss/scena'),
             (u'Здравје', u'http://www.plusinfo.mk/rss/zdravje'),
             (u'Магазин', u'http://www.plusinfo.mk/rss/magazin'),
             (u'Спорт', u'http://www.plusinfo.mk/rss/sport')]

    # uncomment the following block if you want the print version (note: it lacks photos)
    # def print_version(self, url):
    #     segments = url.split('/')
    #     printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5:])
    #     return printURL
recipes/portafolio.recipe (new file)
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1311799898(BasicNewsRecipe):
    title = u'Periódico Portafolio Colombia'
    language = 'es_CO'
    __author__ = 'BIGO-CAVA'
    cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})]
    keep_only_tags = [dict(name='div', id='contenidoArt')]
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
    publication_type = 'newspaper'

    extra_css = """
                p{text-align: justify; font-size: 100%}
                body{ text-align: left; font-size:100% }
                h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
                h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
                """

    feeds = [(u'Negocios', u'http://www.portafolio.co/negocios/feed'),
             (u'Economia', u'http://www.portafolio.co/economia/feed'),
             (u'Internacional', u'http://www.portafolio.co/internacional/feed'),
             (u'Indicadores', u'http://www.portafolio.co/indicadores/feed'),
             (u'Opinion', u'http://www.portafolio.co/opinion/feed'),
             (u'Finanzas Personales', u'http://www.portafolio.co/finanzas-personales/feed'),
             (u'Herramientas', u'http://www.portafolio.co/herramientas/feed')]
recipes/techcrunch.recipe (new file)
@@ -0,0 +1,63 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
techcrunch.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class TechCrunch(BasicNewsRecipe):
    title = 'TechCrunch'
    __author__ = 'Darko Miletic'
    description = 'IT News'
    publisher = 'AOL Inc.'
    category = 'news, IT'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    masthead_url = 'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/site-logo.png'
    extra_css = """
        body{font-family: Helvetica,Arial,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment'     : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [dict(name=['meta','link'])]
    remove_attributes = ['lang']
    keep_only_tags = [
        dict(name='h1', attrs={'class':'headline'})
        ,dict(attrs={'class':['author','post-time','body-copy']})
    ]

    feeds = [(u'News', u'http://feeds.feedburner.com/TechCrunch/')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                txt = item.string
                item.replaceWith(txt)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    txt = self.tag_to_string(item)
                    item.replaceWith(txt)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
recipes/tijolaco.recipe (new file)
@@ -0,0 +1,24 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class Tijolaco(BasicNewsRecipe):
    title = u'Tijolaco.com'
    __author__ = u'Diniz Bortolotto'
    description = u'Posts do Blog Tijola\xe7o.com'
    oldest_article = 7
    max_articles_per_feed = 50
    encoding = 'utf8'
    publisher = u'Brizola Neto'
    category = 'politics, Brazil'
    language = 'pt_BR'
    publication_type = 'politics portal'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'Blog Tijola\xe7o.com', u'http://feeds.feedburner.com/Tijolacoblog')]

    reverse_article_order = True

    keep_only_tags = [dict(name='div', attrs={'class':'post'})]

    remove_tags = [dict(name='span', attrs={'class':'com'})]
@@ -8,47 +8,33 @@ time.com

import re
from calibre.web.feeds.news import BasicNewsRecipe
from lxml import html

class Time(BasicNewsRecipe):
    #recipe_disabled = ('This recipe has been disabled as TIME no longer'
    #        ' publish complete articles on the web.')
    title                 = u'Time'
    __author__            = 'Kovid Goyal and Sujata Raman'
    __author__            = 'Kovid Goyal'
    description           = 'Weekly magazine'
    encoding              = 'utf-8'
    no_stylesheets        = True
    language              = 'en'
    remove_javascript     = True

    extra_css = ''' h1 {font-family:georgia,serif;color:#000000;}
        .mainHd{font-family:georgia,serif;color:#000000;}
        h2 {font-family:Arial,Sans-serif;}
        .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
        .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
        .byline{font-family:Arial,Sans-serif; font-size:x-small ;}
        .photoBkt{ font-size:x-small ;}
        .vertPhoto{font-size:x-small ;}
        .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
        .credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
        .artTxt{font-family:georgia,serif;}
        #content{font-family:georgia,serif;}
        .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
        .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
        a:link{color:#CC0000;}
        .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
        '''

    keep_only_tags = [ dict(name="div", attrs={"id":["content"]}),
        dict(name="div", attrs={"class":["artHd","artTxt","photoBkt","vertPhoto","image","copy"]}), ]
    remove_tags = [ dict(name="div", attrs={'class':['articleFooterNav','listsByTopic','articleTools2','relatedContent','sideContent','topBannerWrap','articlePagination','nextUp',"rtCol","pagination","enlarge","contentTools2",]}),
        dict(name="span", attrs={'class':['see']}),
        dict(name="div", attrs={'id':['header','articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
        dict(name="a", attrs={'class':['listLink']}),
        dict(name="ul", attrs={'id':['shareSocial','tabs']}),
        dict(name="li", attrs={'class':['back']}),
        dict(name="ul", attrs={'class':['navCount']}),
    keep_only_tags = [
        {
            'class':['artHd', 'articleContent',
                'entry-title','entry-meta', 'entry-content', 'thumbnail']
        },
    ]
    remove_tags = [
        {'class':['content-tools', 'quigo', 'see',
            'first-tier-social-tools', 'navigation', 'enlarge lightbox']},
        {'id':['share-tools']},
        {'rel':'lightbox'},
    ]

    recursions = 10
    match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html',r'http://www.time.com/time/specials/packages/article/.*']

@@ -56,10 +42,11 @@ class Time(BasicNewsRecipe):
        r'<meta .+/>'), lambda m:'')]

    def parse_index(self):
        soup = self.index_to_soup('http://www.time.com/time/magazine')
        img = soup.find('a', title="View Large Cover", href=True)
        if img is not None:
            cover_url = 'http://www.time.com'+img['href']
        raw = self.index_to_soup('http://www.time.com/time/magazine', raw=True)
        root = html.fromstring(raw)
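        # The raw bytes are parsed with lxml.html directly instead of
        # BeautifulSoup so the queries below can use XPath (and, presumably,
        # to cope better with time.com's markup).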
        img = root.xpath('//a[.="View Large Cover" and @href]')
        if img:
            cover_url = 'http://www.time.com' + img[0].get('href')
        try:
            nsoup = self.index_to_soup(cover_url)
            img = nsoup.find('img', src=re.compile('archive/covers'))
@@ -70,46 +57,48 @@ class Time(BasicNewsRecipe):

        feeds = []
        parent = soup.find(id='tocGuts')
        for seched in parent.findAll(attrs={'class':'toc_seched'}):
            section = self.tag_to_string(seched).capitalize()
            articles = list(self.find_articles(seched))
        parent = root.xpath('//div[@class="content-main-aside"]')[0]
        for sec in parent.xpath(
                'descendant::section[contains(@class, "sec-mag-section")]'):
            h3 = sec.xpath('./h3')
            if h3:
                section = html.tostring(h3[0], encoding=unicode,
                        method='text').strip().capitalize()
                self.log('Found section', section)
                articles = list(self.find_articles(sec))
                if articles:
                    feeds.append((section, articles))

        return feeds

    def find_articles(self, seched):
        for a in seched.findNextSiblings(attrs={'class':['toc_hed','rule2']}):
            if a.name in "div":
                break
            else:
    def find_articles(self, sec):

        for article in sec.xpath('./article'):
            h2 = article.xpath('./*[@class="entry-title"]')
            if not h2: continue
            a = h2[0].xpath('./a[@href]')
            if not a: continue
            title = html.tostring(a[0], encoding=unicode,
                    method='text').strip()
            if not title: continue
            url = a[0].get('href')
            if url.startswith('/'):
                url = 'http://www.time.com'+url
            desc = ''
            p = article.xpath('./*[@class="entry-content"]')
            if p:
                desc = html.tostring(p[0], encoding=unicode,
                        method='text')
            self.log('\t', title, ':\n\t\t', desc)
            yield {
                'title' : self.tag_to_string(a),
                'url'   : 'http://www.time.com'+a['href'],
                'title' : title,
                'url'   : url,
                'date'  : '',
                'description' : self.article_description(a)
                'description' : desc
            }

    def article_description(self, a):
        ans = []
        while True:
            t = a.nextSibling
            if t is None:
                break
            a = t
            if getattr(t, 'name', False):
                if t.get('class', '') == 'toc_parens' or t.name == 'br':
                    continue
                if t.name in ('div', 'a'):
                    break
                ans.append(self.tag_to_string(t))
            else:
                ans.append(unicode(t))
        return u' '.join(ans).replace(u'\xa0', u'').strip()

    def postprocess_html(self, soup, first):
        for tag in soup.findAll(attrs={'class':['artPag','pagination']}):
            tag.extract()
        return soup
@@ -64,7 +64,7 @@ class UnitedDaily(BasicNewsRecipe):

    __author__ = 'Eddie Lau'
    __version__ = '1.1'
    language = 'zh-TW'
    language = 'zh_TW'
    publisher = 'United Daily News Group'
    description = 'United Daily (Taiwan)'
    category = 'News, Chinese, Taiwan'
recipes/utrinski.recipe (new file)
@@ -0,0 +1,71 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
utrinski.com.mk
'''

import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe

class UtrinskiVesnik(BasicNewsRecipe):

    __author__ = 'Darko Spasovski'
    INDEX = 'http://www.utrinski.com.mk/'
    title = 'Utrinski Vesnik'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg'
    language = 'mk'
    remove_javascript = True
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            ## Remove anything before the start of the article.
            (r'<body.*?Article start-->', lambda match: '<body>'),

            ## Remove anything after the end of the article.
            (r'<!--Article end.*?</body>', lambda match: '</body>'),
        ]
    ]
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        .WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
    """

    conversion_options = {
        'comment'          : description,
        'tags'             : category,
        'language'         : language,
        'linearize_tables' : True
    }

    def parse_index(self):
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_TOCTitleBig'}):
            sectionTitle = section.contents[0].string
            tocItemTable = section.findAllPrevious('table')[1]
            if tocItemTable is None: continue
            articles = []
            while True:
                tocItemTable = tocItemTable.nextSibling
                if tocItemTable is None: break
                article = tocItemTable.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_TocItem'})
                if len(article)==0: break
                title = self.tag_to_string(article[0], use_alt=True).strip()
                articles.append({'title': title, 'url':'http://www.utrinski.com.mk/' + article[0]['href'], 'description':'', 'date':''})
            if articles:
                feeds.append((sectionTitle, articles))
        return feeds


    def get_cover_url(self):
        datum = datetime.datetime.today().strftime('%d_%m_%Y')
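        # Covers are assumed to be uploaded daily under a DD_MM_YYYY.jpg file
        # name, so today's date is formatted to build the URL below.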
        return 'http://www.utrinski.com.mk/WBStorage/Files/' + datum + '.jpg'
recipes/vio_mundo.recipe (new file)
@@ -0,0 +1,30 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class VioMundo(BasicNewsRecipe):
    title = 'Blog VioMundo'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do Blog VioMundo'
    publisher = 'Luiz Carlos Azenha'
    oldest_article = 5
    max_articles_per_feed = 20
    category = 'news, politics, Brazil'
    language = 'pt_BR'
    publication_type = 'news and politics portal'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'Blog VioMundo', u'http://www.viomundo.com.br/feed')]

    reverse_article_order = True

    def print_version(self, url):
        return url + '/print/'

    remove_tags_after = dict(id='BlogContent')

    preprocess_regexps = [
        (re.compile(r'\|\ <u>.*</p>'),
         lambda match: '</p>')
    ]
@@ -1,28 +1,29 @@

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2011, Starson17 <Starson17 at gmail.com>'
'''
www.wired.co.uk
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import re

class Wired_UK(BasicNewsRecipe):
    title = 'Wired Magazine - UK edition'
    __author__ = 'Darko Miletic'
    __author__ = 'Starson17'
    __version__ = 'v1.30'
    __date__ = '15 July 2011'
    description = 'Gaming news'
    publisher = 'Conde Nast Digital'
    category = 'news, games, IT, gadgets'
    oldest_article = 32
    oldest_article = 40
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
    #masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
    language = 'en_GB'
    extra_css = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} '
    index = 'http://www.wired.co.uk/wired-magazine.aspx'
    index = 'http://www.wired.co.uk'

    conversion_options = {
        'comment' : description
@@ -31,26 +32,25 @@ class Wired_UK(BasicNewsRecipe):
        , 'language' : language
    }

    keep_only_tags = [dict(name='div', attrs={'class':'article-box'})]
    remove_tags = [
        dict(name=['object','embed','iframe','link'])
    keep_only_tags = [dict(name='div', attrs={'class':['layoutColumn1']})]
    remove_tags = [dict(name='div', attrs={'class':['articleSidebar1','commentAddBox linkit','commentCountBox commentCountBoxBig']})]
    remove_tags_after = dict(name='div', attrs={'class':['mainCopy entry-content','mainCopy']})
    '''
    remove_attributes = ['height','width']
        ,dict(name=['object','embed','iframe','link'])
        ,dict(attrs={'class':['opts','comment','stories']})
    ]
    remove_tags_after = dict(name='div', attrs={'class':'stories'})
    remove_attributes = ['height','width']


    '''
    def parse_index(self):
        totalfeeds = []
        soup = self.index_to_soup(self.index)
        maincontent = soup.find('div', attrs={'class':'main-content'})
        recentcontent = soup.find('ul', attrs={'class':'linkList3'})
        mfeed = []
        if maincontent:
            st = maincontent.find(attrs={'class':'most-wired-box'})
            if st:
                for itt in st.findAll('a', href=True):
                    url = 'http://www.wired.co.uk' + itt['href']
                    title = self.tag_to_string(itt)
        if recentcontent:
            for li in recentcontent.findAll('li'):
                a = li.h2.a
                url = self.index + a['href'] + '?page=all'
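                # '?page=all' asks wired.co.uk for the whole article on one
                # page, which is what the removed print_version() used to do.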
                title = self.tag_to_string(a)
                description = ''
                date = strftime(self.timefmt)
                mfeed.append({
@@ -59,16 +59,91 @@ class Wired_UK(BasicNewsRecipe):
                    ,'url'        :url
                    ,'description':description
                })
        totalfeeds.append(('Articles', mfeed))
        totalfeeds.append(('Wired UK Magazine Latest News', mfeed))
        popmagcontent = soup.findAll('div', attrs={'class':'sidebarLinkList'})
        magcontent = popmagcontent[1]
        mfeed2 = []
        if magcontent:
            a = magcontent.h3.a
            if a:
                url = self.index + a['href'] + '?page=all'
                title = self.tag_to_string(a)
                description = ''
                date = strftime(self.timefmt)
                mfeed2.append({
                    'title'       :title
                    ,'date'       :date
                    ,'url'        :url
                    ,'description':description
                })
            for li in magcontent.findAll('li'):
                a = li.a
                url = self.index + a['href'] + '?page=all'
                title = self.tag_to_string(a)
                description = ''
                date = strftime(self.timefmt)
                mfeed2.append({
                    'title'       :title
                    ,'date'       :date
                    ,'url'        :url
                    ,'description':description
                })
        totalfeeds.append(('Wired UK Magazine Features', mfeed2))

        magsoup = self.index_to_soup(self.index + '/magazine')
        startcontent = magsoup.find('h3', attrs={'class':'magSubSectionTitle titleStart'}).parent
        mfeed3 = []
        if startcontent:
            for li in startcontent.findAll('li'):
                a = li.a
                url = self.index + a['href'] + '?page=all'
                title = self.tag_to_string(a)
                description = ''
                date = strftime(self.timefmt)
                mfeed3.append({
                    'title'       :title
                    ,'date'       :date
                    ,'url'        :url
                    ,'description':description
                })
        totalfeeds.append(('Wired UK Magazine More', mfeed3))

        playcontent = magsoup.find('h3', attrs={'class':'magSubSectionTitle titlePlay'}).parent
        mfeed4 = []
        if playcontent:
            for li in playcontent.findAll('li'):
                a = li.a
                url = self.index + a['href'] + '?page=all'
                title = self.tag_to_string(a)
                description = ''
                date = strftime(self.timefmt)
                mfeed4.append({
                    'title'       :title
                    ,'date'       :date
                    ,'url'        :url
                    ,'description':description
                })
        totalfeeds.append(('Wired UK Magazine Play', mfeed4))
        return totalfeeds

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.index)
        cover_item = soup.find('span', attrs={'class':'cover'})
        cover_url = ''
        soup = self.index_to_soup(self.index + '/magazine/archive')
        cover_item = soup.find('div', attrs={'class':'image linkme'})
        if cover_item:
            cover_url = cover_item.img['src']
        return cover_url

    def print_version(self, url):
        return url + '?page=all'
    def preprocess_html(self, soup):
        for tag in soup.findAll(name='p'):
            if tag.find(name='span', text=re.compile(r'This article was taken from.*', re.DOTALL|re.IGNORECASE)):
                tag.extract()
        return soup

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''
@@ -15,6 +15,7 @@ class ZeitDe(BasicNewsRecipe):
    encoding = 'UTF-8'

    __author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
    no_stylesheets = True

    max_articles_per_feed = 40
@@ -2,18 +2,21 @@
# -*- coding: utf-8 mode: python -*-

__license__ = 'GPL v3'
__copyright__ = '2010-2011, Steffen Siebert <calibre at steffensiebert.de>'
__copyright__ = '2010, Steffen Siebert <calibre at steffensiebert.de>'
__docformat__ = 'restructuredtext de'
__version__ = '1.2'
__version__ = '1.5'

"""
Die Zeit EPUB
"""

import os, urllib2, zipfile, re
import os, zipfile, re, cStringIO
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from calibre import walk
from urlparse import urlparse
from contextlib import closing
from calibre.utils.magick.draw import save_cover_data_to

class ZeitEPUBAbo(BasicNewsRecipe):

@@ -22,49 +25,112 @@ class ZeitEPUBAbo(BasicNewsRecipe):
    language = 'de'
    lang = 'de-DE'

    __author__ = 'Steffen Siebert and Tobias Isenberg'
    __author__ = 'Steffen Siebert, revised by Tobias Isenberg (with some code by Kovid Goyal)'
    needs_subscription = True

    conversion_options = {
        'no_default_epub_cover' : True,
        # fixing the wrong left margin
        'mobi_ignore_margins' : True,
        'keep_ligatures' : True,
    }

    preprocess_regexps = [
        # filtering for correct dashes
        (re.compile(r' - '), lambda match: ' – '), # regular "Gedankenstrich"
        (re.compile(r' -,'), lambda match: ' –,'), # "Gedankenstrich" before a comma
        (re.compile(r'(?<=\d)-(?=\d)'), lambda match: '–'), # number-number
        # filtering for correct dashes ("Gedankenstrich" and "bis")
        (re.compile(u' (-|\u2212)(?=[ ,])'), lambda match: u' \u2013'),
        (re.compile(r'(?<=\d)-(?=\d)'), lambda match: u'\u2013'), # number-number
        (re.compile(u'(?<=\d,)-(?= ?\u20AC)'), lambda match: u'\u2013'), # ,- Euro
        # fix the number dash number dash for the title image that was broken by the previous line
        (re.compile(u'(?<=\d\d\d\d)\u2013(?=\d?\d\.png)'), lambda match: '-'),
        # filtering for certain dash cases
        (re.compile(r'Bild - Zeitung'), lambda match: 'Bild-Zeitung'), # the obvious
        (re.compile(r'EMail'), lambda match: 'E-Mail'), # the obvious
        (re.compile(r'SBahn'), lambda match: 'S-Bahn'), # the obvious
        (re.compile(r'UBoot'), lambda match: 'U-Boot'), # the obvious
        (re.compile(r'T Shirt'), lambda match: 'T-Shirt'), # the obvious
        (re.compile(r'TShirt'), lambda match: 'T-Shirt'), # the obvious
        # the next two lines not only fix errors but also create new ones. this is due to additional errors in
        # the typesetting such as missing commas or wrongly placed dashes. but more is fixed than broken.
        (re.compile(r'(?<!und|der|\w\w,) -(?=\w)'), lambda match: '-'), # space too much before a connecting dash
        (re.compile(r'(?<=\w)- (?!und\b|oder\b|wie\b|aber\b|auch\b|sondern\b|bis\b|&|&\s|bzw\.|auf\b|eher\b)'), lambda match: '-'), # space too much after a connecting dash
        # filtering for missing spaces before the month in long dates
        (re.compile(u'(?<=\d)\.(?=(Januar|Februar|M\u00E4rz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember))'), lambda match: '. '),
        # filtering for other missing spaces
        (re.compile(r'Stuttgart21'), lambda match: 'Stuttgart 21'), # the obvious
        (re.compile(u'(?<=\d)(?=\u20AC)'), lambda match: u'\u2013'), # number[no space]Euro
        (re.compile(r':(?=[^\d\s</])'), lambda match: ': '), # missing space after colon
        (re.compile(u'\u00AB(?=[^\-\.:;,\?!<\)\s])'), lambda match: u'\u00AB '), # missing space after closing quotation
        (re.compile(u'(?<=[^\s\(>])\u00BB'), lambda match: u' \u00BB'), # missing space before opening quotation
        (re.compile(r'(?<=[a-z])(?=(I|II|III|IV|V|VI|VII|VIII|IX|X|XI|XII|XIII|XIV|XV|XVI|XVII|XVIII|XIX|XX)\.)'), lambda match: ' '), # missing space before Roman numeral
        (re.compile(r'(?<=(I|V|X)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
        (re.compile(r'(?<=(II|IV|VI|IX|XI|XV|XX)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
        (re.compile(r'(?<=(III|VII|XII|XIV|XVI|XIX)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
        (re.compile(r'(?<=(VIII|XIII|XVII)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
        (re.compile(r'(?<=(XVIII)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
        (re.compile(r'(?<=[A-Za-zÄÖÜäöü]),(?=[A-Za-zÄÖÜäöü])'), lambda match: ', '), # missing space after comma
        (re.compile(r'(?<=[a-zäöü])\.(?=[A-ZÄÖÜ][A-Za-zÄÖÜäöü])'), lambda match: '. '), # missing space after full-stop
        (re.compile(r'(?<=[uU]\.) (?=a\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously
        (re.compile(r'(?<=[iI]\.) (?=A\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously
        (re.compile(r'(?<=[zZ]\.) (?=B\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously
        (re.compile(r'(?<=\w\.) (?=[A-Z][a-z]*@)'), lambda match: ''), # fix e-mail address that was potentially broken previously
        (re.compile(r'(?<=\d)[Pp]rozent'), lambda match: ' Prozent'),
        (re.compile(r'\.\.\.\.+'), lambda match: '...'), # too many dots (....)
        (re.compile(r'(?<=[^\s])\.\.\.'), lambda match: ' ...'), # spaces before ...
        (re.compile(r'\.\.\.(?=[^\s])'), lambda match: '... '), # spaces after ...
        (re.compile(r'(?<=[\[\(]) \.\.\. (?=[\]\)])'), lambda match: '...'), # fix special cases of ... in brackets
        (re.compile(u'(?<=[\u00BB\u203A]) \.\.\.'), lambda match: '...'), # fix special cases of ... after a quotation mark
        (re.compile(u'\.\.\. (?=[\u00AB\u2039,])'), lambda match: '...'), # fix special cases of ... before a quotation mark or comma
        # fix missing spaces between numbers and any sort of units, possibly with dot
        (re.compile(r'(?<=\d)(?=(Femto|Piko|Nano|Mikro|Milli|Zenti|Dezi|Hekto|Kilo|Mega|Giga|Tera|Peta|Tausend|Trilli|Kubik|Quadrat|Meter|Uhr|Jahr|Schuljahr|Seite))'), lambda match: ' '),
        (re.compile(r'(?<=\d\.)(?=(Femto|Piko|Nano|Mikro|Milli|Zenti|Dezi|Hekto|Kilo|Mega|Giga|Tera|Peta|Tausend|Trilli|Kubik|Quadrat|Meter|Uhr|Jahr|Schuljahr|Seite))'), lambda match: ' '),
        # fix wrong spaces
        (re.compile(r'(?<=<p class="absatz">[A-ZÄÖÜ]) (?=[a-zäöü\-])'), lambda match: ''), # at beginning of paragraphs
        (re.compile(u' \u00AB'), lambda match: u'\u00AB '), # before closing quotation
        (re.compile(u'\u00BB '), lambda match: u' \u00BB'), # after opening quotation
        # filtering for spaces in large numbers for better readability
        (re.compile(r'(?<=\d\d)(?=\d\d\d[ ,\.;\)<\?!-])'), lambda match: u'\u2008'), # end of the number with some character following
        (re.compile(r'(?<=\d\d)(?=\d\d\d. )'), lambda match: u'\u2008'), # end of the number with full-stop following, then space is necessary (avoid file names)
        (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
        (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
|
||||
(re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
|
||||
(re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
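# note: the four identical rules above are intentional; each pass inserts one
# more U+2008 separator, so e.g. a seven-digit number ends up grouped as
# 1 234 567 (with punctuation spaces as the separators)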
|
||||
# filtering for unicode characters that are missing on the Kindle,
|
||||
# try to replace them with meaningful work-arounds
|
||||
(re.compile(u'\u2080'), lambda match: '<span style="font-size: 50%;">0</span>'), # subscript-0
|
||||
(re.compile(u'\u2081'), lambda match: '<span style="font-size: 50%;">1</span>'), # subscript-1
|
||||
(re.compile(u'\u2082'), lambda match: '<span style="font-size: 50%;">2</span>'), # subscript-2
|
||||
(re.compile(u'\u2083'), lambda match: '<span style="font-size: 50%;">3</span>'), # subscript-3
|
||||
(re.compile(u'\u2084'), lambda match: '<span style="font-size: 50%;">4</span>'), # subscript-4
|
||||
(re.compile(u'\u2085'), lambda match: '<span style="font-size: 50%;">5</span>'), # subscript-5
|
||||
(re.compile(u'\u2086'), lambda match: '<span style="font-size: 50%;">6</span>'), # subscript-6
|
||||
(re.compile(u'\u2087'), lambda match: '<span style="font-size: 50%;">7</span>'), # subscript-7
|
||||
(re.compile(u'\u2088'), lambda match: '<span style="font-size: 50%;">8</span>'), # subscript-8
|
||||
(re.compile(u'\u2089'), lambda match: '<span style="font-size: 50%;">9</span>'), # subscript-9
|
||||
(re.compile(u'\u2080'), lambda match: '<span style="font-size: 40%;">0</span>'), # subscript-0
|
||||
(re.compile(u'\u2081'), lambda match: '<span style="font-size: 40%;">1</span>'), # subscript-1
|
||||
(re.compile(u'\u2082'), lambda match: '<span style="font-size: 40%;">2</span>'), # subscript-2
|
||||
(re.compile(u'\u2083'), lambda match: '<span style="font-size: 40%;">3</span>'), # subscript-3
|
||||
(re.compile(u'\u2084'), lambda match: '<span style="font-size: 40%;">4</span>'), # subscript-4
|
||||
(re.compile(u'\u2085'), lambda match: '<span style="font-size: 40%;">5</span>'), # subscript-5
|
||||
(re.compile(u'\u2086'), lambda match: '<span style="font-size: 40%;">6</span>'), # subscript-6
|
||||
(re.compile(u'\u2087'), lambda match: '<span style="font-size: 40%;">7</span>'), # subscript-7
|
||||
(re.compile(u'\u2088'), lambda match: '<span style="font-size: 40%;">8</span>'), # subscript-8
|
||||
(re.compile(u'\u2089'), lambda match: '<span style="font-size: 40%;">9</span>'), # subscript-9
|
||||
# always change CO2
|
||||
(re.compile(r'CO2'), lambda match: 'CO<span style="font-size: 40%;">2</span>'), # CO2
|
||||
# remove *** paragraphs
|
||||
(re.compile(r'<p class="absatz">\*\*\*</p>'), lambda match: ''),
|
||||
# better layout for the top line of each article
|
||||
(re.compile(u'(?<=DIE ZEIT N\u00B0 \d /) (?=\d\d)'), lambda match: ' 20'), # proper year in edition number
|
||||
(re.compile(u'(?<=DIE ZEIT N\u00B0 \d\d /) (?=\d\d)'), lambda match: ' 20'), # proper year in edition number
|
||||
(re.compile(u'(?<=>)(?=DIE ZEIT N\u00B0 \d\d / 20\d\d)'), lambda match: u' \u2014 '), # m-dash between category and DIE ZEIT
|
||||
]
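# For reference, a simplified sketch (not the recipe's own code) of how
# BasicNewsRecipe applies preprocess_regexps to the downloaded HTML:
#
#   for pat, func in self.preprocess_regexps:
#       html = pat.sub(func, html)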
|
||||
|
||||
def build_index(self):
|
||||
domain = "http://premium.zeit.de"
|
||||
url = domain + "/abovorteile/cgi-bin/_er_member/p4z.fpl?ER_Do=getUserData&ER_NextTemplate=login_ok"
|
||||
|
||||
domain = "https://premium.zeit.de"
|
||||
url = domain + "/abo/zeit_digital"
|
||||
browser = self.get_browser()
|
||||
browser.add_password("http://premium.zeit.de", self.username, self.password)
|
||||
|
||||
try:
|
||||
browser.open(url)
|
||||
except urllib2.HTTPError:
|
||||
self.report_progress(0,_("Can't login to download issue"))
|
||||
raise ValueError('Failed to login, check your username and password')
|
||||
|
||||
response = browser.follow_link(text="DIE ZEIT als E-Paper")
|
||||
response = browser.follow_link(url_regex=re.compile('^http://contentserver.hgv-online.de/nodrm/fulfillment\\?distributor=zeit-online&orderid=zeit_online.*'))
|
||||
# new login process
|
||||
response = browser.open(url)
|
||||
browser.select_form(nr=2)
|
||||
browser.form['name']=self.username
|
||||
browser.form['pass']=self.password
|
||||
browser.submit()
|
||||
# now find the correct file; we will still use the ePub file
|
||||
epublink = browser.find_link(text_regex=re.compile('.*Ausgabe als Datei im ePub-Format.*'))
|
||||
response = browser.follow_link(epublink)
|
||||
self.report_progress(1,_('next step'))
|
||||
|
||||
tmp = PersistentTemporaryFile(suffix='.epub')
|
||||
self.report_progress(0,_('downloading epub'))
|
||||
@ -104,9 +170,45 @@ class ZeitEPUBAbo(BasicNewsRecipe):
|
||||
|
||||
# getting url of the cover
|
||||
def get_cover_url(self):
|
||||
self.log.warning('Downloading cover')
|
||||
try:
|
||||
self.log.warning('Trying PDF-based cover')
|
||||
domain = "https://premium.zeit.de"
|
||||
url = domain + "/abo/zeit_digital"
|
||||
browser = self.get_browser()
|
||||
|
||||
# new login process
|
||||
browser.open(url)
|
||||
browser.select_form(nr=2)
|
||||
browser.form['name']=self.username
|
||||
browser.form['pass']=self.password
|
||||
browser.submit()
|
||||
# actual cover search
|
||||
pdflink = browser.find_link(url_regex=re.compile('system/files/epaper/DZ/pdf/DZ_ePaper*'))
|
||||
cover_url = urlparse(pdflink.base_url)[0]+'://'+urlparse(pdflink.base_url)[1]+''+(urlparse(pdflink.url)[2]).replace('ePaper_','').replace('.pdf','_001.pdf')
|
||||
self.log.warning('PDF link found:')
|
||||
self.log.warning(cover_url)
|
||||
# download the cover (has to be here due to new login process)
|
||||
with closing(browser.open(cover_url)) as r:
|
||||
cdata = r.read()
|
||||
from calibre.ebooks.metadata.pdf import get_metadata
|
||||
stream = cStringIO.StringIO(cdata)
|
||||
cdata = None
|
||||
mi = get_metadata(stream)
|
||||
if mi.cover_data and mi.cover_data[1]:
|
||||
cdata = mi.cover_data[1]
|
||||
|
||||
cpath = os.path.join(self.output_dir, 'cover.jpg')
|
||||
save_cover_data_to(cdata, cpath)
|
||||
cover_url = cpath
|
||||
|
||||
except:
|
||||
self.log.warning('Trying low-res cover')
|
||||
try:
|
||||
inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
|
||||
cover_url = inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
|
||||
except:
|
||||
self.log.warning('Using static old low-res cover')
|
||||
cover_url = 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
|
||||
return cover_url
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
<link rel="stylesheet" type="text/css" href="{prefix}/static/browse/browse.css" />
|
||||
<link type="text/css" href="{prefix}/static/jquery_ui/css/humanity-custom/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
|
||||
<link rel="stylesheet" type="text/css" href="{prefix}/static/jquery.multiselect.css" />
|
||||
<link rel="apple-touch-icon" href="/static/calibre.png" />
|
||||
|
||||
<script type="text/javascript" src="{prefix}/static/jquery.js"></script>
|
||||
<script type="text/javascript" src="{prefix}/static/jquery.corner.js"></script>
|
||||
|
@ -11,7 +11,7 @@ defaults.
|
||||
'''
|
||||
|
||||
#: Auto increment series index
|
||||
# The algorithm used to assign a new book in an existing series a series number.
|
||||
# The algorithm used to assign a book added to an existing series a series number.
|
||||
# New series numbers assigned using this tweak are always integer values, except
|
||||
# if a constant non-integer is specified.
|
||||
# Possible values are:
|
||||
@ -27,7 +27,19 @@ defaults.
|
||||
# series_index_auto_increment = 'next'
|
||||
# series_index_auto_increment = 'next_free'
|
||||
# series_index_auto_increment = 16.5
|
||||
#
|
||||
# Set the use_series_auto_increment_tweak_when_importing tweak to True to
|
||||
# use the above values when importing/adding books. If this tweak is set to
|
||||
# False (the default) then the series number will be set to 1 if it is not
|
||||
# explicitly set during the import. If set to True, then the
|
||||
# series index will be set according to the series_index_auto_increment setting.
|
||||
# Note that the use_series_auto_increment_tweak_when_importing tweak is used
|
||||
# only when a value is not provided during import. If the importing regular
|
||||
# expression produces a value for series_index, or if you are reading metadata
|
||||
# from books and the import plugin produces a value, then that value will
|
||||
# be used irrespective of the setting of the tweak.
|
||||
series_index_auto_increment = 'next'
|
||||
use_series_auto_increment_tweak_when_importing = False
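# Illustration (hypothetical library state): with
# use_series_auto_increment_tweak_when_importing = True and
# series_index_auto_increment = 'next', importing a book with series "Dune"
# and no number into a library whose highest "Dune" index is 2 assigns
# series_index 3; with the tweak left at False the book gets series_index 1.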
|
||||
|
||||
#: Add separator after completing an author name
|
||||
# Should the completion separator be appended
|
||||
@ -366,3 +378,10 @@ server_listen_on = '0.0.0.0'
|
||||
# on at your own risk!
|
||||
unified_title_toolbar_on_osx = False
|
||||
|
||||
#: Save original file when converting from same format to same format
|
||||
# When calibre does a conversion from the same format to the same format, for
|
||||
# example, from EPUB to EPUB, the original file is saved, so that in case the
|
||||
# conversion is poor, you can tweak the settings and run it again. By setting
|
||||
# this to False you can prevent calibre from saving the original file.
|
||||
save_original_format = True
|
||||
|
||||
|
BIN
resources/images/random.png
Normal file
@ -379,7 +379,8 @@
|
||||
<!-- image -->
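<!-- The literal <img> wrapper below is replaced with xsl:element so that a
     title attribute can be added conditionally when fb:image carries one. -->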
|
||||
<xsl:template match="fb:image">
|
||||
<div align="center">
|
||||
<img border="1">
|
||||
<xsl:element name="img">
|
||||
<xsl:attribute name="border">1</xsl:attribute>
|
||||
<xsl:choose>
|
||||
<xsl:when test="starts-with(@xlink:href,'#')">
|
||||
<xsl:attribute name="src"><xsl:value-of select="substring-after(@xlink:href,'#')"/></xsl:attribute>
|
||||
@ -388,7 +389,10 @@
|
||||
<xsl:attribute name="src"><xsl:value-of select="@xlink:href"/></xsl:attribute>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</img>
|
||||
<xsl:if test="@title">
|
||||
<xsl:attribute name="title"><xsl:value-of select="@title"/></xsl:attribute>
|
||||
</xsl:if>
|
||||
</xsl:element>
|
||||
</div>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
||||
|
@ -1,5 +1,5 @@
|
||||
" Project wide builtins
|
||||
let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
|
||||
let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
|
||||
|
||||
python << EOFPY
|
||||
import os
|
||||
@ -15,7 +15,7 @@ vipy.session.initialize(project_name='calibre', src_dir=src_dir,
|
||||
project_dir=project_dir, base_dir=base_dir)
|
||||
|
||||
def recipe_title_callback(raw):
|
||||
return eval(raw.decode('utf-8'))
|
||||
return eval(raw.decode('utf-8')).replace(' ', '_')
|
||||
|
||||
vipy.session.add_content_browser('.r', ',r', 'Recipe',
|
||||
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
|
||||
|
@ -25,18 +25,11 @@ class Message:
|
||||
return '%s:%s: %s'%(self.filename, self.lineno, self.msg)
|
||||
|
||||
def check_for_python_errors(code_string, filename):
|
||||
# Since compiler.parse does not reliably report syntax errors, use the
|
||||
# built in compiler first to detect those.
|
||||
import _ast
|
||||
# First, compile into an AST and handle syntax errors.
|
||||
try:
|
||||
try:
|
||||
compile(code_string, filename, "exec")
|
||||
except MemoryError:
|
||||
# Python 2.4 will raise MemoryError if the source can't be
|
||||
# decoded.
|
||||
if sys.version_info[:2] == (2, 4):
|
||||
raise SyntaxError(None)
|
||||
raise
|
||||
except (SyntaxError, IndentationError), value:
|
||||
tree = compile(code_string, filename, "exec", _ast.PyCF_ONLY_AST)
|
||||
except (SyntaxError, IndentationError) as value:
|
||||
msg = value.args[0]
|
||||
|
||||
(lineno, offset, text) = value.lineno, value.offset, value.text
|
||||
@ -47,13 +40,11 @@ def check_for_python_errors(code_string, filename):
|
||||
# bogus message that claims the encoding the file declared was
|
||||
# unknown.
|
||||
msg = "%s: problem decoding source" % filename
|
||||
|
||||
return [Message(filename, lineno, msg)]
|
||||
else:
|
||||
# Okay, it's syntactically valid. Now parse it into an ast and check
|
||||
# it.
|
||||
import compiler
|
||||
checker = __import__('pyflakes.checker').checker
|
||||
tree = compiler.parse(code_string)
|
||||
# Okay, it's syntactically valid. Now check it.
|
||||
w = checker.Checker(tree, filename)
|
||||
w.messages.sort(lambda a, b: cmp(a.lineno, b.lineno))
|
||||
return [Message(x.filename, x.lineno, x.message%x.message_args) for x in
|
||||
|
@ -373,7 +373,7 @@ class Win32Freeze(Command, WixMixIn):
|
||||
src = self.j(self.src_root, 'setup', 'installer', 'windows',
|
||||
'portable.c')
|
||||
obj = self.j(self.obj_dir, self.b(src)+'.obj')
|
||||
cflags = '/c /EHsc /MT /W3 /Ox /nologo /D_UNICODE'.split()
|
||||
cflags = '/c /EHsc /MT /W3 /Ox /nologo /D_UNICODE /DUNICODE'.split()
|
||||
|
||||
if self.newer(obj, [src]):
|
||||
self.info('Compiling', obj)
|
||||
@ -386,6 +386,7 @@ class Win32Freeze(Command, WixMixIn):
|
||||
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
|
||||
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
|
||||
'/RELEASE',
|
||||
'/ENTRY:wWinMainCRTStartup',
|
||||
'/OUT:'+exe, self.embed_resources(exe),
|
||||
obj, 'User32.lib']
|
||||
self.run_builder(cmd)
|
||||
|
@ -2,15 +2,21 @@
|
||||
#define UNICODE
|
||||
#endif
|
||||
|
||||
#ifndef _UNICODE
|
||||
#define _UNICODE
|
||||
#endif
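/* With UNICODE and _UNICODE both forced on above, the _T() macro from
   <tchar.h> (used throughout below) and the Win32 TEXT() macro both expand
   to wide-character L"..." literals. */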
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
#include <tchar.h>
|
||||
#include <wchar.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define BUFSIZE 4096
|
||||
|
||||
void show_error(LPCTSTR msg) {
|
||||
MessageBeep(MB_ICONERROR);
|
||||
MessageBox(NULL, msg, TEXT("Error"), MB_OK|MB_ICONERROR);
|
||||
MessageBox(NULL, msg, _T("Error"), MB_OK|MB_ICONERROR);
|
||||
}
|
||||
|
||||
void show_detailed_error(LPCTSTR preamble, LPCTSTR msg, int code) {
|
||||
@ -20,7 +26,7 @@ void show_detailed_error(LPCTSTR preamble, LPCTSTR msg, int code) {
|
||||
|
||||
_sntprintf_s(buf,
|
||||
LocalSize(buf) / sizeof(TCHAR), _TRUNCATE,
|
||||
TEXT("%s\r\n %s (Error Code: %d)\r\n"),
|
||||
_T("%s\r\n %s (Error Code: %d)\r\n"),
|
||||
preamble, msg, code);
|
||||
|
||||
show_error(buf);
|
||||
@ -32,7 +38,7 @@ void show_last_error_crt(LPCTSTR preamble) {
|
||||
int err = 0;
|
||||
|
||||
_get_errno(&err);
|
||||
_wcserror_s(buf, BUFSIZE, err);
|
||||
_tcserror_s(buf, BUFSIZE, err);
|
||||
show_detailed_error(preamble, buf, err);
|
||||
}
|
||||
|
||||
@ -57,7 +63,7 @@ void show_last_error(LPCTSTR preamble) {
|
||||
LPTSTR get_app_dir() {
|
||||
LPTSTR buf, buf2, buf3;
|
||||
DWORD sz;
|
||||
TCHAR drive[4] = TEXT("\0\0\0");
|
||||
TCHAR drive[4] = _T("\0\0\0");
|
||||
errno_t err;
|
||||
|
||||
buf = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
|
||||
@ -67,18 +73,18 @@ LPTSTR get_app_dir() {
|
||||
sz = GetModuleFileName(NULL, buf, BUFSIZE);
|
||||
|
||||
if (sz == 0 || sz > BUFSIZE-1) {
|
||||
show_error(TEXT("Failed to get path to calibre-portable.exe"));
|
||||
show_error(_T("Failed to get path to calibre-portable.exe"));
|
||||
ExitProcess(1);
|
||||
}
|
||||
|
||||
err = _tsplitpath_s(buf, drive, 4, buf2, BUFSIZE, NULL, 0, NULL, 0);
|
||||
|
||||
if (err != 0) {
|
||||
show_last_error_crt(TEXT("Failed to split path to calibre-portable.exe"));
|
||||
show_last_error_crt(_T("Failed to split path to calibre-portable.exe"));
|
||||
ExitProcess(1);
|
||||
}
|
||||
|
||||
_sntprintf_s(buf3, BUFSIZE-1, _TRUNCATE, TEXT("%s%s"), drive, buf2);
|
||||
_sntprintf_s(buf3, BUFSIZE-1, _TRUNCATE, _T("%s%s"), drive, buf2);
|
||||
free(buf); free(buf2);
|
||||
return buf3;
|
||||
}
|
||||
@ -90,18 +96,18 @@ void launch_calibre(LPCTSTR exe, LPCTSTR config_dir, LPCTSTR library_dir) {
|
||||
BOOL fSuccess;
|
||||
TCHAR cmdline[BUFSIZE];
|
||||
|
||||
if (! SetEnvironmentVariable(TEXT("CALIBRE_CONFIG_DIRECTORY"), config_dir)) {
|
||||
show_last_error(TEXT("Failed to set environment variables"));
|
||||
if (! SetEnvironmentVariable(_T("CALIBRE_CONFIG_DIRECTORY"), config_dir)) {
|
||||
show_last_error(_T("Failed to set environment variables"));
|
||||
ExitProcess(1);
|
||||
}
|
||||
|
||||
if (! SetEnvironmentVariable(TEXT("CALIBRE_PORTABLE_BUILD"), exe)) {
|
||||
show_last_error(TEXT("Failed to set environment variables"));
|
||||
if (! SetEnvironmentVariable(_T("CALIBRE_PORTABLE_BUILD"), exe)) {
|
||||
show_last_error(_T("Failed to set environment variables"));
|
||||
ExitProcess(1);
|
||||
}
|
||||
|
||||
dwFlags = CREATE_UNICODE_ENVIRONMENT | CREATE_NEW_PROCESS_GROUP;
|
||||
_sntprintf_s(cmdline, BUFSIZE, _TRUNCATE, TEXT(" \"--with-library=%s\""), library_dir);
|
||||
_sntprintf_s(cmdline, BUFSIZE, _TRUNCATE, _T(" \"--with-library=%s\""), library_dir);
|
||||
|
||||
ZeroMemory( &si, sizeof(si) );
|
||||
si.cb = sizeof(si);
|
||||
@ -119,7 +125,7 @@ void launch_calibre(LPCTSTR exe, LPCTSTR config_dir, LPCTSTR library_dir) {
|
||||
);
|
||||
|
||||
if (fSuccess == 0) {
|
||||
show_last_error(TEXT("Failed to launch the calibre program"));
|
||||
show_last_error(_T("Failed to launch the calibre program"));
|
||||
}
|
||||
|
||||
// Close process and thread handles.
|
||||
@ -137,9 +143,9 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PWSTR pCmdLine
|
||||
library_dir = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
|
||||
exe = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
|
||||
|
||||
_sntprintf_s(config_dir, BUFSIZE, _TRUNCATE, TEXT("%sCalibre Settings"), app_dir);
|
||||
_sntprintf_s(exe, BUFSIZE, _TRUNCATE, TEXT("%sCalibre\\calibre.exe"), app_dir);
|
||||
_sntprintf_s(library_dir, BUFSIZE, _TRUNCATE, TEXT("%sCalibre Library"), app_dir);
|
||||
_sntprintf_s(config_dir, BUFSIZE, _TRUNCATE, _T("%sCalibre Settings"), app_dir);
|
||||
_sntprintf_s(exe, BUFSIZE, _TRUNCATE, _T("%sCalibre\\calibre.exe"), app_dir);
|
||||
_sntprintf_s(library_dir, BUFSIZE, _TRUNCATE, _T("%sCalibre Library"), app_dir);
|
||||
|
||||
launch_calibre(exe, config_dir, library_dir);
|
||||
|
||||
|
@ -205,8 +205,8 @@ class Resources(Command):
|
||||
dest = self.j(self.RESOURCES, 'template-functions.json')
|
||||
function_dict = {}
|
||||
import inspect
|
||||
from calibre.utils.formatter_functions import all_builtin_functions
|
||||
for obj in all_builtin_functions:
|
||||
from calibre.utils.formatter_functions import formatter_functions
|
||||
for obj in formatter_functions.get_builtins().values():
|
||||
eval_func = inspect.getmembers(obj,
|
||||
lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
|
||||
try:
|
||||
|
@ -8,11 +8,18 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, tempfile, shutil, subprocess, glob, re, time, textwrap
|
||||
from distutils import sysconfig
|
||||
from functools import partial
|
||||
|
||||
from setup import Command, __appname__, __version__
|
||||
from setup.build_environment import pyqt
|
||||
|
||||
class POT(Command):
|
||||
def qt_sources():
|
||||
qtdir = glob.glob('/usr/src/qt-*')[-1]
|
||||
j = partial(os.path.join, qtdir)
|
||||
return list(map(j, [
|
||||
'src/gui/widgets/qdialogbuttonbox.cpp',
|
||||
]))
|
||||
|
||||
class POT(Command): # {{{
|
||||
|
||||
description = 'Update the .pot translation template'
|
||||
PATH = os.path.join(Command.SRC, __appname__, 'translations')
|
||||
@ -82,6 +89,8 @@ class POT(Command):
|
||||
time=time.strftime('%Y-%m-%d %H:%M+%Z'))
|
||||
|
||||
files = self.source_files()
|
||||
qt_inputs = qt_sources()
|
||||
|
||||
with tempfile.NamedTemporaryFile() as fl:
|
||||
fl.write('\n'.join(files))
|
||||
fl.flush()
|
||||
@ -91,8 +100,14 @@ class POT(Command):
|
||||
subprocess.check_call(['xgettext', '-f', fl.name,
|
||||
'--default-domain=calibre', '-o', out.name, '-L', 'Python',
|
||||
'--from-code=UTF-8', '--sort-by-file', '--omit-header',
|
||||
'--no-wrap', '-k__',
|
||||
'--no-wrap', '-k__', '--add-comments=NOTE:',
|
||||
])
|
||||
subprocess.check_call(['xgettext', '-j',
|
||||
'--default-domain=calibre', '-o', out.name,
|
||||
'--from-code=UTF-8', '--sort-by-file', '--omit-header',
|
||||
'--no-wrap', '-kQT_TRANSLATE_NOOP:2',
|
||||
] + qt_inputs)
|
||||
|
||||
with open(out.name, 'rb') as f:
|
||||
src = f.read()
|
||||
os.remove(out.name)
|
||||
@ -102,10 +117,12 @@ class POT(Command):
|
||||
with open(pot, 'wb') as f:
|
||||
f.write(src)
|
||||
self.info('Translations template:', os.path.abspath(pot))
|
||||
|
||||
|
||||
return pot
|
||||
# }}}
|
||||
|
||||
|
||||
class Translations(POT):
|
||||
class Translations(POT): # {{{
|
||||
description='''Compile the translations'''
|
||||
DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
|
||||
'locales')
|
||||
@ -117,7 +134,6 @@ class Translations(POT):
|
||||
locale = os.path.splitext(os.path.basename(po_file))[0]
|
||||
return locale, os.path.join(self.DEST, locale, 'messages.mo')
|
||||
|
||||
|
||||
def run(self, opts):
|
||||
for f in self.po_files():
|
||||
locale, dest = self.mo_file(f)
|
||||
@ -126,7 +142,7 @@ class Translations(POT):
|
||||
os.makedirs(base)
|
||||
self.info('\tCompiling translations for', locale)
|
||||
subprocess.check_call(['msgfmt', '-o', dest, f])
|
||||
if locale in ('en_GB', 'nds', 'te', 'yi'):
|
||||
if locale in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc', 'ltg', 'nds', 'te', 'yi'):
|
||||
continue
|
||||
pycountry = self.j(sysconfig.get_python_lib(), 'pycountry',
|
||||
'locales', locale, 'LC_MESSAGES')
|
||||
@ -140,17 +156,6 @@ class Translations(POT):
|
||||
self.warn('No ISO 639 translations for locale:', locale,
|
||||
'\nDo you have pycountry installed?')
|
||||
|
||||
base = os.path.join(pyqt.qt_data_dir, 'translations')
|
||||
qt_translations = glob.glob(os.path.join(base, 'qt_*.qm'))
|
||||
if not qt_translations:
|
||||
raise Exception('Could not find qt translations')
|
||||
for f in qt_translations:
|
||||
locale = self.s(self.b(f))[0][3:]
|
||||
dest = self.j(self.DEST, locale, 'LC_MESSAGES', 'qt.qm')
|
||||
if self.e(self.d(dest)) and self.newer(dest, f):
|
||||
self.info('\tCopying Qt translation for locale:', locale)
|
||||
shutil.copy2(f, dest)
|
||||
|
||||
self.write_stats()
|
||||
self.freeze_locales()
|
||||
|
||||
@ -201,7 +206,7 @@ class Translations(POT):
|
||||
for x in (i, j, d):
|
||||
if os.path.exists(x):
|
||||
os.remove(x)
|
||||
|
||||
# }}}
|
||||
|
||||
class GetTranslations(Translations):
|
||||
|
||||
|
@ -341,7 +341,7 @@ def random_user_agent():
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
||||
'''
|
||||
Create a mechanize browser for web scraping. The browser handles cookies,
|
||||
refresh requests and ignores robots.txt. Also uses proxy if avaialable.
|
||||
refresh requests and ignores robots.txt. Also uses proxy if available.
|
||||
|
||||
:param honor_time: If True honors pause time in refresh requests
|
||||
:param max_time: Maximum time in seconds to wait during a refresh request
|
||||
@ -353,9 +353,14 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
||||
if user_agent is None:
|
||||
user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
|
||||
opener.addheaders = [('User-agent', user_agent)]
|
||||
http_proxy = get_proxies().get('http', None)
|
||||
proxies = get_proxies()
|
||||
http_proxy = proxies.get('http', None)
|
||||
if http_proxy:
|
||||
opener.set_proxies({'http':http_proxy})
|
||||
https_proxy = proxies.get('https', None)
|
||||
if https_proxy:
|
||||
opener.set_proxies({'https':https_proxy})
|
||||
|
||||
return opener
|
||||
|
||||
def fit_image(width, height, pwidth, pheight):
|
||||
@ -474,7 +479,7 @@ def strftime(fmt, t=None):
|
||||
def my_unichr(num):
|
||||
try:
|
||||
return unichr(num)
|
||||
except ValueError:
|
||||
except (ValueError, OverflowError):
|
||||
return u'?'
|
||||
|
||||
def entity_to_unicode(match, exceptions=[], encoding='cp1252',
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = u'calibre'
|
||||
numeric_version = (0, 8, 9)
|
||||
numeric_version = (0, 8, 12)
|
||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
|
@ -570,7 +570,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
|
||||
from calibre.devices.sne.driver import SNE
|
||||
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
|
||||
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
|
||||
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK)
|
||||
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY)
|
||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||
from calibre.devices.kobo.driver import KOBO
|
||||
from calibre.devices.bambook.driver import BAMBOOK
|
||||
@ -705,7 +705,7 @@ plugins += [
|
||||
EEEREADER,
|
||||
NEXTBOOK,
|
||||
ADAM,
|
||||
MOOVYBOOK,
|
||||
MOOVYBOOK, COBY,
|
||||
ITUNES,
|
||||
BOEYE_BEX,
|
||||
BOEYE_BDX,
|
||||
@ -843,6 +843,12 @@ class ActionNextMatch(InterfaceActionBase):
|
||||
description = _('Find the next or previous match when searching in '
|
||||
'your calibre library in highlight mode')
|
||||
|
||||
class ActionPickRandom(InterfaceActionBase):
|
||||
name = 'Pick Random Book'
|
||||
actual_plugin = 'calibre.gui2.actions.random:PickRandomAction'
|
||||
description = _('Choose a random book from your calibre library')
|
||||
|
||||
|
||||
class ActionStore(InterfaceActionBase):
|
||||
name = 'Store'
|
||||
author = 'John Schember'
|
||||
@ -873,7 +879,7 @@ plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
|
||||
ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
|
||||
ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
|
||||
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore,
|
||||
ActionPluginUpdater]
|
||||
ActionPluginUpdater, ActionPickRandom]
|
||||
|
||||
# }}}
|
||||
|
||||
@ -1181,6 +1187,26 @@ class StoreBeWriteStore(StoreBase):
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'MOBI', 'PDF']
|
||||
|
||||
class StoreBookotekaStore(StoreBase):
|
||||
name = 'Bookoteka'
|
||||
author = u'Tomasz Długosz'
|
||||
description = u'E-booki w Bookotece dostępne są w formacie EPUB oraz PDF. Publikacje sprzedawane w Bookotece są objęte prawami autorskimi. Zobowiązaliśmy się chronić te prawa, ale bez ograniczania dostępu do książki użytkownikowi, który nabył ją w legalny sposób. Dlatego też Bookoteka stosuje tak zwany „watermarking transakcyjny” czyli swego rodzaju znaki wodne.'
|
||||
actual_plugin = 'calibre.gui2.store.stores.bookoteka_plugin:BookotekaStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'PL'
|
||||
formats = ['EPUB', 'PDF']
|
||||
|
||||
class StoreChitankaStore(StoreBase):
|
||||
name = u'Моята библиотека'
|
||||
author = 'Alex Stanev'
|
||||
description = u'Независим сайт за DRM свободна литература на български език'
|
||||
actual_plugin = 'calibre.gui2.store.stores.chitanka_plugin:ChitankaStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'BG'
|
||||
formats = ['FB2', 'EPUB', 'TXT', 'SFB']
|
||||
|
||||
class StoreDieselEbooksStore(StoreBase):
|
||||
name = 'Diesel eBooks'
|
||||
description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
|
||||
@ -1190,6 +1216,15 @@ class StoreDieselEbooksStore(StoreBase):
|
||||
formats = ['EPUB', 'PDF']
|
||||
affiliate = True
|
||||
|
||||
class StoreEbookNLStore(StoreBase):
|
||||
name = 'eBook.nl'
|
||||
description = u'De eBookwinkel van Nederland'
|
||||
actual_plugin = 'calibre.gui2.store.stores.ebook_nl_plugin:EBookNLStore'
|
||||
|
||||
headquarters = 'NL'
|
||||
formats = ['EPUB', 'PDF']
|
||||
affiliate = True
|
||||
|
||||
class StoreEbookscomStore(StoreBase):
|
||||
name = 'eBooks.com'
|
||||
description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
|
||||
@ -1199,17 +1234,6 @@ class StoreEbookscomStore(StoreBase):
|
||||
formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
|
||||
affiliate = True
|
||||
|
||||
class StoreEPubBuyDEStore(StoreBase):
|
||||
name = 'EPUBBuy DE'
|
||||
author = 'Charles Haley'
|
||||
description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
|
||||
actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'DE'
|
||||
formats = ['EPUB']
|
||||
affiliate = True
|
||||
|
||||
class StoreEBookShoppeUKStore(StoreBase):
|
||||
name = 'ebookShoppe UK'
|
||||
author = u'Charles Haley'
|
||||
@ -1229,14 +1253,15 @@ class StoreEHarlequinStore(StoreBase):
|
||||
formats = ['EPUB', 'PDF']
|
||||
affiliate = True
|
||||
|
||||
class StoreEpubBudStore(StoreBase):
|
||||
name = 'ePub Bud'
|
||||
description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks. A not-for-profit organization devoted to brining self published childrens books to the world.'
|
||||
actual_plugin = 'calibre.gui2.store.stores.epubbud_plugin:EpubBudStore'
|
||||
class StoreEKnigiStore(StoreBase):
|
||||
name = u'еКниги'
|
||||
author = 'Alex Stanev'
|
||||
description = u'Онлайн книжарница за електронни книги и аудио риалити романи'
|
||||
actual_plugin = 'calibre.gui2.store.stores.eknigi_plugin:eKnigiStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB']
|
||||
headquarters = 'BG'
|
||||
formats = ['EPUB', 'PDF', 'HTML']
|
||||
affiliate = True
|
||||
|
||||
class StoreFeedbooksStore(StoreBase):
|
||||
name = 'Feedbooks'
|
||||
@ -1272,6 +1297,7 @@ class StoreGoogleBooksStore(StoreBase):
|
||||
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'PDF', 'TXT']
|
||||
affiliate = True
|
||||
|
||||
class StoreGutenbergStore(StoreBase):
|
||||
name = 'Project Gutenberg'
|
||||
@ -1355,6 +1381,17 @@ class StoreOReillyStore(StoreBase):
|
||||
headquarters = 'US'
|
||||
formats = ['APK', 'DAISY', 'EPUB', 'MOBI', 'PDF']
|
||||
|
||||
class StoreOzonRUStore(StoreBase):
|
||||
name = 'OZON.ru'
|
||||
description = u'ebooks from OZON.ru'
|
||||
actual_plugin = 'calibre.gui2.store.stores.ozon_ru_plugin:OzonRUStore'
|
||||
author = 'Roman Mukhin'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'RU'
|
||||
formats = ['TXT', 'PDF', 'DJVU', 'RTF', 'DOC', 'JAR', 'FB2']
|
||||
affiliate = True
|
||||
|
||||
class StorePragmaticBookshelfStore(StoreBase):
|
||||
name = 'Pragmatic Bookshelf'
|
||||
description = u'The Pragmatic Bookshelf\'s collection of programming and tech books available as ebooks.'
|
||||
@ -1446,12 +1483,14 @@ plugins += [
|
||||
StoreBNStore,
|
||||
StoreBeamEBooksDEStore,
|
||||
StoreBeWriteStore,
|
||||
StoreBookotekaStore,
|
||||
StoreChitankaStore,
|
||||
StoreDieselEbooksStore,
|
||||
StoreEbookNLStore,
|
||||
StoreEbookscomStore,
|
||||
StoreEBookShoppeUKStore,
|
||||
StoreEPubBuyDEStore,
|
||||
StoreEHarlequinStore,
|
||||
StoreEpubBudStore,
|
||||
StoreEKnigiStore,
|
||||
StoreFeedbooksStore,
|
||||
StoreFoylesUKStore,
|
||||
StoreGandalfStore,
|
||||
@ -1465,6 +1504,7 @@ plugins += [
|
||||
StoreNextoStore,
|
||||
StoreOpenBooksStore,
|
||||
StoreOReillyStore,
|
||||
StoreOzonRUStore,
|
||||
StorePragmaticBookshelfStore,
|
||||
StoreSmashwordsStore,
|
||||
StoreVirtualoStore,
|
||||
|
@ -8,7 +8,7 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
# Imports {{{
|
||||
import os, shutil, uuid, json
|
||||
import os, shutil, uuid, json, glob, time, tempfile
|
||||
from functools import partial
|
||||
|
||||
import apsw
|
||||
@ -25,7 +25,7 @@ from calibre.utils.config import to_json, from_json, prefs, tweaks
|
||||
from calibre.utils.date import utcfromtimestamp, parse_date
|
||||
from calibre.utils.filenames import is_case_sensitive
|
||||
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
|
||||
SizeTable, FormatsTable, AuthorsTable, IdentifiersTable)
|
||||
SizeTable, FormatsTable, AuthorsTable, IdentifiersTable, CompositeTable)
|
||||
# }}}
|
||||
|
||||
'''
|
||||
@ -37,6 +37,8 @@ Differences in semantics from pysqlite:
|
||||
|
||||
'''
|
||||
|
||||
SPOOL_SIZE = 30*1024*1024
|
||||
|
||||
class DynamicFilter(object): # {{{
|
||||
|
||||
'No longer used, present for legacy compatibility'
|
||||
@ -478,7 +480,6 @@ class DB(object):
|
||||
remove.append(data)
|
||||
continue
|
||||
|
||||
self.custom_column_label_map[data['label']] = data['num']
|
||||
self.custom_column_num_map[data['num']] = \
|
||||
self.custom_column_label_map[data['label']] = data
|
||||
|
||||
@ -613,10 +614,31 @@ class DB(object):
|
||||
|
||||
tables['size'] = SizeTable('size', self.field_metadata['size'].copy())
|
||||
|
||||
for label, data in self.custom_column_label_map.iteritems():
|
||||
label = '#' + label
|
||||
self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
|
||||
'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
|
||||
'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
|
||||
'formats':13, 'path':14, 'pubdate':15, 'uuid':16, 'cover':17,
|
||||
'au_map':18, 'last_modified':19, 'identifiers':20}
|
||||
|
||||
for k,v in self.FIELD_MAP.iteritems():
|
||||
self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
|
||||
|
||||
base = max(self.FIELD_MAP.itervalues())
|
||||
|
||||
for label_, data in self.custom_column_label_map.iteritems():
|
||||
label = self.field_metadata.custom_field_prefix + label_
|
||||
metadata = self.field_metadata[label].copy()
|
||||
link_table = self.custom_table_names(data['num'])[1]
|
||||
self.FIELD_MAP[data['num']] = base = base+1
|
||||
self.field_metadata.set_field_record_index(label_, base,
|
||||
prefer_custom=True)
|
||||
if data['datatype'] == 'series':
|
||||
# account for the series index column. Field_metadata knows that
|
||||
# the series index is one larger than the series. If you change
|
||||
# it here, be sure to change it there as well.
|
||||
self.FIELD_MAP[str(data['num'])+'_index'] = base = base+1
|
||||
self.field_metadata.set_field_record_index(label_+'_index', base,
|
||||
prefer_custom=True)
|
||||
|
||||
if data['normalized']:
|
||||
if metadata['is_multiple']:
|
||||
@ -632,8 +654,17 @@ class DB(object):
|
||||
metadata['column'] = 'extra'
|
||||
metadata['table'] = link_table
|
||||
tables[label] = OneToOneTable(label, metadata)
|
||||
else:
|
||||
if data['datatype'] == 'composite':
|
||||
tables[label] = CompositeTable(label, metadata)
|
||||
else:
|
||||
tables[label] = OneToOneTable(label, metadata)
|
||||
|
||||
self.FIELD_MAP['ondevice'] = base = base+1
|
||||
self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
|
||||
self.FIELD_MAP['marked'] = base = base+1
|
||||
self.field_metadata.set_field_record_index('marked', base, prefer_custom=False)
|
||||
|
||||
# }}}
|
||||
|
||||
@property
|
||||
@ -732,5 +763,57 @@ class DB(object):
|
||||
pprint.pprint(table.metadata)
|
||||
raise
|
||||
|
||||
def format_abspath(self, book_id, fmt, fname, path):
|
||||
path = os.path.join(self.library_path, path)
|
||||
fmt = ('.' + fmt.lower()) if fmt else ''
|
||||
fmt_path = os.path.join(path, fname+fmt)
|
||||
if os.path.exists(fmt_path):
|
||||
return fmt_path
|
||||
try:
|
||||
candidates = glob.glob(os.path.join(path, '*'+fmt))
|
||||
except: # If path contains strange characters this throws an exc
|
||||
candidates = []
|
||||
if fmt and candidates and os.path.exists(candidates[0]):
|
||||
shutil.copyfile(candidates[0], fmt_path)
|
||||
return fmt_path
|
||||
|
||||
def format_metadata(self, book_id, fmt, fname, path):
|
||||
path = self.format_abspath(book_id, fmt, fname, path)
|
||||
ans = {}
|
||||
if path is not None:
|
||||
stat = os.stat(path)
|
||||
ans['size'] = stat.st_size
|
||||
ans['mtime'] = utcfromtimestamp(stat.st_mtime)
|
||||
return ans
|
||||
|
||||
def cover(self, path, as_file=False, as_image=False,
|
||||
as_path=False):
|
||||
path = os.path.join(self.library_path, path, 'cover.jpg')
|
||||
ret = None
|
||||
if os.access(path, os.R_OK):
|
||||
try:
|
||||
f = lopen(path, 'rb')
|
||||
except (IOError, OSError):
|
||||
time.sleep(0.2)
|
||||
f = lopen(path, 'rb')
|
||||
with f:
|
||||
if as_path:
|
||||
pt = PersistentTemporaryFile('_dbcover.jpg')
|
||||
with pt:
|
||||
shutil.copyfileobj(f, pt)
|
||||
return pt.name
|
||||
if as_file:
|
||||
ret = tempfile.SpooledTemporaryFile(SPOOL_SIZE)
|
||||
shutil.copyfileobj(f, ret)
|
||||
ret.seek(0)
|
||||
else:
|
||||
ret = f.read()
|
||||
if as_image:
|
||||
from PyQt4.Qt import QImage
|
||||
i = QImage()
|
||||
i.loadFromData(ret)
|
||||
ret = i
|
||||
return ret
|
||||
|
||||
# }}}
|
||||
|
||||
|
@ -7,5 +7,380 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from functools import wraps, partial
|
||||
|
||||
from calibre.db.locking import create_locks, RecordLock
|
||||
from calibre.db.fields import create_field
|
||||
from calibre.ebooks.book.base import Metadata
|
||||
from calibre.utils.date import now
|
||||
|
||||
def api(f):
|
||||
f.is_cache_api = True
|
||||
return f
|
||||
|
||||
def read_api(f):
|
||||
f = api(f)
|
||||
f.is_read_api = True
|
||||
return f
|
||||
|
||||
def write_api(f):
|
||||
f = api(f)
|
||||
f.is_read_api = False
|
||||
return f
|
||||
|
||||
def wrap_simple(lock, func):
|
||||
@wraps(func)
|
||||
def ans(*args, **kwargs):
|
||||
with lock:
|
||||
return func(*args, **kwargs)
|
||||
return ans
|
||||
|
||||
|
||||
class Cache(object):
|
||||
|
||||
def __init__(self, backend):
|
||||
self.backend = backend
|
||||
self.fields = {}
|
||||
self.composites = set()
|
||||
self.read_lock, self.write_lock = create_locks()
|
||||
self.record_lock = RecordLock(self.read_lock)
|
||||
self.format_metadata_cache = defaultdict(dict)
|
||||
|
||||
# Implement locking for all simple read/write API methods
|
||||
# An unlocked version of the method is stored with the name starting
|
||||
# with a leading underscore. Use the unlocked versions when the lock
|
||||
# has already been acquired.
|
||||
for name in dir(self):
|
||||
func = getattr(self, name)
|
||||
ira = getattr(func, 'is_read_api', None)
|
||||
if ira is not None:
|
||||
# Save original function
|
||||
setattr(self, '_'+name, func)
|
||||
# Wrap it in a lock
|
||||
lock = self.read_lock if ira else self.write_lock
|
||||
setattr(self, name, wrap_simple(lock, func))
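# After this loop, e.g. cache.field_for(...) transparently takes the read
# lock, while cache._field_for(...) remains the raw, unlocked version for
# callers that already hold the appropriate lock.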
|
||||
|
||||
@property
|
||||
def field_metadata(self):
|
||||
return self.backend.field_metadata
|
||||
|
||||
def _format_abspath(self, book_id, fmt):
|
||||
'''
|
||||
Return absolute path to the ebook file of format `format`
|
||||
|
||||
WARNING: This method will return a dummy path for a network backend DB,
|
||||
so do not rely on it, use format(..., as_path=True) instead.
|
||||
|
||||
Currently used only in calibredb list, the viewer and the catalogs (via
|
||||
get_data_as_dict()).
|
||||
|
||||
Apart from the viewer, I don't believe any of the others do any file
|
||||
I/O with the results of this call.
|
||||
'''
|
||||
try:
|
||||
name = self.fields['formats'].format_fname(book_id, fmt)
|
||||
path = self._field_for('path', book_id).replace('/', os.sep)
|
||||
except:
|
||||
return None
|
||||
if name and path:
|
||||
return self.backend.format_abspath(book_id, fmt, name, path)
|
||||
|
||||
def _get_metadata(self, book_id, get_user_categories=True): # {{{
|
||||
mi = Metadata(None)
|
||||
author_ids = self._field_ids_for('authors', book_id)
|
||||
aut_list = [self._author_data(i) for i in author_ids]
|
||||
aum = []
|
||||
aus = {}
|
||||
aul = {}
|
||||
for rec in aut_list:
|
||||
aut = rec['name']
|
||||
aum.append(aut)
|
||||
aus[aut] = rec['sort']
|
||||
aul[aut] = rec['link']
|
||||
mi.title = self._field_for('title', book_id,
|
||||
default_value=_('Unknown'))
|
||||
mi.authors = aum
|
||||
mi.author_sort = self._field_for('author_sort', book_id,
|
||||
default_value=_('Unknown'))
|
||||
mi.author_sort_map = aus
|
||||
mi.author_link_map = aul
|
||||
mi.comments = self._field_for('comments', book_id)
|
||||
mi.publisher = self._field_for('publisher', book_id)
|
||||
n = now()
|
||||
mi.timestamp = self._field_for('timestamp', book_id, default_value=n)
|
||||
mi.pubdate = self._field_for('pubdate', book_id, default_value=n)
|
||||
mi.uuid = self._field_for('uuid', book_id,
|
||||
default_value='dummy')
|
||||
mi.title_sort = self._field_for('sort', book_id,
|
||||
default_value=_('Unknown'))
|
||||
mi.book_size = self._field_for('size', book_id, default_value=0)
|
||||
mi.ondevice_col = self._field_for('ondevice', book_id, default_value='')
|
||||
mi.last_modified = self._field_for('last_modified', book_id,
|
||||
default_value=n)
|
||||
formats = self._field_for('formats', book_id)
|
||||
mi.format_metadata = {}
|
||||
if not formats:
|
||||
formats = None
|
||||
else:
|
||||
for f in formats:
|
||||
mi.format_metadata[f] = self._format_metadata(book_id, f)
|
||||
formats = ','.join(formats)
|
||||
mi.formats = formats
|
||||
mi.has_cover = _('Yes') if self._field_for('cover', book_id,
|
||||
default_value=False) else ''
|
||||
mi.tags = list(self._field_for('tags', book_id, default_value=()))
|
||||
mi.series = self._field_for('series', book_id)
|
||||
if mi.series:
|
||||
mi.series_index = self._field_for('series_index', book_id,
|
||||
default_value=1.0)
|
||||
mi.rating = self._field_for('rating', book_id)
|
||||
mi.set_identifiers(self._field_for('identifiers', book_id,
|
||||
default_value={}))
|
||||
mi.application_id = book_id
|
||||
mi.id = book_id
|
||||
composites = []
|
||||
for key, meta in self.field_metadata.custom_iteritems():
|
||||
mi.set_user_metadata(key, meta)
|
||||
if meta['datatype'] == 'composite':
|
||||
composites.append(key)
|
||||
else:
|
||||
mi.set(key, val=self._field_for(meta['label'], book_id),
|
||||
extra=self._field_for(meta['label']+'_index', book_id))
|
||||
for c in composites:
|
||||
mi.set(c, val=self._composite_for(c, book_id, mi))
|
||||
|
||||
user_cat_vals = {}
|
||||
if get_user_categories:
|
||||
user_cats = self.prefs['user_categories']
|
||||
for ucat in user_cats:
|
||||
res = []
|
||||
for name,cat,ign in user_cats[ucat]:
|
||||
v = mi.get(cat, None)
|
||||
if isinstance(v, list):
|
||||
if name in v:
|
||||
res.append([name,cat])
|
||||
elif name == v:
|
||||
res.append([name,cat])
|
||||
user_cat_vals[ucat] = res
|
||||
mi.user_categories = user_cat_vals
|
||||
|
||||
return mi
|
||||
# }}}
|
||||
|
||||
# Cache Layer API {{{
|
||||
|
||||
@api
|
||||
def init(self):
|
||||
'''
|
||||
Initialize this cache with data from the backend.
|
||||
'''
|
||||
with self.write_lock:
|
||||
self.backend.read_tables()
|
||||
|
||||
for field, table in self.backend.tables.iteritems():
|
||||
self.fields[field] = create_field(field, table)
|
||||
if table.metadata['datatype'] == 'composite':
|
||||
self.composites.add(field)
|
||||
|
||||
self.fields['ondevice'] = create_field('ondevice', None)
|
||||
|
||||
@read_api
|
||||
def field_for(self, name, book_id, default_value=None):
|
||||
'''
|
||||
Return the value of the field ``name`` for the book identified by
|
||||
``book_id``. If no such book exists or it has no defined value for the
|
||||
field ``name`` or no such field exists, then ``default_value`` is returned.
|
||||
|
||||
The returned value for is_multiple fields is always a tuple.
|
||||
'''
|
||||
if self.composites and name in self.composites:
|
||||
return self.composite_for(name, book_id,
|
||||
default_value=default_value)
|
||||
try:
|
||||
return self.fields[name].for_book(book_id, default_value=default_value)
|
||||
except (KeyError, IndexError):
|
||||
return default_value
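# Example (hypothetical ids and values): field_for('tags', 42,
# default_value=()) might return ('Fiction', 'SF'); an is_multiple field
# always comes back as a tuple.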
|
||||
|
||||
@read_api
|
||||
def composite_for(self, name, book_id, mi=None, default_value=''):
|
||||
try:
|
||||
f = self.fields[name]
|
||||
except KeyError:
|
||||
return default_value
|
||||
|
||||
if mi is None:
|
||||
return f.get_value_with_cache(book_id, partial(self._get_metadata,
|
||||
get_user_categories=False))
|
||||
else:
|
||||
return f.render_composite(book_id, mi)
|
||||
|
||||
@read_api
|
||||
def field_ids_for(self, name, book_id):
|
||||
'''
|
||||
Return the ids (as a tuple) for the values that the field ``name`` has on the book
|
||||
identified by ``book_id``. If there are no values, or no such book, or
|
||||
no such field, an empty tuple is returned.
|
||||
'''
|
||||
try:
|
||||
return self.fields[name].ids_for_book(book_id)
|
||||
except (KeyError, IndexError):
|
||||
return ()
|
||||
|
||||
@read_api
|
||||
def books_for_field(self, name, item_id):
|
||||
'''
|
||||
Return all the books associated with the item identified by
|
||||
``item_id``, where the item belongs to the field ``name``.
|
||||
|
||||
Returned value is a tuple of book ids, or the empty tuple if the item
|
||||
or the field does not exist.
|
||||
'''
|
||||
try:
|
||||
return self.fields[name].books_for(item_id)
|
||||
except (KeyError, IndexError):
|
||||
return ()
|
||||
|
||||
@read_api
|
||||
def all_book_ids(self):
|
||||
'''
|
||||
Frozen set of all known book ids.
|
||||
'''
|
||||
return frozenset(self.fields['uuid'].iter_book_ids())
|
||||
|
||||
@read_api
|
||||
def all_field_ids(self, name):
|
||||
'''
|
||||
Frozen set of ids for all values in the field ``name``.
|
||||
'''
|
||||
return frozenset(iter(self.fields[name]))
|
||||
|
||||
@read_api
|
||||
def author_data(self, author_id):
|
||||
'''
|
||||
Return author data as a dictionary with keys: name, sort, link
|
||||
|
||||
If no author with the specified id is found an empty dictionary is
|
||||
returned.
|
||||
'''
|
||||
try:
|
||||
return self.fields['authors'].author_data(author_id)
|
||||
except (KeyError, IndexError):
|
||||
return {}
|
||||
|
||||
@read_api
|
||||
def format_metadata(self, book_id, fmt, allow_cache=True):
|
||||
if not fmt:
|
||||
return {}
|
||||
fmt = fmt.upper()
|
||||
if allow_cache:
|
||||
x = self.format_metadata_cache[book_id].get(fmt, None)
|
||||
if x is not None:
|
||||
return x
|
||||
try:
|
||||
name = self.fields['formats'].format_fname(book_id, fmt)
|
||||
path = self._field_for('path', book_id).replace('/', os.sep)
|
||||
except:
|
||||
return {}
|
||||
|
||||
ans = {}
|
||||
if path and name:
|
||||
ans = self.backend.format_metadata(book_id, fmt, name, path)
|
||||
self.format_metadata_cache[book_id][fmt] = ans
|
||||
return ans
|
||||
|
||||
@api
|
||||
def get_metadata(self, book_id,
|
||||
get_cover=False, get_user_categories=True, cover_as_data=False):
|
||||
'''
|
||||
Return metadata for the book identified by book_id as a :class:`Metadata` object.
|
||||
Note that the list of formats is not verified. If get_cover is True,
|
||||
the cover is returned, either a path to temp file as mi.cover or if
|
||||
cover_as_data is True then as mi.cover_data.
|
||||
'''
|
||||
|
||||
with self.read_lock:
|
||||
mi = self._get_metadata(book_id, get_user_categories=get_user_categories)
|
||||
|
||||
if get_cover:
|
||||
if cover_as_data:
|
||||
cdata = self.cover(book_id)
|
||||
if cdata:
|
||||
mi.cover_data = ('jpeg', cdata)
|
||||
else:
|
||||
mi.cover = self.cover(book_id, as_path=True)
|
||||
|
||||
return mi
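# Typical use (sketch):
#   mi = cache.get_metadata(book_id, get_cover=True, cover_as_data=True)
#   # mi.cover_data is ('jpeg', <bytes>) when a cover exists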
|
||||
|
||||
@api
|
||||
def cover(self, book_id,
|
||||
as_file=False, as_image=False, as_path=False):
|
||||
'''
|
||||
Return the cover image or None. By default, returns the cover as a
|
||||
bytestring.
|
||||
|
||||
WARNING: Using as_path will copy the cover to a temp file and return
|
||||
the path to the temp file. You should delete the temp file when you are
|
||||
done with it.
|
||||
|
||||
:param as_file: If True return the image as an open file object (a SpooledTemporaryFile)
|
||||
:param as_image: If True return the image as a QImage object
|
||||
:param as_path: If True return the image as a path pointing to a
|
||||
temporary file
|
||||
'''
|
||||
with self.read_lock:
|
||||
try:
|
||||
path = self._field_for('path', book_id).replace('/', os.sep)
|
||||
except:
|
||||
return None
|
||||
|
||||
with self.record_lock.lock(book_id):
|
||||
return self.backend.cover(path, as_file=as_file, as_image=as_image,
|
||||
as_path=as_path)
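# Sketch: cache.cover(book_id) returns the raw JPEG bytes; with as_path=True
# it returns the path to a temporary copy that the caller must delete.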
|
||||
|
||||
@read_api
|
||||
def multisort(self, fields):
|
||||
all_book_ids = frozenset(self._all_book_ids())
|
||||
get_metadata = partial(self._get_metadata, get_user_categories=False)
|
||||
|
||||
sort_keys = tuple(self.fields[field[0]].sort_keys_for_books(get_metadata,
|
||||
all_book_ids) for field in fields)
|
||||
|
||||
if len(sort_keys) == 1:
|
||||
sk = sort_keys[0]
|
||||
return sorted(all_book_ids, key=lambda i:sk[i], reverse=not
|
||||
fields[0][1])
|
||||
else:
|
||||
return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))
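# Example (hypothetical call): multisort([('series', True), ('series_index',
# True)]) orders all book ids by series ascending, then by index within each
# series; the boolean in each (field, ascending) pair selects the direction.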
|
||||
|
||||
# }}}
|
||||
|
||||
class SortKey(object):
|
||||
|
||||
def __init__(self, fields, sort_keys, book_id):
|
||||
self.orders = tuple(1 if f[1] else -1 for f in fields)
|
||||
self.sort_key = tuple(sk[book_id] for sk in sort_keys)
|
||||
|
||||
def __cmp__(self, other):
|
||||
for i, order in enumerate(self.orders):
|
||||
ans = cmp(self.sort_key[i], other.sort_key[i])
|
||||
if ans != 0:
|
||||
return ans * order
|
||||
return 0
|
||||
|
||||
|
||||
# Testing {{{
|
||||
|
||||
def test(library_path):
|
||||
from calibre.db.backend import DB
|
||||
backend = DB(library_path)
|
||||
cache = Cache(backend)
|
||||
cache.init()
|
||||
print ('All book ids:', cache.all_book_ids())
|
||||
|
||||
if __name__ == '__main__':
|
||||
from calibre.utils.config import prefs
|
||||
test(prefs['library_path'])
|
||||
|
||||
# }}}
|
||||
|
257
src/calibre/db/fields.py
Normal file
@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
from future_builtins import map
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from threading import Lock
|
||||
|
||||
from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
|
||||
from calibre.utils.icu import sort_key
|
||||
|
||||
class Field(object):
|
||||
|
||||
def __init__(self, name, table):
|
||||
self.name, self.table = name, table
|
||||
self.has_text_data = self.metadata['datatype'] in ('text', 'comments',
|
||||
'series', 'enumeration')
|
||||
self.table_type = self.table.table_type
|
||||
dt = self.metadata['datatype']
|
||||
self._sort_key = (sort_key if dt == 'text' else lambda x: x)
|
||||
|
||||
@property
|
||||
def metadata(self):
|
||||
return self.table.metadata
|
||||
|
||||
def for_book(self, book_id, default_value=None):
|
||||
'''
|
||||
Return the value of this field for the book identified by book_id.
|
||||
When no value is found, returns ``default_value``.
|
||||
'''
|
||||
raise NotImplementedError()
|
||||
|
||||
def ids_for_book(self, book_id):
|
||||
'''
|
||||
Return a tuple of item ids for items associated with the book
|
||||
identified by book_id. Returns an empty tuple if no such items are
|
||||
found.
|
||||
'''
|
||||
raise NotImplementedError()
|
||||
|
||||
def books_for(self, item_id):
|
||||
'''
|
||||
Return the ids of all books associated with the item identified by
|
||||
item_id as a tuple. An empty tuple is returned if no books are found.
|
||||
'''
|
||||
raise NotImplementedError()
|
||||
|
||||
def __iter__(self):
|
||||
'''
|
||||
Iterate over the ids for all values in this field
|
||||
'''
|
||||
raise NotImplementedError()
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, all_book_ids):
|
||||
'''
|
||||
Return a mapping of book_id -> sort_key. The sort key is suitable for
|
||||
use in sorting the list of all books by this field, via the python cmp
|
||||
method.
|
||||
'''
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class OneToOneField(Field):
|
||||
|
||||
def for_book(self, book_id, default_value=None):
|
||||
return self.table.book_col_map.get(book_id, default_value)
|
||||
|
||||
def ids_for_book(self, book_id):
|
||||
return (book_id,)
|
||||
|
||||
def books_for(self, item_id):
|
||||
return (item_id,)
|
||||
|
||||
def __iter__(self):
|
||||
return self.table.book_col_map.iterkeys()
|
||||
|
||||
def iter_book_ids(self):
|
||||
return self.table.book_col_map.iterkeys()
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, all_book_ids):
|
||||
return {id_ : self._sort_key(self.table.book_col_map.get(id_, '')) for id_ in
|
||||
all_book_ids}
|
||||
|
||||
class CompositeField(OneToOneField):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
OneToOneField.__init__(self, *args, **kwargs)
|
||||
|
||||
self._render_cache = {}
|
||||
self._lock = Lock()
|
||||
|
||||
def render_composite(self, book_id, mi):
|
||||
with self._lock:
|
||||
ans = self._render_cache.get(book_id, None)
|
||||
if ans is None:
|
||||
ans = mi.get(self.metadata['label'])
|
||||
with self._lock:
|
||||
self._render_cache[book_id] = ans
|
||||
return ans
|
||||
|
||||
def clear_cache(self):
|
||||
with self._lock:
|
||||
self._render_cache = {}
|
||||
|
||||
def pop_cache(self, book_id):
|
||||
with self._lock:
|
||||
self._render_cache.pop(book_id, None)
|
||||
|
||||
def get_value_with_cache(self, book_id, get_metadata):
|
||||
with self._lock:
|
||||
ans = self._render_cache.get(book_id, None)
|
||||
if ans is None:
|
||||
mi = get_metadata(book_id)
|
||||
ans = mi.get(self.metadata['label'])
|
||||
return ans
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, all_book_ids):
|
||||
return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
|
||||
all_book_ids}
|
||||
|
||||
|
||||
class OnDeviceField(OneToOneField):
|
||||
|
||||
def __init__(self, name, table):
|
||||
self.name = name
|
||||
self.book_on_device_func = None
|
||||
|
||||
def book_on_device(self, book_id):
|
||||
if callable(self.book_on_device_func):
|
||||
return self.book_on_device_func(book_id)
|
||||
return None
|
||||
|
||||
def set_book_on_device_func(self, func):
|
||||
self.book_on_device_func = func
|
||||
|
||||
def for_book(self, book_id, default_value=None):
|
||||
loc = []
|
||||
count = 0
|
||||
on = self.book_on_device(book_id)
|
||||
if on is not None:
|
||||
m, a, b, count = on[:4]
|
||||
if m is not None:
|
||||
loc.append(_('Main'))
|
||||
if a is not None:
|
||||
loc.append(_('Card A'))
|
||||
if b is not None:
|
||||
loc.append(_('Card B'))
|
||||
return ', '.join(loc) + ((' (%s books)'%count) if count > 1 else '')
|
||||
|
||||
def __iter__(self):
|
||||
return iter(())
|
||||
|
||||
def iter_book_ids(self):
|
||||
return iter(())
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, all_book_ids):
|
||||
return {id_ : self.for_book(id_) for id_ in
|
||||
all_book_ids}
|
||||
|
||||
class ManyToOneField(Field):
|
||||
|
||||
def for_book(self, book_id, default_value=None):
|
||||
ids = self.table.book_col_map.get(book_id, None)
|
||||
if ids is not None:
|
||||
ans = self.id_map[ids]
|
||||
else:
|
||||
ans = default_value
|
||||
return ans
|
||||
|
||||
def ids_for_book(self, book_id):
|
||||
id_ = self.table.book_col_map.get(book_id, None)
|
||||
if id_ is None:
|
||||
return ()
|
||||
return (id_,)
|
||||
|
||||
def books_for(self, item_id):
|
||||
return self.table.col_book_map.get(item_id, ())
|
||||
|
||||
def __iter__(self):
|
||||
return self.table.id_map.iterkeys()
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, all_book_ids):
|
||||
keys = {id_ : self._sort_key(self.id_map.get(id_, '')) for id_ in
|
||||
all_book_ids}
|
||||
return {id_ : keys.get(
|
||||
self.book_col_map.get(id_, None), '') for id_ in all_book_ids}
|
||||
|
||||
class ManyToManyField(Field):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
Field.__init__(self, *args, **kwargs)
|
||||
self.alphabetical_sort = self.name != 'authors'
|
||||
|
||||
def for_book(self, book_id, default_value=None):
|
||||
ids = self.table.book_col_map.get(book_id, ())
|
||||
if ids:
|
||||
ans = tuple(self.id_map[i] for i in ids)
|
||||
else:
|
||||
ans = default_value
|
||||
return ans
|
||||
|
||||
def ids_for_book(self, book_id):
|
||||
return self.table.book_col_map.get(book_id, ())
|
||||
|
||||
def books_for(self, item_id):
|
||||
return self.table.col_book_map.get(item_id, ())
|
||||
|
||||
def __iter__(self):
|
||||
return self.table.id_map.iterkeys()
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, all_book_ids):
|
||||
keys = {id_ : self._sort_key(self.id_map.get(id_, '')) for id_ in
|
||||
all_book_ids}
|
||||
|
||||
def sort_key_for_book(book_id):
|
||||
item_ids = self.table.book_col_map.get(book_id, ())
|
||||
if self.alphabetical_sort:
|
||||
item_ids = sorted(item_ids, key=keys.get)
|
||||
return tuple(map(keys.get, item_ids))
|
||||
|
||||
return {id_ : sort_key_for_book(id_) for id_ in all_book_ids}
|
||||
|
||||
|
||||
class AuthorsField(ManyToManyField):
|
||||
|
||||
def author_data(self, author_id):
|
||||
return {
|
||||
'name' : self.table.id_map[author_id],
|
||||
'sort' : self.table.asort_map[author_id],
|
||||
'link' : self.table.alink_map[author_id],
|
||||
}
|
||||
|
||||
class FormatsField(ManyToManyField):
|
||||
|
||||
def format_fname(self, book_id, fmt):
|
||||
return self.table.fname_map[book_id][fmt.upper()]
|
||||
|
||||
def create_field(name, table):
|
||||
cls = {
|
||||
ONE_ONE : OneToOneField,
|
||||
MANY_ONE : ManyToOneField,
|
||||
MANY_MANY : ManyToManyField,
|
||||
}[table.table_type]
|
||||
if name == 'authors':
|
||||
cls = AuthorsField
|
||||
elif name == 'ondevice':
|
||||
cls = OnDeviceField
|
||||
elif name == 'formats':
|
||||
cls = FormatsField
|
||||
elif table.metadata['datatype'] == 'composite':
|
||||
cls = CompositeField
|
||||
return cls(name, table)
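
A minimal sketch of how this Field hierarchy is driven. The FakeTable below is a stand-in invented for this illustration (real tables come from calibre.db.tables); only the attributes the code above actually reads are stubbed.

    from calibre.db.tables import ONE_ONE
    from calibre.db.fields import create_field

    class FakeTable(object):
        # Only the attributes Field/OneToOneField touch, nothing more.
        table_type = ONE_ONE
        metadata = {'datatype': 'text'}
        book_col_map = {1: 'Zebra', 2: 'apple'}

    f = create_field('title', FakeTable())
    print(f.for_book(1))        # -> 'Zebra'
    print(f.ids_for_book(1))    # -> (1,); one-to-one fields map a book id to itself
    keys = f.sort_keys_for_books(None, (1, 2))
    # keys[2] sorts before keys[1]: ICU sort keys give case-insensitive ordering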
@@ -7,7 +7,9 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

-from threading import Lock, Condition, current_thread
+from threading import Lock, Condition, current_thread, RLock
+from functools import partial
+from collections import Counter

class LockingError(RuntimeError):
    pass

@@ -37,7 +39,7 @@ def create_locks():
    l = SHLock()
    return RWLockWrapper(l), RWLockWrapper(l, is_shared=False)

-class SHLock(object):
+class SHLock(object):  # {{{
    '''
    Shareable lock class. Used to implement the Multiple readers-single writer
    paradigm. As best as I can tell, neither writer nor reader starvation
@@ -79,6 +81,11 @@ class SHLock(object):
            return self._acquire_exclusive(blocking)
        assert not (self.is_shared and self.is_exclusive)

+    def owns_lock(self):
+        me = current_thread()
+        with self._lock:
+            return self._exclusive_owner is me or me in self._shared_owners
+
    def release(self):
        ''' Release the lock. '''
        # This decrements the appropriate lock counters, and if the lock
@@ -189,6 +196,8 @@ class SHLock(object):
    def _return_waiter(self, waiter):
        self._free_waiters.append(waiter)

+# }}}
+
class RWLockWrapper(object):

    def __init__(self, shlock, is_shared=True):
@@ -200,16 +209,124 @@ class RWLockWrapper(object):
        return self

    def __exit__(self, *args):
        self.release()

    def release(self):
        self._shlock.release()

+    def owns_lock(self):
+        return self._shlock.owns_lock()
+
+class RecordLock(object):
+
+    '''
+    Lock records identified by hashable ids. To use:
+
+    rl = RecordLock(sh_lock)
+
+    with rl.lock(some_id):
+        # do something
+
+    This will lock the record identified by some_id exclusively. The lock is
+    recursive, which means that you can lock the same record multiple times in
+    the same thread.
+
+    This class co-operates with the SHLock class. If you try to lock a record
+    in a thread that already holds the SHLock, a LockingError is raised. This
+    is to prevent the possibility of a cross-lock deadlock.
+
+    A cross-lock deadlock is still possible if you first lock a record and then
+    acquire the SHLock, but the usage pattern for this lock makes this highly
+    unlikely (this lock should be acquired immediately before any file I/O on
+    files in the library and released immediately after).
+    '''
+
+    class Wrap(object):
+
+        def __init__(self, release):
+            self.release = release
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, *args, **kwargs):
+            self.release()
+            self.release = None
+
+    def __init__(self, sh_lock):
+        self._lock = Lock()
+        # This is for recycling lock objects.
+        self._free_locks = [RLock()]
+        self._records = {}
+        self._counter = Counter()
+        self.sh_lock = sh_lock
+
+    def lock(self, record_id):
+        if self.sh_lock.owns_lock():
+            raise LockingError('Current thread already holds a shared lock,'
+                ' you cannot also ask for record lock as this could cause a'
+                ' deadlock.')
+        with self._lock:
+            l = self._records.get(record_id, None)
+            if l is None:
+                l = self._take_lock()
+                self._records[record_id] = l
+            self._counter[record_id] += 1
+        l.acquire()
+        return RecordLock.Wrap(partial(self.release, record_id))
+
+    def release(self, record_id):
+        with self._lock:
+            l = self._records.pop(record_id, None)
+            if l is None:
+                raise LockingError('No lock acquired for record %r'%record_id)
+            l.release()
+            self._counter[record_id] -= 1
+            if self._counter[record_id] > 0:
+                self._records[record_id] = l
+            else:
+                self._return_lock(l)
+
+    def _take_lock(self):
+        try:
+            return self._free_locks.pop()
+        except IndexError:
+            return RLock()
+
+    def _return_lock(self, lock):
+        self._free_locks.append(lock)
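
A usage sketch for the new locking pieces, following the RecordLock docstring above (the record ids here are illustrative, not from the diff):

    from calibre.db.locks import SHLock, RecordLock, create_locks

    read_lock, write_lock = create_locks()   # two wrappers over one SHLock

    shlock = SHLock()
    rl = RecordLock(shlock)

    with rl.lock(42):        # exclusive, per-record
        with rl.lock(42):    # recursive: the same thread may re-enter
            pass             # ... do file I/O for record 42 here ...

    # Acquiring the SHLock first and then asking for a record lock raises
    # LockingError -- the cross-lock deadlock guard described above.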
# Tests {{{
if __name__ == '__main__':
    import time, random, unittest
    from threading import Thread

-    class TestSHLock(unittest.TestCase):
-        """Testcases for SHLock class."""
+    class TestLock(unittest.TestCase):
+        """Testcases for Lock classes."""

+        def test_owns_locks(self):
+            lock = SHLock()
+            self.assertFalse(lock.owns_lock())
+            lock.acquire(shared=True)
+            self.assertTrue(lock.owns_lock())
+            lock.release()
+            self.assertFalse(lock.owns_lock())
+            lock.acquire(shared=False)
+            self.assertTrue(lock.owns_lock())
+            lock.release()
+            self.assertFalse(lock.owns_lock())
+
+            done = []
+            def test():
+                if not lock.owns_lock():
+                    done.append(True)
+                lock.acquire()
+            t = Thread(target=test)
+            t.daemon = True
+            t.start()
+            t.join(1)
+            self.assertEqual(len(done), 1)
+            lock.release()

        def test_multithread_deadlock(self):
            lock = SHLock()

@@ -345,8 +462,38 @@ if __name__ == '__main__':
            self.assertFalse(lock.is_shared)
            self.assertFalse(lock.is_exclusive)

+        def test_record_lock(self):
+            shlock = SHLock()
+            lock = RecordLock(shlock)
+
+            shlock.acquire()
+            self.assertRaises(LockingError, lock.lock, 1)
+            shlock.release()
+            with lock.lock(1):
+                with lock.lock(1):
+                    pass
+
+            def dolock():
+                with lock.lock(1):
+                    time.sleep(0.1)
+
+            t = Thread(target=dolock)
+            t.daemon = True
+            with lock.lock(1):
+                t.start()
+                t.join(0.2)
+                self.assertTrue(t.is_alive())
+            t.join(0.11)
+            self.assertFalse(t.is_alive())
+
+            t = Thread(target=dolock)
+            t.daemon = True
+            with lock.lock(2):
+                t.start()
+                t.join(0.11)
+            self.assertFalse(t.is_alive())
+
-    suite = unittest.TestLoader().loadTestsFromTestCase(TestSHLock)
+    suite = unittest.TestLoader().loadTestsFromTestCase(TestLock)
    unittest.TextTestRunner(verbosity=2).run(suite)

# }}}
@@ -12,11 +12,13 @@ from datetime import datetime
from dateutil.tz import tzoffset

from calibre.constants import plugins
-from calibre.utils.date import parse_date, local_tz
+from calibre.utils.date import parse_date, local_tz, UNDEFINED_DATE
from calibre.ebooks.metadata import author_to_author_sort

_c_speedup = plugins['speedup'][0]

+ONE_ONE, MANY_ONE, MANY_MANY = xrange(3)
+
def _c_convert_timestamp(val):
    if not val:
        return None
@@ -27,8 +29,11 @@ def _c_convert_timestamp(val):
    if ret is None:
        return parse_date(val, as_utc=False)
    year, month, day, hour, minutes, seconds, tzsecs = ret
-    return datetime(year, month, day, hour, minutes, seconds,
-            tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
+    try:
+        return datetime(year, month, day, hour, minutes, seconds,
+                tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
+    except OverflowError:
+        return UNDEFINED_DATE.astimezone(local_tz)

class Table(object):

@@ -57,6 +62,8 @@ class OneToOneTable(Table):
    timestamp, size, etc.
    '''

+    table_type = ONE_ONE
+
    def read(self, db):
        self.book_col_map = {}
        idcol = 'id' if self.metadata['table'] == 'books' else 'book'
@@ -73,6 +80,17 @@ class SizeTable(OneToOneTable):
            'WHERE data.book=books.id) FROM books'):
            self.book_col_map[row[0]] = self.unserialize(row[1])

+class CompositeTable(OneToOneTable):
+
+    def read(self, db):
+        self.book_col_map = {}
+        d = self.metadata['display']
+        # read the template from the display dict (the draft assigned the
+        # literal list ['composite_template'] here)
+        self.composite_template = d['composite_template']
+        self.contains_html = d['contains_html']
+        self.make_category = d['make_category']
+        self.composite_sort = d['composite_sort']
+        self.use_decorations = d['use_decorations']
+
class ManyToOneTable(Table):

    '''
@@ -82,9 +100,10 @@ class ManyToOneTable(Table):
    Each book however has only one value for data of this type.
    '''

+    table_type = MANY_ONE
+
    def read(self, db):
        self.id_map = {}
-        self.extra_map = {}
        self.col_book_map = {}
        self.book_col_map = {}
        self.read_id_maps(db)
@@ -105,6 +124,9 @@ class ManyToOneTable(Table):
            self.col_book_map[row[1]].append(row[0])
            self.book_col_map[row[0]] = row[1]

+        for key in tuple(self.col_book_map.iterkeys()):
+            self.col_book_map[key] = tuple(self.col_book_map[key])
+
class ManyToManyTable(ManyToOneTable):

    '''
@@ -113,6 +135,8 @@ class ManyToManyTable(ManyToOneTable):
    book. For example: tags or authors.
    '''

+    table_type = MANY_MANY
+
    def read_maps(self, db):
        for row in db.conn.execute(
            'SELECT book, {0} FROM {1}'.format(
@@ -124,14 +148,21 @@ class ManyToManyTable(ManyToOneTable):
                self.book_col_map[row[0]] = []
            self.book_col_map[row[0]].append(row[1])

+        for key in tuple(self.col_book_map.iterkeys()):
+            self.col_book_map[key] = tuple(self.col_book_map[key])
+
+        for key in tuple(self.book_col_map.iterkeys()):
+            self.book_col_map[key] = tuple(self.book_col_map[key])
+
class AuthorsTable(ManyToManyTable):

    def read_id_maps(self, db):
+        self.alink_map = {}
+        self.asort_map = {}
        for row in db.conn.execute(
                'SELECT id, name, sort, link FROM authors'):
            self.id_map[row[0]] = row[1]
-            self.extra_map[row[0]] = (row[2] if row[2] else
+            self.asort_map[row[0]] = (row[2] if row[2] else
                    author_to_author_sort(row[1]))
+            self.alink_map[row[0]] = row[3]

@@ -141,14 +172,25 @@ class FormatsTable(ManyToManyTable):
    pass

    def read_maps(self, db):
+        self.fname_map = {}
        for row in db.conn.execute('SELECT book, format, name FROM data'):
            if row[1] is not None:
-                if row[1] not in self.col_book_map:
-                    self.col_book_map[row[1]] = []
-                self.col_book_map[row[1]].append(row[0])
+                fmt = row[1].upper()
+                if fmt not in self.col_book_map:
+                    self.col_book_map[fmt] = []
+                self.col_book_map[fmt].append(row[0])
                if row[0] not in self.book_col_map:
                    self.book_col_map[row[0]] = []
-                self.book_col_map[row[0]].append((row[1], row[2]))
+                self.book_col_map[row[0]].append(fmt)
+                if row[0] not in self.fname_map:
+                    self.fname_map[row[0]] = {}
+                self.fname_map[row[0]][fmt] = row[2]
+
+        for key in tuple(self.col_book_map.iterkeys()):
+            self.col_book_map[key] = tuple(self.col_book_map[key])
+
+        for key in tuple(self.book_col_map.iterkeys()):
+            self.book_col_map[key] = tuple(self.book_col_map[key])

class IdentifiersTable(ManyToManyTable):

@@ -162,6 +204,9 @@ class IdentifiersTable(ManyToManyTable):
            self.col_book_map[row[1]] = []
        self.col_book_map[row[1]].append(row[0])
        if row[0] not in self.book_col_map:
-            self.book_col_map[row[0]] = []
-        self.book_col_map[row[0]].append((row[1], row[2]))
+            self.book_col_map[row[0]] = {}
+        self.book_col_map[row[0]][row[1]] = row[2]

+        for key in tuple(self.col_book_map.iterkeys()):
+            self.col_book_map[key] = tuple(self.col_book_map[key])
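
For orientation, a sketch of the in-memory shapes the read() methods above build, with made-up values:

    id_map       = {7: 'scifi'}      # item id -> item value
    col_book_map = {7: (1, 3)}       # item id -> book ids (tuples after read())
    book_col_map = {1: 7}            # many-to-one: book id -> single item id
    book_col_map = {1: (7, 9)}       # many-to-many: book id -> tuple of item ids
    # FormatsTable additionally keeps fname_map: book id -> {'EPUB': 'file name'}
    # IdentifiersTable keeps book_col_map as: book id -> {'isbn': '97801...'}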
src/calibre/db/view.py (new file, 109 lines)
@@ -0,0 +1,109 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from functools import partial

def sanitize_sort_field_name(field_metadata, field):
    field = field_metadata.search_term_to_field_key(field.lower().strip())
    # translate some fields to their hidden equivalent
    field = {'title': 'sort', 'authors': 'author_sort'}.get(field, field)
    return field

class View(object):

    def __init__(self, cache):
        self.cache = cache
        self.marked_ids = {}
        self._field_getters = {}
        for col, idx in cache.backend.FIELD_MAP.iteritems():
            if isinstance(col, int):
                label = self.cache.backend.custom_column_num_map[col]['label']
                label = (self.cache.backend.field_metadata.custom_field_prefix
                        + label)
                self._field_getters[idx] = partial(self.get, label)
            else:
                try:
                    self._field_getters[idx] = {
                        'id': self._get_id,
                        'au_map': self.get_author_data,
                        'ondevice': self.get_ondevice,
                        'marked': self.get_marked,
                    }[col]
                except KeyError:
                    self._field_getters[idx] = partial(self.get, col)

        self._map = list(self.cache.all_book_ids())
        self._map_filtered = list(self._map)

    @property
    def field_metadata(self):
        return self.cache.field_metadata

    def _get_id(self, idx, index_is_id=True):
        ans = idx if index_is_id else self.index_to_id(idx)
        return ans

    def get_field_map_field(self, row, col, index_is_id=True):
        '''
        Supports the legacy FIELD_MAP interface for getting metadata. Do not
        use in new code.
        '''
        getter = self._field_getters[col]
        return getter(row, index_is_id=index_is_id)

    def index_to_id(self, idx):
        return self._map_filtered[idx]

    def get(self, field, idx, index_is_id=True, default_value=None):
        id_ = idx if index_is_id else self.index_to_id(idx)
        return self.cache.field_for(field, id_)

    def get_ondevice(self, idx, index_is_id=True, default_value=''):
        id_ = idx if index_is_id else self.index_to_id(idx)
        # the draft dropped the return here, so the getter always gave None
        return self.cache.field_for('ondevice', id_, default_value=default_value)

    def get_marked(self, idx, index_is_id=True, default_value=None):
        id_ = idx if index_is_id else self.index_to_id(idx)
        return self.marked_ids.get(id_, default_value)

    def get_author_data(self, idx, index_is_id=True, default_value=()):
        '''
        Return author data for all authors of the book identified by idx as a
        tuple of dictionaries. The dictionaries should never be empty, unless
        there is a bug somewhere. The tuple can be empty if idx points to a
        non-existent book, or to a book with no authors (though again, a book
        with no authors should never happen).

        Each dictionary has the keys: name, sort, link. Link can be an empty
        string.

        default_value is ignored; this method always returns a tuple.
        '''
        id_ = idx if index_is_id else self.index_to_id(idx)
        with self.cache.read_lock:
            ids = self.cache._field_ids_for('authors', id_)
            ans = []
            for id_ in ids:
                ans.append(self.cache._author_data(id_))
        return tuple(ans)

    def multisort(self, fields=[], subsort=False):
        fields = [(sanitize_sort_field_name(self.field_metadata, x), bool(y))
                for x, y in fields]
        keys = self.field_metadata.sortable_field_keys()
        fields = [x for x in fields if x[0] in keys]
        if subsort and 'sort' not in [x[0] for x in fields]:
            fields += [('sort', True)]
        if not fields:
            fields = [('timestamp', False)]

        sorted_book_ids = self.cache.multisort(fields)
        sorted_book_ids
        # TODO: change maps
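
A hypothetical call into the new View, assuming a fully initialized Cache from src/calibre/db/cache.py:

    view = View(cache)   # cache: calibre.db.cache.Cache, already init()-ed
    view.multisort([('authors', True), ('title', False)])
    # sanitize_sort_field_name maps 'authors' -> 'author_sort' and
    # 'title' -> 'sort' before the cache performs the multi-key sort.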
@@ -39,7 +39,7 @@ class ANDROID(USBMS):
    0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
               0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
               0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
-              0x7086 : [0x0226], 0x70a8: [0x9999],
+              0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
             },

    # Sony Ericsson
@@ -47,10 +47,12 @@ class ANDROID(USBMS):

    # Google
    0x18d1 : {
+        0x0001 : [0x0223],
        0x4e11 : [0x0100, 0x226, 0x227],
-        0x4e12: [0x0100, 0x226, 0x227],
-        0x4e21: [0x0100, 0x226, 0x227],
-        0xb058: [0x0222, 0x226, 0x227]},
+        0x4e12 : [0x0100, 0x226, 0x227],
+        0x4e21 : [0x0100, 0x226, 0x227],
+        0xb058 : [0x0222, 0x226, 0x227]
+        },

    # Samsung
    0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
@@ -60,6 +62,7 @@ class ANDROID(USBMS):
               0x685e : [0x0400],
               0x6860 : [0x0400],
               0x6877 : [0x0400],
+              0x689e : [0x0400],
             },

    # Viewsonic
@@ -124,7 +127,8 @@ class ANDROID(USBMS):
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
-            'MB525', 'ANDROID2.3', 'SGH-I997']
+            'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
+            'GT-S5830_CARD', 'GT-S5570_CARD']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
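
For readers unfamiliar with these driver tables: the nested dict maps a USB vendor id to product ids, each with a list of acceptable BCD (device revision) values, so one driver can claim many handsets. A made-up entry for illustration:

    VENDOR_ID = {
        0x18d1: {                            # vendor (Google)
            0x4e11: [0x0100, 0x226, 0x227],  # product -> accepted BCD revisions
        },
    }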
@@ -35,9 +35,9 @@ class EB600(USBMS):
    PRODUCT_ID = [0x1688]
    BCD = [0x110]

-    VENDOR_NAME = ['NETRONIX', 'WOLDER']
-    WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2']
-    WINDOWS_CARD_A_MEM = 'EBOOK'
+    VENDOR_NAME = ['NETRONIX', 'WOLDER', 'MD86371']
+    WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2', 'MD86371']
+    WINDOWS_CARD_A_MEM = ['EBOOK', 'MD86371']

    OSX_MAIN_MEM = 'EB600 Internal Storage Media'
    OSX_CARD_A_MEM = 'EB600 Card Storage Media'

@@ -131,7 +131,7 @@ class AZBOOKA(ALEX):
    description = _('Communicate with the Azbooka')

    VENDOR_NAME = 'LINUX'
-    WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'

    MAIN_MEMORY_VOLUME_LABEL = 'Azbooka Internal Memory'
@@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'

import os
import sqlite3 as sqlite
+from contextlib import closing

from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book
@@ -22,7 +23,7 @@ class KOBO(USBMS):
    gui_name = 'Kobo Reader'
    description = _('Communicate with the Kobo Reader')
    author = 'Timothy Legge'
-    version = (1, 0, 9)
+    version = (1, 0, 10)

    dbversion = 0
    fwversion = 0
@@ -48,12 +49,16 @@ class KOBO(USBMS):

    VIRTUAL_BOOK_EXTENSIONS = frozenset(['kobo'])

-    EXTRA_CUSTOMIZATION_MESSAGE = _('The Kobo supports only one collection '
-        'currently: the \"Im_Reading\" list. Create a tag called \"Im_Reading\" ')+\
-        'for automatic management'
+    EXTRA_CUSTOMIZATION_MESSAGE = [
+        _('The Kobo supports several collections including ')+\
+        'Read, Closed, Im_Reading ' +\
+        _('Create tags for automatic management'),
+    ]
+
+    EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(['tags'])
+
+    OPT_COLLECTIONS = 0

    def initialize(self):
        USBMS.initialize(self)
        self.book_class = Book
@@ -188,7 +193,9 @@ class KOBO(USBMS):
            traceback.print_exc()
            return changed

-        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
+        with closing(sqlite.connect(
+            self.normalize_path(self._main_prefix +
+                '.kobo/KoboReader.sqlite'))) as connection:

            # return bytestrings if the content cannot be decoded as unicode
            connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
@@ -258,7 +265,6 @@ class KOBO(USBMS):
                need_sync = True

            cursor.close()
-            connection.close()

        # Remove books that are no longer in the filesystem. Cache contains
        # indices into the booklist if book not in filesystem, None otherwise
@@ -288,7 +294,8 @@ class KOBO(USBMS):
        # 2) content

        debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
-        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
+        with closing(sqlite.connect(self.normalize_path(self._main_prefix +
+            '.kobo/KoboReader.sqlite'))) as connection:

            # return bytestrings if the content cannot be decoded as unicode
            connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
@@ -326,8 +333,14 @@ class KOBO(USBMS):
            except Exception as e:
                if 'no such column' not in str(e):
                    raise
+                try:
+                    cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0 ' \
+                        'where BookID is Null and ContentID =?',t)
+                except Exception as e:
+                    if 'no such column' not in str(e):
+                        raise
                    cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\' ' \
                        'where BookID is Null and ContentID =?',t)

            connection.commit()
@@ -337,7 +350,6 @@ class KOBO(USBMS):
                print "Error condition ImageID was not found"
                print "You likely tried to delete a book that the kobo has not yet added to the database"

-        connection.close()
        # If all this succeeds we need to delete the images files via the ImageID
        return ImageID

@@ -664,7 +676,8 @@ class KOBO(USBMS):
        # Needs to be outside books collection as in the case of removing
        # the last book from the collection the list of books is empty
        # and the removal of the last book would not occur
-        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
+        with closing(sqlite.connect(self.normalize_path(self._main_prefix +
+            '.kobo/KoboReader.sqlite'))) as connection:

            # return bytestrings if the content cannot be decoded as unicode
            connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
@@ -693,10 +706,10 @@ class KOBO(USBMS):
                if category in readstatuslist.keys():
                    # Manage ReadStatus
                    self.set_readstatus(connection, ContentID, readstatuslist.get(category))
-                if category == 'Shortlist' and self.dbversion >= 14:
+                elif category == 'Shortlist' and self.dbversion >= 14:
                    # Manage FavouritesIndex/Shortlist
                    self.set_favouritesindex(connection, ContentID)
-                if category in accessibilitylist.keys():
+                elif category in accessibilitylist.keys():
                    # Do not manage the Accessibility List
                    pass
            else: # No collections
@@ -707,8 +720,6 @@ class KOBO(USBMS):
                debug_print("No Collections - resetting FavouritesIndex")
                self.reset_favouritesindex(connection, oncard)

-        connection.close()

        # debug_print('Finished update_device_database_collections', collections_attributes)

    def sync_booklists(self, booklists, end_session=True):
@@ -723,7 +734,7 @@ class KOBO(USBMS):
        opts = self.settings()
        if opts.extra_customization:
            collections = [x.lower().strip() for x in
-                opts.extra_customization.split(',')]
+                opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
        else:
            collections = []
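
The reason for the closing() wrapper, in miniature: a sqlite3 connection used as a plain context manager only commits or rolls back the transaction, it does not close; closing() guarantees connection.close() even when an exception escapes, which is what replaces the manual close() calls deleted above.

    import sqlite3
    from contextlib import closing

    with closing(sqlite3.connect(':memory:')) as connection:
        connection.execute('SELECT 1')
    # connection.close() has been called here, success or failure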
@@ -351,3 +351,29 @@ class MOOVYBOOK(USBMS):
    def get_main_ebook_dir(self, for_upload=False):
        return 'Books' if for_upload else self.EBOOK_DIR_MAIN

+class COBY(USBMS):
+
+    name = 'COBY MP977 device interface'
+    gui_name = 'COBY'
+    description = _('Communicate with the COBY')
+    author = 'Kovid Goyal'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS = ['epub', 'pdf']
+
+    VENDOR_ID = [0x1e74]
+    PRODUCT_ID = [0x7121]
+    BCD = [0x02]
+    VENDOR_NAME = 'USB_2.0'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MP977_DRIVER'
+
+    EBOOK_DIR_MAIN = ''
+
+    SUPPORTS_SUB_DIRS = False
+
+    def get_carda_ebook_dir(self, for_upload=False):
+        if for_upload:
+            return 'eBooks'
+        return self.EBOOK_DIR_CARD_A
@@ -1077,8 +1077,13 @@ class Device(DeviceConfig, DevicePlugin):
        settings = self.settings()
        template = self.save_template()
        if mdata.tags and _('News') in mdata.tags:
+            try:
+                p = mdata.pubdate
+                date = (p.year, p.month, p.day)
+            except:
                today = time.localtime()
-                template = "{title}_%d-%d-%d" % (today[0], today[1], today[2])
+                date = (today[0], today[1], today[2])
+            template = "{title}_%d-%d-%d" % date
        use_subdirs = self.SUPPORTS_SUB_DIRS and settings.use_subdirs

        fname = sanitize(fname)
@@ -94,11 +94,29 @@ class USBMS(CLI, Device):
        self.report_progress(1.0, _('Get device information...'))
        self.driveinfo = {}
        if self._main_prefix is not None:
+            try:
                self.driveinfo['main'] = self._update_driveinfo_file(self._main_prefix, 'main')
+            except (IOError, OSError) as e:
+                raise IOError(_('Failed to access files in the main memory of'
+                    ' your device. You should contact the device'
+                    ' manufacturer for support. Common fixes are:'
+                    ' try a different USB cable/USB port on your computer.'
+                    ' If your device has a "Reset to factory defaults" type'
+                    ' of setting somewhere, use it. Underlying error: %s')
+                    % e)
+        try:
            if self._card_a_prefix is not None:
                self.driveinfo['A'] = self._update_driveinfo_file(self._card_a_prefix, 'A')
            if self._card_b_prefix is not None:
                self.driveinfo['B'] = self._update_driveinfo_file(self._card_b_prefix, 'B')
+        except (IOError, OSError) as e:
+            raise IOError(_('Failed to access files on the SD card in your'
+                ' device. This can happen for many reasons. The SD card may be'
+                ' corrupted, it may be too large for your device, it may be'
+                ' write-protected, etc. Try a different SD card, or reformat'
+                ' your SD card using the FAT32 filesystem. Also make sure'
+                ' there are not too many files in the root of your SD card.'
+                ' Underlying error: %s') % e)
        return (self.get_gui_name(), '', '', '', self.driveinfo)

    def set_driveinfo_name(self, location_code, name):
@@ -8,6 +8,7 @@ from various formats.
'''

import traceback, os, re
+from cStringIO import StringIO
from calibre import CurrentDir

class ConversionError(Exception):
@@ -159,7 +160,7 @@ def normalize(x):
    return x

def calibre_cover(title, author_string, series_string=None,
-        output_format='jpg', title_size=46, author_size=36):
+        output_format='jpg', title_size=46, author_size=36, logo_path=None):
    title = normalize(title)
    author_string = normalize(author_string)
    series_string = normalize(series_string)
@@ -167,7 +168,9 @@ def calibre_cover(title, author_string, series_string=None,
    lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
    if series_string:
        lines.append(TextLine(series_string, author_size))
-    return create_cover_page(lines, I('library.png'), output_format='jpg')
+    if logo_path is None:
+        logo_path = I('library.png')
+    return create_cover_page(lines, logo_path, output_format='jpg')

UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')
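
A hedged example of the new logo_path parameter (the path is illustrative; it defaults to the stock library.png):

    cover = calibre_cover('Some Title', 'Some Author',
            logo_path='/tmp/my_logo.png')
    with open('cover.jpg', 'wb') as f:
        f.write(cover)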
@@ -207,4 +210,45 @@ def unit_convert(value, base, font, dpi):
        result = value * 0.40
    return result

+def generate_masthead(title, output_path=None, width=600, height=60):
+    from calibre.ebooks.conversion.config import load_defaults
+    from calibre.utils.fonts import fontconfig
+    font_path = default_font = P('fonts/liberation/LiberationSerif-Bold.ttf')
+    recs = load_defaults('mobi_output')
+    masthead_font_family = recs.get('masthead_font', 'Default')
+
+    if masthead_font_family != 'Default':
+        masthead_font = fontconfig.files_for_family(masthead_font_family)
+        # Assume 'normal' is always in the dict, else use the default
+        # {'normal': (path_to_font, friendly name)}
+        if 'normal' in masthead_font:
+            font_path = masthead_font['normal'][0]
+
+    if not font_path or not os.access(font_path, os.R_OK):
+        font_path = default_font
+
+    try:
+        from PIL import Image, ImageDraw, ImageFont
+        Image, ImageDraw, ImageFont
+    except ImportError:
+        import Image, ImageDraw, ImageFont
+
+    img = Image.new('RGB', (width, height), 'white')
+    draw = ImageDraw.Draw(img)
+    try:
+        font = ImageFont.truetype(font_path, 48)
+    except:
+        font = ImageFont.truetype(default_font, 48)
+    text = title.encode('utf-8')
+    # use separate names for the text extent so the image dimensions are
+    # not clobbered (the draft reassigned width/height here, which made the
+    # centering math degenerate to zero)
+    text_width, text_height = draw.textsize(text, font=font)
+    left = max(int((width - text_width)/2.), 0)
+    top = max(int((height - text_height)/2.), 0)
+    draw.text((left, top), text, fill=(0,0,0), font=font)
+    if output_path is None:
+        f = StringIO()
+        img.save(f, 'JPEG')
+        return f.getvalue()
+    else:
+        with open(output_path, 'wb') as f:
+            img.save(f, 'JPEG')
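
Sketch of calling the new helper (the 600x60 default geometry comes from the signature above; nothing else is assumed):

    jpeg_bytes = generate_masthead(u'My News Source')           # returns bytes
    generate_masthead(u'My News Source', output_path='mh.jpg')  # or writes a file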
@@ -38,8 +38,12 @@ ENCODING_PATS = [
ENTITY_PATTERN = re.compile(r'&(\S+?);')

def strip_encoding_declarations(raw):
+    limit = 50*1024
    for pat in ENCODING_PATS:
-        raw = pat.sub('', raw)
+        prefix = raw[:limit]
+        suffix = raw[limit:]
+        prefix = pat.sub('', prefix)
+        raw = prefix + suffix
    return raw

def substitute_entites(raw):
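
What the 50KB cap buys, as a toy demonstration (assuming, as elsewhere in calibre, that ENCODING_PATS matches XML/HTML encoding declarations): the substitution now only ever scans the prefix, so a multi-megabyte input no longer feeds the regexes whole.

    big = '<?xml version="1.0" encoding="utf-8"?>' + ('x' * 10**6)
    cleaned = strip_encoding_declarations(big)
    # the declaration inside the first 50KB is stripped; everything past
    # the limit is passed through untouched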
@@ -137,7 +137,9 @@ def add_pipeline_options(parser, plumber):
            'extra_css', 'smarten_punctuation',
            'margin_top', 'margin_left', 'margin_right',
            'margin_bottom', 'change_justification',
-            'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size',
+            'insert_blank_line', 'insert_blank_line_size',
+            'remove_paragraph_spacing',
+            'remove_paragraph_spacing_indent_size',
            'asciiize',
            ]
        ),
@@ -208,12 +210,13 @@ def add_pipeline_options(parser, plumber):
        if rec.level < rec.HIGH:
            option_recommendation_to_cli_option(add_option, rec)

-    parser.add_option('--list-recipes', default=False, action='store_true',
-            help=_('List builtin recipes'))
-
def option_parser():
-    return OptionParser(usage=USAGE)
+    parser = OptionParser(usage=USAGE)
+    parser.add_option('--list-recipes', default=False, action='store_true',
+            help=_('List builtin recipe names. You can create an ebook from '
+                'a builtin recipe like this: ebook-convert "Recipe Name.recipe" '
+                'output.epub'))
+    return parser

class ProgressBar(object):
@@ -366,9 +366,9 @@ OptionRecommendation(name='remove_paragraph_spacing',

OptionRecommendation(name='remove_paragraph_spacing_indent_size',
        recommended_value=1.5, level=OptionRecommendation.LOW,
-        help=_('When calibre removes inter paragraph spacing, it automatically '
+        help=_('When calibre removes blank lines between paragraphs, it automatically '
            'sets a paragraph indent, to ensure that paragraphs can be easily '
-            'distinguished. This option controls the width of that indent.')
+            'distinguished. This option controls the width of that indent (in em).')
        ),

OptionRecommendation(name='prefer_metadata_cover',
@@ -384,6 +384,13 @@ OptionRecommendation(name='insert_blank_line',
        )
       ),

+OptionRecommendation(name='insert_blank_line_size',
+        recommended_value=0.5, level=OptionRecommendation.LOW,
+        help=_('Set the height of the inserted blank lines (in em).'
+            ' The height of the lines between paragraphs will be twice the value'
+            ' set here.')
+       ),
+
OptionRecommendation(name='remove_first_image',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Remove the first image from the input ebook. Useful if the '
@@ -550,7 +557,7 @@ OptionRecommendation(name='delete_blank_paragraphs',
OptionRecommendation(name='format_scene_breaks',
        recommended_value=True, level=OptionRecommendation.LOW,
        help=_('Left aligned scene break markers are center aligned. '
-            'Replace soft scene breaks that use multiple blank lines with'
+            'Replace soft scene breaks that use multiple blank lines with '
            'horizontal rules.')),

OptionRecommendation(name='replace_scene_breaks',
@@ -602,7 +609,7 @@ OptionRecommendation(name='sr3_replace',
        input_fmt = os.path.splitext(self.input)[1]
        if not input_fmt:
            raise ValueError('Input file must have an extension')
-        input_fmt = input_fmt[1:].lower()
+        input_fmt = input_fmt[1:].lower().replace('original_', '')
        self.archive_input_tdir = None
        if input_fmt in ARCHIVE_FMTS:
            self.log('Processing archive...')
@@ -1048,6 +1055,7 @@ OptionRecommendation(name='sr3_replace',
        with self.output_plugin:
            self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
                    self.opts, self.log)
+            self.oeb.clean_temp_files()
        self.ui_reporter(1.)
        run_plugins_on_postprocess(self.output, self.output_fmt)
@@ -303,6 +303,9 @@ class CSSPreProcessor(object):
class HTMLPreProcessor(object):

    PREPROCESS = [
+        # Remove huge blocks of contiguous spaces as they slow down
+        # the following regexes pretty badly
+        (re.compile(r'\s{10000,}'), lambda m: ''),
        # Some idiotic HTML generators (Frontpage I'm looking at you)
        # Put all sorts of crap into <head>. This messes up lxml
        (re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

import os

-from calibre import guess_type, walk
+from calibre import guess_type
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.opf2 import OPF
@@ -25,16 +25,50 @@ class HTMLZInput(InputFormatPlugin):
            accelerators):
        self.log = log
        html = u''
+        top_levels = []

        # Extract content from zip archive.
        zf = ZipFile(stream)
        zf.extractall()

-        for x in walk('.'):
-            if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
-                with open(x, 'rb') as tf:
-                    html = tf.read()
+        # Find the HTML file in the archive. It needs to be
+        # top level.
+        index = u''
+        multiple_html = False
+        # Get a list of all top level files in the archive.
+        for x in os.listdir('.'):
+            if os.path.isfile(x):
+                top_levels.append(x)
+        # Try to find an index file.
+        for x in top_levels:
+            if x.lower() in ('index.html', 'index.xhtml', 'index.htm'):
+                index = x
+                break
+        # Look for multiple HTML files in the archive. We look at the
+        # top level files only as only they matter in HTMLZ.
+        for x in top_levels:
+            if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
+                # Set index to the first HTML file found if it's not
+                # called index.
+                if not index:
+                    index = x
+                else:
+                    multiple_html = True
+        # Warn the user if there are multiple HTML files in the archive. HTMLZ
+        # supports a single HTML file. A conversion of an HTMLZ archive with
+        # multiple HTML files probably won't turn out as the user expects. With
+        # multiple HTML files, ZIP input should be used in place of HTMLZ.
+        if multiple_html:
+            log.warn(_('Multiple HTML files found in the archive. Only %s will be used.') % index)
+
+        if index:
+            with open(index, 'rb') as tf:
+                html = tf.read()
+        else:
+            raise Exception(_('No top level HTML file found.'))
+
+        if not html:
+            raise Exception(_('Top level HTML file %s is empty') % index)

        # Encoding
        if options.input_encoding:
@@ -75,7 +109,7 @@ class HTMLZInput(InputFormatPlugin):
        # Get the cover path from the OPF.
        cover_path = None
        opf = None
-        for x in walk('.'):
+        for x in top_levels:
            if os.path.splitext(x)[1].lower() in ('.opf'):
                opf = x
                break
@@ -742,7 +742,7 @@ class Metadata(object):
        ans += [('ISBN', unicode(self.isbn))]
        ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
        if self.series:
-            ans += [_('Series'), unicode(self.series) + ' #%s'%self.format_series_index()]
+            ans += [(_('Series'), unicode(self.series) + ' #%s'%self.format_series_index())]
        ans += [(_('Language'), unicode(self.language))]
        if self.timestamp is not None:
            ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
@@ -24,10 +24,9 @@ XPath = partial(etree.XPath, namespaces=NAMESPACES)
tostring = partial(etree.tostring, method='text', encoding=unicode)

def get_metadata(stream):
-    """ Return fb2 metadata as a L{MetaInformation} object """
+    ''' Return fb2 metadata as a L{MetaInformation} object '''

    root = _get_fbroot(stream)
    book_title = _parse_book_title(root)
    authors = _parse_authors(root)

@@ -166,7 +165,7 @@ def _parse_tags(root, mi):
            break

def _parse_series(root, mi):
-    #calibri supports only 1 series: use the 1-st one
+    # calibre supports only one series: use the first one
    # pick up the sequence, but only from one section, in preferred order
    # (except <src-title-info>)
    xp_ti = '//fb2:title-info/fb2:sequence[1]'
@@ -181,6 +180,7 @@ def _parse_series(root, mi):
def _parse_isbn(root, mi):
-    # some people try to put several ISBNs in this field, but it is not allowed; try to stick to the first one in this case
    isbn = XPath('normalize-space(//fb2:publish-info/fb2:isbn/text())')(root)
    if isbn:
+        # some people try to put several ISBNs in this field, but it is not allowed; try to stick to the first one in this case
        if ',' in isbn:
            isbn = isbn[:isbn.index(',')]
@@ -232,4 +232,3 @@ def _get_fbroot(stream):
    raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
    root = etree.fromstring(raw, parser=parser)
    return root
-
@@ -22,6 +22,7 @@ from calibre.utils.date import parse_date, isoformat
from calibre.utils.localization import get_lang
from calibre import prints, guess_type
from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.config import tweaks

class Resource(object): # {{{
    '''
@@ -527,7 +528,12 @@ class OPF(object): # {{{
    category = MetadataField('type')
    rights = MetadataField('rights')
    series = MetadataField('series', is_dc=False)
-    series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
+    if tweaks['use_series_auto_increment_tweak_when_importing']:
+        series_index = MetadataField('series_index', is_dc=False,
+                formatter=float, none_is=None)
+    else:
+        series_index = MetadataField('series_index', is_dc=False,
+                formatter=float, none_is=1)
    title_sort = TitleSortField('title_sort', is_dc=False)
    rating = MetadataField('rating', is_dc=False, formatter=int)
    pubdate = MetadataField('date', formatter=parse_date,
@@ -1024,8 +1030,10 @@ class OPF(object): # {{{
        attrib = attrib or {}
        attrib['name'] = 'calibre:' + name
        name = '{%s}%s' % (self.NAMESPACES['opf'], 'meta')
+        nsmap = dict(self.NAMESPACES)
+        del nsmap['opf']
        elem = etree.SubElement(self.metadata, name, attrib=attrib,
-                nsmap=self.NAMESPACES)
+                nsmap=nsmap)
        elem.tail = '\n'
        return elem
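
Why deleting the opf prefix from nsmap matters, shown standalone with lxml (the namespace URI is the OPF 2.0 one; the element names are illustrative):

    from lxml import etree

    OPF_NS = 'http://www.idpf.org/2007/opf'
    metadata = etree.Element('{%s}metadata' % OPF_NS, nsmap={None: OPF_NS})
    etree.SubElement(metadata, '{%s}meta' % OPF_NS, nsmap={'opf': OPF_NS})
    etree.SubElement(metadata, '{%s}meta' % OPF_NS)
    print(etree.tostring(metadata, pretty_print=True))
    # the first child serializes as <opf:meta/>, the second as plain <meta/>;
    # readers that do not resolve XML namespaces (FBReaderJ, per the
    # changelog entry) only cope with the latter.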
@@ -22,6 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import utc_tz, as_utc
from calibre.utils.html2text import html2text
from calibre.utils.icu import lower
+from calibre.utils.date import UNDEFINED_DATE

# Download worker {{{
class Worker(Thread):
@@ -490,6 +491,8 @@ def identify(log, abort, # {{{
    max_tags = msprefs['max_tags']
    for r in results:
        r.tags = r.tags[:max_tags]
+        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
+            r.pubdate = None

    if msprefs['swap_author_names']:
        for r in results:
@@ -151,7 +151,7 @@ class ISBNDB(Source):

        bl = feed.find('BookList')
        if bl is None:
-            err = tostring(etree.find('errormessage'))
+            err = tostring(feed.find('errormessage'))
            raise ValueError('ISBNDb query failed:' + err)
        total_results = int(bl.get('total_results'))
        shown_results = int(bl.get('shown_results'))
@@ -1,8 +1,12 @@
#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)

__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'''
Decompress MOBI files compressed with the Huff/cdic algorithm. Code thanks to darkninja
and igorsk.
@@ -12,82 +16,92 @@ import struct

from calibre.ebooks.mobi import MobiError

-class BitReader(object):
-
-    def __init__(self, data):
-        self.data, self.pos, self.nbits = data + "\x00\x00\x00\x00", 0, len(data) * 8
-
-    def peek(self, n):
-        r, g = 0, 0
-        while g < n:
-            r, g = (r << 8) | ord(self.data[(self.pos+g)>>3]), g + 8 - ((self.pos+g) & 7)
-        return (r >> (g - n)) & ((1 << n) - 1)
-
-    def eat(self, n):
-        self.pos += n
-        return self.pos <= self.nbits
-
-    def left(self):
-        return self.nbits - self.pos
+class Reader(object):
+
+    def __init__(self):
+        self.q = struct.Struct(b'>Q').unpack_from
+
+    def load_huff(self, huff):
+        if huff[0:8] != b'HUFF\x00\x00\x00\x18':
+            raise MobiError('Invalid HUFF header')
+        off1, off2 = struct.unpack_from(b'>LL', huff, 8)
+
+        def dict1_unpack(v):
+            codelen, term, maxcode = v&0x1f, v&0x80, v>>8
+            assert codelen != 0
+            if codelen <= 8:
+                assert term
+            maxcode = ((maxcode + 1) << (32 - codelen)) - 1
+            return (codelen, term, maxcode)
+        self.dict1 = map(dict1_unpack, struct.unpack_from(b'>256L', huff, off1))
+
+        dict2 = struct.unpack_from(b'>64L', huff, off2)
+        self.mincode, self.maxcode = (), ()
+        for codelen, mincode in enumerate((0,) + dict2[0::2]):
+            self.mincode += (mincode << (32 - codelen), )
+        for codelen, maxcode in enumerate((0,) + dict2[1::2]):
+            self.maxcode += (((maxcode + 1) << (32 - codelen)) - 1, )
+
+        self.dictionary = []
+
+    def load_cdic(self, cdic):
+        if cdic[0:8] != b'CDIC\x00\x00\x00\x10':
+            raise MobiError('Invalid CDIC header')
+        phrases, bits = struct.unpack_from(b'>LL', cdic, 8)
+        n = min(1<<bits, phrases-len(self.dictionary))
+        h = struct.Struct(b'>H').unpack_from
+        def getslice(off):
+            blen, = h(cdic, 16+off)
+            slice_ = cdic[18+off:18+off+(blen&0x7fff)]
+            return (slice_, blen&0x8000)
+        self.dictionary += map(getslice, struct.unpack_from(b'>%dH' % n, cdic, 16))
+
+    def unpack(self, data):
+        q = self.q
+
+        bitsleft = len(data) * 8
+        data += b'\x00\x00\x00\x00\x00\x00\x00\x00'
+        pos = 0
+        x, = q(data, pos)
+        n = 32
+
+        s = []
+        while True:
+            if n <= 0:
+                pos += 4
+                x, = q(data, pos)
+                n += 32
+            code = (x >> n) & ((1 << 32) - 1)
+
+            codelen, term, maxcode = self.dict1[code >> 24]
+            if not term:
+                while code < self.mincode[codelen]:
+                    codelen += 1
+                maxcode = self.maxcode[codelen]
+
+            n -= codelen
+            bitsleft -= codelen
+            if bitsleft < 0:
+                break
+
+            r = (maxcode - code) >> (32 - codelen)
+            slice_, flag = self.dictionary[r]
+            if not flag:
+                self.dictionary[r] = None
+                slice_ = self.unpack(slice_)
+                self.dictionary[r] = (slice_, 1)
+            s.append(slice_)
+        return b''.join(s)

class HuffReader(object):

    def __init__(self, huffs):
        self.huffs = huffs
+        self.reader = Reader()
+        self.reader.load_huff(huffs[0])
+        for cdic in huffs[1:]:
+            self.reader.load_cdic(cdic)

-        if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
-            raise MobiError('Invalid HUFF header')
-
-        if huffs[1][0:4] != 'CDIC' or huffs[1][4:8] != '\x00\x00\x00\x10':
-            raise ValueError('Invalid CDIC header')
-
-        self.entry_bits, = struct.unpack('>L', huffs[1][12:16])
-        off1,off2 = struct.unpack('>LL', huffs[0][16:24])
-        self.dict1 = struct.unpack('<256L', huffs[0][off1:off1+256*4])
-        self.dict2 = struct.unpack('<64L', huffs[0][off2:off2+64*4])
-        self.dicts = huffs[1:]
-        self.r = ''
-
-    def _unpack(self, bits, depth = 0):
-        if depth > 32:
-            raise MobiError('Corrupt file')
-
-        while bits.left():
-            dw = bits.peek(32)
-            v = self.dict1[dw >> 24]
-            codelen = v & 0x1F
-            assert codelen != 0
-            code = dw >> (32 - codelen)
-            r = (v >> 8)
-            if not (v & 0x80):
-                while code < self.dict2[(codelen-1)*2]:
-                    codelen += 1
-                    code = dw >> (32 - codelen)
-                r = self.dict2[(codelen-1)*2+1]
-            r -= code
-            assert codelen != 0
-            if not bits.eat(codelen):
-                return
-            dicno = r >> self.entry_bits
-            off1 = 16 + (r - (dicno << self.entry_bits)) * 2
-            dic = self.dicts[dicno]
-            off2 = 16 + ord(dic[off1]) * 256 + ord(dic[off1+1])
-            blen = ord(dic[off2]) * 256 + ord(dic[off2+1])
-            slice_ = dic[off2+2:off2+2+(blen&0x7fff)]
-            if blen & 0x8000:
-                self.r += slice_
-            else:
-                self._unpack(BitReader(slice_), depth + 1)
-
-    def unpack(self, data):
-        self.r = ''
-        self._unpack(BitReader(data))
-        return self.r
+    def unpack(self, section):
+        return self.reader.unpack(section)

    def decompress(self, sections):
        r = ''
        for data in sections:
            r += self.unpack(data)
        if r.endswith('#'):
            r = r[:-1]
        return r
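
How this decompressor is fed, as a sketch (the record layout is inferred from the loaders above: record 0 of the huff group is the HUFF table, subsequent records are CDIC dictionaries):

    reader = HuffReader(huff_records)   # list of raw PDB records (byte strings)
    text = b''.join(reader.unpack(rec) for rec in text_records)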
src/calibre/ebooks/mobi/kindlegen.py (new file, 86 lines)
@@ -0,0 +1,86 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, subprocess, shutil, tempfile

from lxml import etree

from calibre.constants import iswindows
from calibre.customize.ui import plugin_for_output_format
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.mobi.utils import detect_periodical
from calibre import CurrentDir

exe = 'kindlegen.exe' if iswindows else 'kindlegen'

def refactor_opf(opf, is_periodical, toc):
    with open(opf, 'rb') as f:
        root = etree.fromstring(f.read())
    '''
    for spine in root.xpath('//*[local-name() = "spine" and @toc]'):
        # Do not use the NCX toc as kindlegen requires the section structure
        # in the TOC to be duplicated in the HTML, asinine!
        del spine.attrib['toc']
    '''
    if is_periodical:
        metadata = root.xpath('//*[local-name() = "metadata"]')[0]
        xm = etree.SubElement(metadata, 'x-metadata')
        xm.tail = '\n'
        xm.text = '\n\t'
        mobip = etree.SubElement(xm, 'output', attrib={'encoding': "utf-8",
            'content-type': "application/x-mobipocket-subscription-magazine"})
        mobip.tail = '\n\t'
    with open(opf, 'wb') as f:
        f.write(etree.tostring(root, method='xml', encoding='utf-8',
            xml_declaration=True))


def refactor_guide(oeb):
    for key in list(oeb.guide):
        if key not in ('toc', 'start', 'masthead'):
            oeb.guide.remove(key)

def run_kindlegen(opf, log):
    log.info('Running kindlegen on MOBIML created by calibre')
    oname = os.path.splitext(opf)[0] + '.mobi'
    p = subprocess.Popen([exe, opf, '-c1', '-verbose', '-o', oname],
            stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
    ko = p.stdout.read()
    returncode = p.wait()
    log.debug('kindlegen verbose output:')
    log.debug(ko.decode('utf-8', 'replace'))
    log.info('kindlegen returned returncode: %d'%returncode)
    if not os.path.exists(oname) or os.stat(oname).st_size < 100:
        raise RuntimeError('kindlegen did not produce any output. '
                'kindlegen return code: %d'%returncode)
    return oname

def kindlegen(oeb, opts, input_plugin, output_path):
    is_periodical = detect_periodical(oeb.toc, oeb.log)
    refactor_guide(oeb)
    with TemporaryDirectory('_kindlegen_output') as tdir:
        oeb_output = plugin_for_output_format('oeb')
        oeb_output.convert(oeb, tdir, input_plugin, opts, oeb.log)
        opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
        refactor_opf(os.path.join(tdir, opf), is_periodical, oeb.toc)
        try:
            td = tempfile.gettempdir()
            kd = os.path.join(td, 'kindlegen')
            if os.path.exists(kd):
                shutil.rmtree(kd)
            shutil.copytree(tdir, kd)
            oeb.log('kindlegen intermediate output stored in: %s'%kd)
        except:
            pass

        with CurrentDir(tdir):
            oname = run_kindlegen(opf, oeb.log)
        shutil.copyfile(oname, output_path)
@@ -532,7 +532,7 @@ class MobiMLizer(object):
            bstate.pbreak = True
        if isblock:
            para = bstate.para
-            if para is not None and para.text == u'\xa0':
+            if para is not None and para.text == u'\xa0' and len(para) < 1:
                para.getparent().replace(para, etree.Element(XHTML('br')))
                bstate.para = None
                bstate.istate = None
@ -27,7 +27,7 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
),
|
||||
OptionRecommendation(name='no_inline_toc',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Don\'t add Table of Contents to end of book. Useful if '
|
||||
help=_('Don\'t add Table of Contents to the book. Useful if '
|
||||
'the book has its own table of contents.')),
|
||||
OptionRecommendation(name='toc_title', recommended_value=None,
|
||||
help=_('Title for any generated in-line table of contents.')
|
||||
@ -45,6 +45,30 @@ class MOBIOutput(OutputFormatPlugin):
            'the MOBI output plugin will try to convert margins specified'
            ' in the input document, otherwise it will ignore them.')
        ),
        OptionRecommendation(name='mobi_toc_at_start',
            recommended_value=False,
            help=_('When adding the Table of Contents to the book, add it at the start of the '
                'book instead of the end. Not recommended.')
        ),
        OptionRecommendation(name='extract_to', recommended_value=None,
            help=_('Extract the contents of the MOBI file to the'
                ' specified directory. If the directory already '
                'exists, it will be deleted.')
        ),
        OptionRecommendation(name='mobi_navpoints_only_deepest',
            recommended_value=False,
            help=_('When adding navpoints for the chapter-to-chapter'
                ' navigation on the kindle, use only the lowest level '
                'of items in the TOC, instead of items at every level.')
        ),

        OptionRecommendation(name='kindlegen',
            recommended_value=False,
            help=('Use kindlegen (must be in your PATH) to generate the'
                ' binary wrapper for the MOBI format. Useful to debug'
                ' the calibre MOBI output.')
        ),

    ])

    def check_for_periodical(self):
@ -76,26 +100,6 @@ class MOBIOutput(OutputFormatPlugin):
        else:
            self.oeb.log.debug('Using mastheadImage supplied in manifest...')

    def dump_toc(self, toc) :
        self.log( "\n >>> TOC contents <<<")
        self.log( " toc.title: %s" % toc.title)
        self.log( " toc.href: %s" % toc.href)
        for periodical in toc.nodes :
            self.log( "\tperiodical title: %s" % periodical.title)
            self.log( "\t href: %s" % periodical.href)
            for section in periodical :
                self.log( "\t\tsection title: %s" % section.title)
                self.log( "\t\tfirst article: %s" % section.href)
                for article in section :
                    self.log( "\t\t\tarticle title: %s" % repr(article.title))
                    self.log( "\t\t\t href: %s" % article.href)

    def dump_manifest(self) :
        self.log( "\n >>> Manifest entries <<<")
        for href in self.oeb.manifest.hrefs :
            self.log ("\t%s" % href)

    def periodicalize_toc(self):
        from calibre.ebooks.oeb.base import TOC
        toc = self.oeb.toc
@ -150,24 +154,16 @@ class MOBIOutput(OutputFormatPlugin):
            # Fix up the periodical href to point to first section href
            toc.nodes[0].href = toc.nodes[0].nodes[0].href

            # GR diagnostics
            if self.opts.verbose > 3:
                self.dump_toc(toc)
                self.dump_manifest()

    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb
        from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
            MobiWriter, PALMDOC, UNCOMPRESSED
        from calibre.ebooks.mobi.mobiml import MobiMLizer
        from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
        from calibre.customize.ui import plugin_for_input_format
        imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
        if not opts.no_inline_toc:
            tocadder = HTMLTOCAdder(title=opts.toc_title)
            tocadder = HTMLTOCAdder(title=opts.toc_title, position='start' if
                    opts.mobi_toc_at_start else 'end')
            tocadder(oeb, opts)
        mangler = CaseMangler()
        mangler(oeb, opts)
@ -179,10 +175,23 @@ class MOBIOutput(OutputFormatPlugin):
        mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
        mobimlizer(oeb, opts)
        self.check_for_periodical()
        write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
        writer = MobiWriter(opts, imagemax=imagemax,
                        compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,
                        prefer_author_sort=opts.prefer_author_sort,
        write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
        from calibre.utils.config import tweaks
        if tweaks.get('new_mobi_writer', False):
            from calibre.ebooks.mobi.writer2.main import MobiWriter
            MobiWriter
        else:
            from calibre.ebooks.mobi.writer import MobiWriter
        if opts.kindlegen:
            from calibre.ebooks.mobi.kindlegen import kindlegen
            kindlegen(oeb, opts, input_plugin, output_path)
        else:
            writer = MobiWriter(opts,
                    write_page_breaks_after_item=write_page_breaks_after_item)
            writer(oeb, output_path)

        if opts.extract_to is not None:
            from calibre.ebooks.mobi.debug import inspect_mobi
            ddir = opts.extract_to
            inspect_mobi(output_path, ddir=ddir)

@ -859,16 +859,19 @@ class MobiReader(object):
            processed_records += list(range(self.book_header.huff_offset,
                self.book_header.huff_offset + self.book_header.huff_number))
            huff = HuffReader(huffs)
            self.mobi_html = huff.decompress(text_sections)
            unpack = huff.unpack

        elif self.book_header.compression_type == '\x00\x02':
            for section in text_sections:
                self.mobi_html += decompress_doc(section)
            unpack = decompress_doc

        elif self.book_header.compression_type == '\x00\x01':
            self.mobi_html = ''.join(text_sections)
            unpack = lambda x: x
        else:
            raise MobiError('Unknown compression algorithm: %s' % repr(self.book_header.compression_type))
        self.mobi_html = b''.join(map(unpack, text_sections))
        if self.mobi_html.endswith(b'#'):
            self.mobi_html = self.mobi_html[:-1]

        if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
            self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
        self.mobi_html = self.mobi_html.replace('\0', '')
@ -933,6 +936,9 @@ class MobiReader(object):
                continue
            processed_records.append(i)
            data = self.sections[i][0]
            if data[:4] in (b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n'):
                # A FLIS, FCIS, SRCS or EOF record, ignore
                continue
            buf = cStringIO.StringIO(data)
            image_index += 1
            try:
src/calibre/ebooks/mobi/tbs_periodicals.rst (new file, 363 lines)
@ -0,0 +1,363 @@

Reverse engineering the trailing byte sequences for hierarchical periodicals
===============================================================================

In the following, *vwi* means variable width integer and *fvwi* means a vwi whose lowest four bits are used as a flag. All the following information/inferences are from examining the output of kindlegen on a sample periodical. Given the general level of Amazon's incompetence, there are no guarantees that this information is the *best/most complete* way to do TBS indexing.
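
As a minimal illustration (a sketch of the encoding only, not code from calibre or kindlegen), a forward-encoded vwi is read by accumulating 7 bits per byte until a byte with bit 8 set is seen; for an fvwi, the low four bits of the resulting value are the flags::

    def read_vwi_forward(data):
        # Accumulate 7 bits per byte; a byte with bit 8 (0x80) set ends the vwi
        val = 0
        for i, byte in enumerate(bytearray(data)):
            val = (val << 7) | (byte & 0b01111111)
            if byte & 0b10000000:
                return val, i + 1  # (value, bytes consumed)
        raise ValueError('vwi not terminated')

    # The TBS byte 0x86 seen in the dumps below is the vwi 6; as an fvwi it
    # splits into value 0 (high bits) and flags 0b0110 (the low four bits)
    num, consumed = read_vwi_forward(b'\x86')
    assert (num, consumed) == (6, 1)
    assert (num >> 4, num & 0b1111) == (0, 0b0110)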

Sequence encoding:

0b1000 : Continuation bit

First sequences:
0b0010 : 80
0b0011 : 80 80
0b0110 : 80 2
0b0111 : 80 2 80

Other sequences:
0b0101 : 4 1a
0b0001 : c b1

Opening record
----------------

The text record that contains the opening node for the periodical (depth=0 node in the NCX) can have TBS of 3 different forms:

1. If it has only the periodical node and no section/article nodes, TBS of type 2, like this::

    Record #1: Starts at: 0 Ends at: 4095
    Contains: 1 index entries (0 ends, 0 complete, 1 starts)
    TBS bytes: 82 80
    Starts:
        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 68470) [j_x's Google reader]
    TBS Type: 010 (2)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0

2. A periodical and a section node, but no article nodes, TBS type of 6, like this::

    Record #1: Starts at: 0 Ends at: 4095
    Contains: 2 index entries (0 ends, 0 complete, 2 starts)
    TBS bytes: 86 80 2
    Starts:
        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 93254) [j_x's Google reader]
        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 541, Size: 49280) [Ars Technica]
    TBS Type: 110 (6)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (byte: always 2?): 2

3. If it has both the section 1 node and at least one article node, TBS of type 6, like this::

    Record #1: Starts at: 0 Ends at: 4095
    Contains: 4 index entries (0 ends, 1 complete, 3 starts)
    TBS bytes: 86 80 2 c4 2
    Complete:
        Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 549, Size: 1866) [Week in gaming: 3DS review, Crysis 2, George Hotz]
    Starts:
        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 79253) [j_x's Google reader]
        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 541, Size: 35279) [Ars Technica]
        Index Entry: 6 (Parent index: 1, Depth: 2, Offset: 2415, Size: 2764) [Week in Apple: ZFS on Mac OS X, rogue tethering, DUI apps, and more]
    TBS Type: 110 (6)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (byte: always 2?): 2
    Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
    Number of article nodes in the record (byte): 2

If there was only a single article, instead of 2, then the last two bytes would be: c0, i.e. there would be no byte giving the number of articles in the record.

Starting record with two section transitions::

    Record #1: Starts at: 0 Ends at: 4095
    Contains: 7 index entries (0 ends, 4 complete, 3 starts)
    TBS bytes: 86 80 2 c0 b8 c4 3
    Complete:
        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica]
        Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz]
        Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 1014) [Max and the Magic Marker for iPad: Review]
        Index Entry: 7 (Parent index: 2, Depth: 2, Offset: 1961, Size: 1077) [iPad 2 steers itself into home console gaming territory with Real Racing 2 HD]
    Starts:
        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 35372) [j_x's Google reader]
        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 10368) [Neowin.net]
        Index Entry: 8 (Parent index: 2, Depth: 2, Offset: 3038, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
    TBS Type: 110 (6)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (byte: always 2?): 2
    Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
    Remaining bytes: b8 c4 3

Starting record with three section transitions::

    Record #1: Starts at: 0 Ends at: 4095
    Contains: 10 index entries (0 ends, 7 complete, 3 starts)
    TBS bytes: 86 80 2 c0 b8 c0 b8 c4 4
    Complete:
        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica]
        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 316) [Neowin.net]
        Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz]
        Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 308) [Max and the Magic Marker for iPad: Review]
        Index Entry: 7 (Parent index: 3, Depth: 2, Offset: 1263, Size: 760) [OSnews Asks on Interrupts: The Results]
        Index Entry: 8 (Parent index: 3, Depth: 2, Offset: 2023, Size: 693) [Apple Ditches SAMBA in Favour of Homegrown Replacement]
        Index Entry: 9 (Parent index: 3, Depth: 2, Offset: 2716, Size: 747) [ITC: Apple's Mobile Products Do Not Violate Nokia Patents]
    Starts:
        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 25320) [j_x's Google reader]
        Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 1255, Size: 6829) [OSNews]
        Index Entry: 10 (Parent index: 3, Depth: 2, Offset: 3463, Size: 666) [Transparent Monitor Embedded in Window Glass]
    TBS Type: 110 (6)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (byte: always 2?): 2
    Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
    Remaining bytes: b8 c0 b8 c4 4


Records with no nodes
------------------------

subtype = 010

These records are spanned by a single article. They are of two types:

1. If the parent section index is 1, TBS type of 6, like this::

    Record #4: Starts at: 12288 Ends at: 16383
    Contains: 0 index entries (0 ends, 0 complete, 0 starts)
    TBS bytes: 86 80 2 c1 80
    TBS Type: 110 (6)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (byte: always 2?): 2
    Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute]
    EOF (vwi: should be 0): 0

If the record is before the first article, the TBS bytes would be: 86 80 2

2. If the parent section index is > 1, TBS type of 2, like this::

    Record #14: Starts at: 53248 Ends at: 57343
    Contains: 0 index entries (0 ends, 0 complete, 0 starts)
    TBS bytes: 82 80 a0 1 e1 80
    TBS Type: 010 (2)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Parent section index (fvwi): 2
    Flags: 0
    Article index at start of record or first article index, relative to parent section (fvwi): 14 [16 absolute]
    EOF (vwi: should be 0): 0

Records with only article nodes
-----------------------------------

Such records have no section transitions (i.e. a section end/section start pair). They have only one or more article nodes. They are of two types:

1. If the parent section index is 1, TBS type of 7, like this::

    Record #6: Starts at: 20480 Ends at: 24575
    Contains: 2 index entries (1 ends, 0 complete, 1 starts)
    TBS bytes: 87 80 2 80 1 84 2
    Ends:
        Index Entry: 9 (Parent index: 1, Depth: 2, Offset: 16453, Size: 4199) [Vaccine's success spurs whooping cough comeback]
    Starts:
        Index Entry: 10 (Parent index: 1, Depth: 2, Offset: 20652, Size: 4246) [Apple's mobile products do not violate Nokia patents, says ITC]
    TBS Type: 111 (7)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown: '\x02\x80' (vwi?: Always 256)
    Article at start of record (fvwi): 8
    Number of articles in record (byte): 2

If there was only one article in the record, the last two bytes would be replaced by a single byte: 80

If this record is the first record with an article, then the article at the start of the record should be the last section index. At least, that's what kindlegen does, though if you ask me, it should be the first section index.


2. If the parent section index is > 1, TBS type of 2, like this::

    Record #16: Starts at: 61440 Ends at: 65535
    Contains: 5 index entries (1 ends, 3 complete, 1 starts)
    TBS bytes: 82 80 a1 80 1 f4 5
    Ends:
        Index Entry: 17 (Parent index: 2, Depth: 2, Offset: 60920, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
    Complete:
        Index Entry: 18 (Parent index: 2, Depth: 2, Offset: 62002, Size: 1016) [Rumour: OS X Lion nearing Golden Master stage]
        Index Entry: 19 (Parent index: 2, Depth: 2, Offset: 63018, Size: 1045) [iOS 4.3.1 released]
        Index Entry: 20 (Parent index: 2, Depth: 2, Offset: 64063, Size: 972) [Windows 8 'system reset' image leaks]
    Starts:
        Index Entry: 21 (Parent index: 2, Depth: 2, Offset: 65035, Size: 1057) [Windows Phone 7: Why it's failing]
    TBS Type: 010 (2)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Parent section index (fvwi) : 2
    Flags: 1
    Unknown (vwi: always 0?): 0
    Article index at start of record or first article index, relative to parent section (fvwi): 15 [17 absolute]
    Number of article nodes in the record (byte): 5

If there was only one article in the record, the last two bytes would be replaced by a single byte: f0

Records with a section transition
-----------------------------------

In such a record there is a transition from one section to the next. As such, the record must have at least one article ending and one article starting, except in the case of the first section.

1. The first section::

    Record #2: Starts at: 4096 Ends at: 8191
    Contains: 2 index entries (0 ends, 0 complete, 2 starts)
    TBS bytes: 83 80 80 90 c0
    Starts:
        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
        Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 7766, Size: 1866) [Week in gaming: 3DS review, Crysis 2, George Hotz]
    TBS Type: 011 (3)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (vwi: always 0?): 0
    First section index (fvwi) : 1
    Extra bits: 0
    First section starts
    Article at start of block as offset from parent index (fvwi): 4 [5 absolute]
    Flags: 0

If there was more than one article at the start then the last byte would be replaced by: c4 n where n is the number of articles

2. A record with a section transition and only one article from the ending section::

    Record #9: Starts at: 32768 Ends at: 36863
    Contains: 6 index entries (2 ends, 2 complete, 2 starts)
    TBS bytes: 83 80 80 90 1 d0 1 c8 1 d4 3
    Ends:
        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
        Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe]
    Complete:
        Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 1014) [Max and the Magic Marker for iPad: Review]
        Index Entry: 16 (Parent index: 2, Depth: 2, Offset: 35059, Size: 1077) [iPad 2 steers itself into home console gaming territory with Real Racing 2 HD]
    Starts:
        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 10368) [Neowin.net]
        Index Entry: 17 (Parent index: 2, Depth: 2, Offset: 36136, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware]
    TBS Type: 011 (3)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (vwi: always 0?): 0
    First section index (fvwi): 1
    Extra bits (flag: always 0?): 0
    First article of ending section, relative to its parent's index (fvwi): 13 [14 absolute]
    Last article of ending section w.r.t. starting section offset (fvwi): 12 [14 absolute]
    Flags (always 8?): 8
    Article index at start of record or first article index, relative to parent section (fvwi): 13 [15 absolute]
    Number of article nodes in the record (byte): 3

3. A record with a section transition and more than one article from the ending section::

    Record #11: Starts at: 40960 Ends at: 45055
    Contains: 7 index entries (2 ends, 3 complete, 2 starts)
    TBS bytes: 83 80 80 a0 2 b5 4 1a f5 2 d8 2 e0
    Ends:
        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 10368) [Neowin.net]
        Index Entry: 21 (Parent index: 2, Depth: 2, Offset: 40251, Size: 1057) [Windows Phone 7: Why it's failing]
    Complete:
        Index Entry: 22 (Parent index: 2, Depth: 2, Offset: 41308, Size: 1050) [RIM announces Android app support for Blackberry Playbook]
        Index Entry: 23 (Parent index: 2, Depth: 2, Offset: 42358, Size: 1087) [Microsoft buys $7.5m worth of IPv4 addresses]
        Index Entry: 24 (Parent index: 2, Depth: 2, Offset: 43445, Size: 960) [TechSpot: Apple iPad 2 Review]
    Starts:
        Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 44405, Size: 6829) [OSNews]
        Index Entry: 25 (Parent index: 3, Depth: 2, Offset: 44413, Size: 760) [OSnews Asks on Interrupts: The Results]
    TBS Type: 011 (3)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (vwi: always 0?): 0
    First section index (fvwi): 2
    Extra bits (flag: always 0?): 0
    First article of ending section, relative to its parent's index (fvwi): 19 [21 absolute]
    Number of article nodes in the record (byte): 4
    ->Offset from start of record to beginning of last starting section in this record (vwi): 3445
    Last article of ending section w.r.t. starting section offset (fvwi): 21 [24 absolute]
    Flags (always 8?): 8
    Article index at start of record or first article index, relative to parent section (fvwi): 22 [25 absolute]

The difference to the previous case is the extra two bytes that encode the offset of the opening section from the start of the record.

4. A record with multiple section transitions::

    Record #9: Starts at: 32768 Ends at: 36863
    Contains: 9 index entries (2 ends, 5 complete, 2 starts)
    TBS bytes: 83 80 80 90 1 d0 1 c8 1 d1 c b1 1 c8 1 d4 4
    Ends:
        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
        Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe]
    Complete:
        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 316) [Neowin.net]
        Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 308) [Max and the Magic Marker for iPad: Review]
        Index Entry: 16 (Parent index: 3, Depth: 2, Offset: 34361, Size: 760) [OSnews Asks on Interrupts: The Results]
        Index Entry: 17 (Parent index: 3, Depth: 2, Offset: 35121, Size: 693) [Apple Ditches SAMBA in Favour of Homegrown Replacement]
        Index Entry: 18 (Parent index: 3, Depth: 2, Offset: 35814, Size: 747) [ITC: Apple's Mobile Products Do Not Violate Nokia Patents]
    Starts:
        Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 34353, Size: 6829) [OSNews]
        Index Entry: 19 (Parent index: 3, Depth: 2, Offset: 36561, Size: 666) [Transparent Monitor Embedded in Window Glass]
    TBS Type: 011 (3)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (vwi: always 0?): 0
    First section index (fvwi): 1
    Extra bits (flag: always 0?): 0
    First article of ending section, relative to its parent's index (fvwi): 13 [14 absolute]
    Last article of ending section w.r.t. starting section offset (fvwi): 12 [14 absolute]
    Flags (always 8?): 8
    Article index at start of record or first article index, relative to parent section (fvwi): 13 [15 absolute]
    ->Offset from start of record to beginning of next starting section in this record: 1585
    Last article of ending section w.r.t. starting section offset (fvwi): 12 [15 absolute]
    Flags (always 8?): 8
    Article index at start of record or first article index, relative to parent section (fvwi): 13 [16 absolute]
    Number of article nodes in the record belonging to the last section (byte): 4


Ending record
----------------

Logically, ending records must have at least one article ending, one section ending and the periodical ending. They are of TBS type 2, like this::

    Record #17: Starts at: 65536 Ends at: 68684
    Contains: 4 index entries (3 ends, 1 complete, 0 starts)
    TBS bytes: 82 80 c0 4 f4 2
    Ends:
        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 68470) [j_x's Google reader]
        Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 51234, Size: 17451) [Slashdot]
        Index Entry: 43 (Parent index: 4, Depth: 2, Offset: 65422, Size: 1717) [US ITC May Reverse Judge's Ruling In Kodak vs. Apple]
    Complete:
        Index Entry: 44 (Parent index: 4, Depth: 2, Offset: 67139, Size: 1546) [Google Starts Testing Google Music Internally]
    TBS Type: 010 (2)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Parent section index (fvwi): 4
    Flags: 0
    Article at start of block as offset from parent index (fvwi): 39 [43 absolute]
    Number of nodes (byte): 2

If the record had only a single article end, the last two bytes would be replaced with: f0

If the last record has multiple section transitions, it is of type 6 and looks like::

    Record #9: Starts at: 32768 Ends at: 34953
    Contains: 9 index entries (3 ends, 6 complete, 0 starts)
    TBS bytes: 86 80 2 1 d0 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0
    Ends:
        Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 34739) [j_x's Google reader]
        Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica]
        Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe]
    Complete:
        Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 316) [Neowin.net]
        Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 34353, Size: 282) [OSNews]
        Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 34635, Size: 319) [Slashdot]
        Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 308) [Max and the Magic Marker for iPad: Review]
        Index Entry: 16 (Parent index: 3, Depth: 2, Offset: 34361, Size: 274) [OSnews Asks on Interrupts: The Results]
        Index Entry: 17 (Parent index: 4, Depth: 2, Offset: 34643, Size: 311) [Leonard Nimoy Turns 80]
    TBS Type: 110 (6)
    Outer Index entry: 0
    Unknown (vwi: always 0?): 0
    Unknown (byte: always 2?): 2
    Article index at start of record or first article index, relative to parent section (fvwi): 13 [14 absolute]
    Remaining bytes: 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0

src/calibre/ebooks/mobi/utils.py (new file, 341 lines)
@ -0,0 +1,341 @@

#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import struct
from collections import OrderedDict

from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
from calibre.ebooks import normalize

IMAGE_MAX_SIZE = 10 * 1024 * 1024

def decode_hex_number(raw):
    '''
    Decode a variable length number encoded using hexadecimal encoding. These
    numbers have a first byte that tells the number of bytes that follow.
    The bytes that follow are simply the hexadecimal representation of the
    number.

    :param raw: Raw binary data as a bytestring

    :return: The number and the number of bytes from raw that the number
    occupies
    '''
    length, = struct.unpack(b'>B', raw[0])
    raw = raw[1:1+length]
    consumed = length+1
    return int(raw, 16), consumed

def encode_number_as_hex(num):
    '''
    Encode num as a variable length encoded hexadecimal number. Returns the
    bytestring containing the encoded number. These numbers have a first
    byte that tells the number of bytes that follow. The bytes that follow
    are simply the hexadecimal representation of the number.
    '''
    num = bytes(hex(num)[2:].upper())
    nlen = len(num)
    if nlen % 2 != 0:
        num = b'0'+num
    ans = bytearray(num)
    ans.insert(0, len(num))
    return bytes(ans)
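
# Illustrative round trip (an editorial sketch, not part of the original
# file): encode_number_as_hex(31) == b'\x021F' -- one length byte (2)
# followed by the ASCII hex digits -- and decode_hex_number(b'\x021F')
# returns (31, 3).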

def encint(value, forward=True):
    '''
    Some parts of the Mobipocket format encode data as variable-width integers.
    These integers are represented big-endian with 7 bits per byte in bits 1-7.
    They may be either forward-encoded, in which case only the last byte has bit 8 set,
    or backward-encoded, in which case only the first byte has bit 8 set.
    For example, the number 0x11111 = 0b10001000100010001 would be represented
    forward-encoded as:

        0x04 0x22 0x91 = 0b100 0b100010 0b10010001

    And backward-encoded as:

        0x84 0x22 0x11 = 0b10000100 0b100010 0b10001

    This function encodes the integer ``value`` as a variable width integer and
    returns the bytestring corresponding to it.

    If forward is True the bytes returned are suitable for prepending to the
    output buffer, otherwise they must be appended to the output buffer.
    '''
    if value < 0:
        raise ValueError('Cannot encode negative numbers as vwi')
    # Encode vwi
    byts = bytearray()
    while True:
        b = value & 0b01111111
        value >>= 7  # shift value to the right by 7 bits

        byts.append(b)
        if value == 0:
            break
    byts[0 if forward else -1] |= 0b10000000
    byts.reverse()
    return bytes(byts)

def decint(raw, forward=True):
    '''
    Read a variable width integer from the bytestring or bytearray raw and return the
    integer and the number of bytes read. If forward is True bytes are read
    from the start of raw, otherwise from the end of raw.

    This function is the inverse of encint above, see its docs for more
    details.
    '''
    val = 0
    byts = bytearray()
    src = bytearray(raw)
    if not forward:
        src.reverse()
    for bnum in src:
        byts.append(bnum & 0b01111111)
        if bnum & 0b10000000:
            break
    if not forward:
        byts.reverse()
    for byte in byts:
        val <<= 7  # Shift value to the left by 7 bits
        val |= byte

    return val, len(byts)
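
# Illustrative round trip (an editorial sketch, not part of the original
# file), using the worked example from the encint() docstring:
#   encint(0x11111) == b'\x04\x22\x91'
#   decint(b'\x04\x22\x91') == (0x11111, 3)
#   encint(0x11111, forward=False) == b'\x84\x22\x11'
#   decint(b'\x84\x22\x11', forward=False) == (0x11111, 3)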

def test_decint(num):
    for d in (True, False):
        raw = encint(num, forward=d)
        sz = len(raw)
        if (num, sz) != decint(raw, forward=d):
            raise ValueError('Failed for num %d, forward=%r: %r != %r' % (
                num, d, (num, sz), decint(raw, forward=d)))

def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
    '''
    Convert the image, setting all transparent pixels to white and changing
    the format to JPEG. Ensure the resultant image has a byte size less than
    maxsizeb.

    If dimen is not None, generate a thumbnail of width=dimen, height=dimen

    Returns the image as a bytestring
    '''
    if dimen is not None:
        data = thumbnail(data, width=dimen, height=dimen,
                compression_quality=90)[-1]
    else:
        # Replace transparent pixels with white pixels and convert to JPEG
        data = save_cover_data_to(data, 'img.jpg', return_data=True)
    if len(data) <= maxsizeb:
        return data
    orig_data = data
    img = Image()
    quality = 95

    img.load(data)
    while len(data) >= maxsizeb and quality >= 10:
        quality -= 5
        img.set_compression_quality(quality)
        data = img.export('jpg')
    if len(data) <= maxsizeb:
        return data
    orig_data = data

    scale = 0.9
    while len(data) >= maxsizeb and scale >= 0.05:
        img = Image()
        img.load(orig_data)
        w, h = img.size
        img.size = (int(scale*w), int(scale*h))
        img.set_compression_quality(quality)
        data = img.export('jpg')
        scale -= 0.05
    return data

def get_trailing_data(record, extra_data_flags):
    '''
    Given a text record as a bytestring and the extra data flags from the MOBI
    header, return the trailing data as a dictionary, mapping bit number to
    data as bytestring. Also returns the record with all the trailing data
    removed.

    :return: Trailing data, record minus trailing data
    '''
    data = OrderedDict()
    flags = extra_data_flags >> 1

    num = 0
    while flags:
        num += 1
        if flags & 0b1:
            sz, consumed = decint(record, forward=False)
            if sz > consumed:
                data[num] = record[-sz:-consumed]
            record = record[:-sz]
        flags >>= 1
    # Read multibyte chars if any
    if extra_data_flags & 0b1:
        # Only the first two bits are used for the size since there can
        # never be more than 3 trailing multibyte chars
        sz = (ord(record[-1]) & 0b11) + 1
        consumed = 1
        if sz > consumed:
            data[0] = record[-sz:-consumed]
        record = record[:-sz]
    return data, record
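
# Illustrative sketch (not part of the original file): with
# extra_data_flags == 0b10, a record that ends in the trailing entry
# produced by encode_trailing_data() below decomposes as
#   get_trailing_data(b'TEXT' + b'abc\x84', 0b10) == ({1: b'abc'}, b'TEXT')
# since the backward vwi 0x84 gives a total entry size of 4 bytes.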

def encode_trailing_data(raw):
    '''
    Given some data in the bytestring raw, return a bytestring of the form

        <data><size>

    where size is a backwards encoded vwi whose value is the length of the
    entire returned bytestring. data is the bytestring passed in as raw.

    This is the encoding used for trailing data entries at the end of text
    records. See get_trailing_data() for details.
    '''
    lsize = 1
    while True:
        encoded = encint(len(raw) + lsize, forward=False)
        if len(encoded) == lsize:
            break
        lsize += 1
    return raw + encoded
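
# Illustrative sketch (not part of the original file):
#   encode_trailing_data(b'abc') == b'abc\x84'
# The backward-encoded vwi 0x84 decodes to 4, the length of the whole entry.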

def encode_fvwi(val, flags, flag_size=4):
    '''
    Encode the value val and the flag_size bits from flags as an fvwi. This
    encoding is used in the trailing byte sequences for indexing. Returns the
    encoded bytestring.
    '''
    ans = val << flag_size
    for i in xrange(flag_size):
        ans |= (flags & (1 << i))
    return encint(ans)


def decode_fvwi(byts, flag_size=4):
    '''
    Decode an encoded fvwi. Returns value, flags, consumed
    '''
    arg, consumed = decint(bytes(byts))
    val = arg >> flag_size
    flags = 0
    for i in xrange(flag_size):
        flags |= (arg & (1 << i))
    return val, flags, consumed
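
# Illustrative round trip (an editorial sketch, not part of the original
# file):
#   encode_fvwi(5, 0b1010) == b'\xda'   # (5 << 4) | 0b1010 = 0x5a, flagged 0xda
#   decode_fvwi(b'\xda') == (5, 0b1010, 1)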

def decode_tbs(byts, flag_size=4):
    '''
    Trailing byte sequences for indexing consist of a series of fvwi numbers.
    This function reads the first fvwi number and its associated flags. It
    then uses the flags to read any more numbers that belong to the series.
    The flags are the lowest 4 bits of the vwi (see the encode_fvwi function
    above).

    Returns the fvwi number, a dictionary mapping flag bits to the associated
    data and the number of bytes consumed.
    '''
    byts = bytes(byts)
    val, flags, consumed = decode_fvwi(byts, flag_size=flag_size)
    extra = {}
    byts = byts[consumed:]
    if flags & 0b1000 and flag_size > 3:
        extra[0b1000] = True
    if flags & 0b0010:
        x, consumed2 = decint(byts)
        byts = byts[consumed2:]
        extra[0b0010] = x
        consumed += consumed2
    if flags & 0b0100:
        extra[0b0100] = ord(byts[0])
        byts = byts[1:]
        consumed += 1
    if flags & 0b0001:
        x, consumed2 = decint(byts)
        byts = byts[consumed2:]
        extra[0b0001] = x
        consumed += consumed2
    return val, extra, consumed

def encode_tbs(val, extra, flag_size=4):
    '''
    Encode the number val and the extra data in the extra dict as an fvwi. See
    decode_tbs above.
    '''
    flags = 0
    for flag in extra:
        flags |= flag
    ans = encode_fvwi(val, flags, flag_size=flag_size)

    if 0b0010 in extra:
        ans += encint(extra[0b0010])
    if 0b0100 in extra:
        ans += bytes(bytearray([extra[0b0100]]))
    if 0b0001 in extra:
        ans += encint(extra[0b0001])
    return ans
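
# Illustrative round trip (an editorial sketch, not part of the original
# file):
#   encode_tbs(1, {0b0010: 5, 0b0100: 9}) == b'\x96\x85\x09'
#   decode_tbs(b'\x96\x85\x09') == (1, {0b0010: 5, 0b0100: 9}, 3)
# The flag bits 0b0010 and 0b0100 signal the extra vwi (5) and byte (9).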

def utf8_text(text):
    '''
    Convert a possibly null string to utf-8 bytes, guaranteeing to return a
    non-empty, normalized bytestring.
    '''
    if text and text.strip():
        text = text.strip()
        if not isinstance(text, unicode):
            text = text.decode('utf-8', 'replace')
        text = normalize(text).encode('utf-8')
    else:
        text = _('Unknown').encode('utf-8')
    return text

def align_block(raw, multiple=4, pad=b'\0'):
    '''
    Return raw with enough pad bytes appended to ensure its length is a
    multiple of `multiple` (4 by default).
    '''
    extra = len(raw) % multiple
    if extra == 0: return raw
    return raw + pad*(multiple - extra)
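
# Illustrative sketch (not part of the original file):
#   align_block(b'abcde') == b'abcde\x00\x00\x00'   # padded to a multiple of 4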

def detect_periodical(toc, log=None):
    '''
    Detect if the TOC object toc contains a periodical that conforms to the
    structure required by kindlegen to generate a periodical.
    '''
    for node in toc.iterdescendants():
        if node.depth() == 1 and node.klass != 'article':
            if log is not None:
                log.debug(
                    'Not a periodical: Deepest node does not have '
                    'class="article"')
            return False
        if node.depth() == 2 and node.klass != 'section':
            if log is not None:
                log.debug(
                    'Not a periodical: Second deepest node does not have'
                    ' class="section"')
            return False
        if node.depth() == 3 and node.klass != 'periodical':
            if log is not None:
                log.debug('Not a periodical: Third deepest node'
                        ' does not have class="periodical"')
            return False
        if node.depth() > 3:
            if log is not None:
                log.debug('Not a periodical: Has nodes of depth > 3')
            return False
    return True

@ -111,7 +111,8 @@ def align_block(raw, multiple=4, pad='\0'):

def rescale_image(data, maxsizeb, dimen=None):
    if dimen is not None:
        data = thumbnail(data, width=dimen, height=dimen)[-1]
        data = thumbnail(data, width=dimen[0], height=dimen[1],
                compression_quality=90)[-1]
    else:
        # Replace transparent pixels with white pixels and convert to JPEG
        data = save_cover_data_to(data, 'img.jpg', return_data=True)
@ -141,7 +142,7 @@ def rescale_image(data, maxsizeb, dimen=None):
        scale -= 0.05
    return data

class Serializer(object):
class Serializer(object): # {{{
    NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

    def __init__(self, oeb, images, write_page_breaks_after_item=True):
@ -172,6 +173,9 @@ class Serializer(object):
        hrefs = self.oeb.manifest.hrefs
        buffer.write('<guide>')
        for ref in self.oeb.guide.values():
            # The Kindle decides where to open a book based on the presence of
            # an item in the guide that looks like
            # <reference type="text" title="Start" href="chapter-one.xhtml"/>
            path = urldefrag(ref.href)[0]
            if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
                continue
@ -215,12 +219,6 @@ class Serializer(object):
            self.anchor_offset = buffer.tell()
            buffer.write('<body>')
            self.anchor_offset_kindle = buffer.tell()
            # CybookG3 'Start Reading' link
            if 'text' in self.oeb.guide:
                href = self.oeb.guide['text'].href
                buffer.write('<a ')
                self.serialize_href(href)
                buffer.write(' />')
        spine = [item for item in self.oeb.spine if item.linear]
        spine.extend([item for item in self.oeb.spine if not item.linear])
        for item in spine:
@ -315,16 +313,20 @@ class Serializer(object):
            buffer.seek(hoff)
            buffer.write('%010d' % ioff)

    # }}}

class MobiWriter(object):
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

    def __init__(self, opts, compression=PALMDOC, imagemax=None,
            prefer_author_sort=False, write_page_breaks_after_item=True):
    def __init__(self, opts,
            write_page_breaks_after_item=True):
        self.opts = opts
        self.write_page_breaks_after_item = write_page_breaks_after_item
        self._compression = compression or UNCOMPRESSED
        self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
        self._prefer_author_sort = prefer_author_sort
        self._compression = UNCOMPRESSED if getattr(opts, 'dont_compress',
                False) else PALMDOC
        self._imagemax = (PALM_MAX_IMAGE_SIZE if getattr(opts,
                'rescale_images', False) else OTHER_MAX_IMAGE_SIZE)
        self._prefer_author_sort = getattr(opts, 'prefer_author_sort', False)
        self._primary_index_record = None
        self._conforming_periodical_toc = False
        self._indexable = False
@ -428,6 +430,7 @@ class MobiWriter(object):
        text.seek(npos)
        return data, overlap

    # TBS {{{
    def _generate_flat_indexed_navpoints(self):
        # Assemble a HTMLRecordData instance for each HTML record
        # Return True if valid, False if invalid
@ -1172,6 +1175,8 @@ class MobiWriter(object):

        self._tbSequence = tbSequence

    # }}}

    def _evaluate_periodical_toc(self):
        '''
        Periodical:
@ -1229,6 +1234,9 @@ class MobiWriter(object):
        self._oeb.logger.info(' Compressing markup content...')
        data, overlap = self._read_text_record(text)

        if not self.opts.mobi_periodical:
            self._flatten_toc()

        # Evaluate toc for conformance
        if self.opts.mobi_periodical :
            self._oeb.logger.info(' MOBI periodical specified, evaluating TOC for periodical conformance ...')
@ -1258,11 +1266,11 @@ class MobiWriter(object):
                data = compress_doc(data)
            record = StringIO()
            record.write(data)

            # Marshall's utf-8 break code.
            if WRITE_PBREAKS :
            # Write trailing multi-byte sequence if any
            record.write(overlap)
            record.write(pack('>B', len(overlap)))

            if WRITE_PBREAKS :
                nextra = 0
                pbreak = 0
                running = offset
@ -1325,6 +1333,8 @@ class MobiWriter(object):
            except:
                self._oeb.logger.warn('Bad image file %r' % item.href)
                continue
            finally:
                item.unload_data_from_memory()
            self._records.append(data)
            if self._first_image_record is None:
                self._first_image_record = len(self._records)-1
@ -1627,7 +1637,7 @@ class MobiWriter(object):
        now = int(time.time())
        nrecords = len(self._records)
        self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
            'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords))
            'BOOK', 'MOBI', pack('>IIH', (2*nrecords)-1, 0, nrecords))
        offset = self._tell() + (8 * nrecords) + 2
        for i, record in enumerate(self._records):
            self._write(pack('>I', offset), '\0', pack('>I', 2*i)[1:])
@ -1638,6 +1648,87 @@ class MobiWriter(object):
        for record in self._records:
            self._write(record)

    def _clean_text_value(self, text):
        if text is not None and text.strip() :
            text = text.strip()
            if not isinstance(text, unicode):
                text = text.decode('utf-8', 'replace')
            text = normalize(text).encode('utf-8')
        else :
            text = "(none)".encode('utf-8')
        return text

    def _compute_offset_length(self, i, node, entries) :
        h = node.href
        if h not in self._id_offsets:
            self._oeb.log.warning('Could not find TOC entry:', node.title)
            return -1, -1

        offset = self._id_offsets[h]
        length = None
        # Calculate length based on next entry's offset
        for sibling in entries[i+1:]:
            h2 = sibling.href
            if h2 in self._id_offsets:
                offset2 = self._id_offsets[h2]
                if offset2 > offset:
                    length = offset2 - offset
                    break
        if length is None:
            length = self._content_length - offset
        return offset, length

    def _establish_document_structure(self) :
        documentType = None
        try :
            klass = self._ctoc_map[0]['klass']
        except :
            klass = None

        if klass == 'chapter' or klass == None :
            documentType = 'book'
            if self.opts.verbose > 2 :
                self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
            self._MobiDoc.documentStructure = MobiBook()

        elif klass == 'periodical' :
            documentType = klass
            if self.opts.verbose > 2 :
                self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
            self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
            self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
        else :
            raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
        return documentType

    # Index {{{

    def _flatten_toc(self):
        '''
        Flatten and re-order entries in TOC so that chapter to chapter jumping
        never fails on the Kindle.
        '''
        from calibre.ebooks.oeb.base import TOC
        items = list(self._oeb.toc.iterdescendants())
        if self.opts.mobi_navpoints_only_deepest:
            items = [i for i in items if i.depth == 1]
        offsets = {i:self._id_offsets.get(i.href, -1) for i in items if i.href}
        items = [i for i in items if offsets[i] > -1]
        items.sort(key=lambda i:offsets[i])
        filt = []
        seen = set()
        for i in items:
            off = offsets[i]
            if off in seen: continue
            seen.add(off)
            filt.append(i)
        items = filt
        newtoc = TOC()
        for c, i in enumerate(items):
            newtoc.add(i.title, i.href, play_order=c+1, id=str(c),
                    klass='chapter')
        self._oeb.toc = newtoc

    def _generate_index(self):
        self._oeb.log('Generating INDX ...')
        self._primary_index_record = None
@ -1811,276 +1902,7 @@ class MobiWriter(object):
                open(os.path.join(t, n+'.bin'), 'wb').write(self._records[-(i+1)])
            self._oeb.log.debug('Index records dumped to', t)

    def _clean_text_value(self, text):
        if text is not None and text.strip() :
            text = text.strip()
            if not isinstance(text, unicode):
                text = text.decode('utf-8', 'replace')
            text = normalize(text).encode('utf-8')
        else :
            text = "(none)".encode('utf-8')
        return text

    def _add_to_ctoc(self, ctoc_str, record_offset):
        # Write vwilen + string to ctoc
        # Return offset
        # Is there enough room for this string in the current ctoc record?
        if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
            # flush this ctoc, start a new one
            # print "closing ctoc_record at 0x%X" % self._ctoc.tell()
            # print "starting new ctoc with '%-50.50s ...'" % ctoc_str
            # pad with 00
            pad = 0xfbf8 - self._ctoc.tell()
            # print "padding %d bytes of 00" % pad
            self._ctoc.write('\0' * (pad))
            self._ctoc_records.append(self._ctoc.getvalue())
            self._ctoc.truncate(0)
            self._ctoc_offset += 0x10000
            record_offset = self._ctoc_offset

        offset = self._ctoc.tell() + record_offset
        self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
        return offset

    def _add_flat_ctoc_node(self, node, ctoc, title=None):
        # Process 'chapter' or 'article' nodes only, force either to 'chapter'
        t = node.title if title is None else title
        t = self._clean_text_value(t)
        self._last_toc_entry = t

        # Create an empty dictionary for this node
        ctoc_name_map = {}

        # article = chapter
        if node.klass == 'article' :
            ctoc_name_map['klass'] = 'chapter'
        else :
            ctoc_name_map['klass'] = node.klass

        # Add title offset to name map
        ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
        self._chapterCount += 1

        # append this node's name_map to map
        self._ctoc_map.append(ctoc_name_map)

        return
    def _add_structured_ctoc_node(self, node, ctoc, title=None):
        # Process 'periodical', 'section' and 'article'

        # Fetch the offset referencing the current ctoc_record
        if node.klass is None :
            return
        t = node.title if title is None else title
        t = self._clean_text_value(t)
        self._last_toc_entry = t

        # Create an empty dictionary for this node
        ctoc_name_map = {}

        # Add the klass of this node
        ctoc_name_map['klass'] = node.klass

        if node.klass == 'chapter':
            # Add title offset to name map
            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
            self._chapterCount += 1

        elif node.klass == 'periodical' :
            # Add title offset
            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'periodical' in _ctoc_map
            for entry in self._ctoc_map:
                if entry['klass'] == 'periodical':
                    # Use the pre-existing instance
                    ctoc_name_map['classOffset'] = entry['classOffset']
                    break
                else :
                    continue
            else:
                # class names should always be in CNCX 0 - no offset
                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            self._periodicalCount += 1

        elif node.klass == 'section' :
            # Add title offset
            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'section' in _ctoc_map
            for entry in self._ctoc_map:
                if entry['klass'] == 'section':
                    # Use the pre-existing instance
                    ctoc_name_map['classOffset'] = entry['classOffset']
                    break
                else :
                    continue
            else:
                # class names should always be in CNCX 0 - no offset
                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            self._sectionCount += 1

        elif node.klass == 'article' :
            # Add title offset/title
            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'article' in _ctoc_map
            for entry in self._ctoc_map:
                if entry['klass'] == 'article':
                    ctoc_name_map['classOffset'] = entry['classOffset']
                    break
                else :
                    continue
            else:
                # class names should always be in CNCX 0 - no offset
                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            # Add description offset/description
            if node.description :
                d = self._clean_text_value(node.description)
                ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
            else :
                ctoc_name_map['descriptionOffset'] = None

            # Add author offset/attribution
            if node.author :
                a = self._clean_text_value(node.author)
                ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
            else :
                ctoc_name_map['authorOffset'] = None

            self._articleCount += 1

        else :
            raise NotImplementedError( \
                'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
                (node.title, node.klass, node.play_order))

        # append this node's name_map to map
        self._ctoc_map.append(ctoc_name_map)
    def _generate_ctoc(self):
        # Generate the compiled TOC strings
        # Each node has 1-4 CTOC entries:
        #   Periodical (0xDF)
        #       title, class
        #   Section (0xFF)
        #       title, class
        #   Article (0x3F)
        #       title, class, description, author
        #   Chapter (0x0F)
        #       title, class
        #   nb: Chapters don't actually have @class, so we synthesize it
        #   in reader._toc_from_navpoint

        toc = self._oeb.toc
        reduced_toc = []
        self._ctoc_map = []     # per node dictionary of {class/title/desc/author} offsets
        self._last_toc_entry = None
        #ctoc = StringIO()
        self._ctoc = StringIO()

        # Track the individual node types
        self._periodicalCount = 0
        self._sectionCount = 0
        self._articleCount = 0
        self._chapterCount = 0

        #first = True

        if self._conforming_periodical_toc :
            self._oeb.logger.info('Generating structured CTOC ...')
            for (child) in toc.iter():
                if self.opts.verbose > 2 :
                    self._oeb.logger.info(" %s" % child)
                self._add_structured_ctoc_node(child, self._ctoc)
                #first = False

        else :
            self._oeb.logger.info('Generating flat CTOC ...')
            previousOffset = -1
            currentOffset = 0
            for (i, child) in enumerate(toc.iterdescendants()):
                # Only add chapters or articles at depth==1
                # no class defaults to 'chapter'
                if child.klass is None : child.klass = 'chapter'
                if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
                    if self.opts.verbose > 2 :
                        self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
                                              (child.klass, child.depth(), child) )

                    # Test to see if this child's offset is the same as the previous child's
                    # offset, skip it
                    h = child.href

                    if h is None:
                        self._oeb.logger.warn(' Ignoring TOC entry with no href:',
                                child.title)
                        continue
                    if h not in self._id_offsets:
                        self._oeb.logger.warn(' Ignoring missing TOC entry:',
                                unicode(child))
                        continue

                    currentOffset = self._id_offsets[h]
                    # print "_generate_ctoc: child offset: 0x%X" % currentOffset

                    if currentOffset != previousOffset :
                        self._add_flat_ctoc_node(child, self._ctoc)
                        reduced_toc.append(child)
                        previousOffset = currentOffset
                    else :
                        self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))

                else :
                    if self.opts.verbose > 2 :
                        self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
                                              (child.klass, child.depth(),i))

        # Update the TOC with our edited version
        self._oeb.toc.nodes = reduced_toc

        # Instantiate a MobiDocument(mobitype)
        if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
            not self.opts.mobi_periodical :
            mobiType = 0x002
        elif self._periodicalCount:
            pt = None
            if self._oeb.metadata.publication_type:
                x = unicode(self._oeb.metadata.publication_type[0]).split(':')
                if len(x) > 1:
                    pt = x[1]
            mobiType = {'newspaper':0x101}.get(pt, 0x103)
        else :
            raise NotImplementedError('_generate_ctoc: Unrecognized document structure')

        self._MobiDoc = MobiDocument(mobiType)

        if self.opts.verbose > 2 :
            structType = 'book'
            if mobiType > 0x100 :
                structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
            self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
            if mobiType > 0x100 :
                self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
                                    (self._periodicalCount, self._sectionCount, self._articleCount) )
            else :
                self._oeb.logger.info("chapterCount: %d" % self._chapterCount)

        # Apparently the CTOC must end with a null byte
        self._ctoc.write('\0')

        ctoc = self._ctoc.getvalue()
        rec_count = len(self._ctoc_records)
        self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
            (rec_count + 1, 'records, last record' if rec_count else 'record,',
                len(ctoc)/655) )

        return align_block(ctoc)

    # Index nodes {{{
    def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
        pos = 0xc0 + indxt.tell()
        indices.write(pack('>H', pos))  # Save the offset for IDXTIndices
@@ -2172,48 +1994,8 @@ class MobiWriter(object):
        indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD))  # vwi title offset in CNCX
        indxt.write(decint(0, DECINT_FORWARD))  # unknown byte

    def _compute_offset_length(self, i, node, entries) :
        h = node.href
        if h not in self._id_offsets:
            self._oeb.log.warning('Could not find TOC entry:', node.title)
            return -1, -1
    # }}}

        offset = self._id_offsets[h]
        length = None
        # Calculate length based on next entry's offset
        for sibling in entries[i+1:]:
            h2 = sibling.href
            if h2 in self._id_offsets:
                offset2 = self._id_offsets[h2]
                if offset2 > offset:
                    length = offset2 - offset
                    break
        if length is None:
            length = self._content_length - offset
        return offset, length

    def _establish_document_structure(self) :
        documentType = None
        try :
            klass = self._ctoc_map[0]['klass']
        except :
            klass = None

        if klass == 'chapter' or klass == None :
            documentType = 'book'
            if self.opts.verbose > 2 :
                self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
            self._MobiDoc.documentStructure = MobiBook()

        elif klass == 'periodical' :
            documentType = klass
            if self.opts.verbose > 2 :
                self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
            self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
            self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
        else :
            raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
        return documentType

    def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) :
        sectionTitles = list(child.iter())[1:]
@@ -2491,6 +2273,270 @@ class MobiWriter(object):
        last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices)

        return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name
    # }}}

    # CTOC {{{
    def _add_to_ctoc(self, ctoc_str, record_offset):
        # Write vwilen + string to ctoc
        # Return offset
        # Is there enough room for this string in the current ctoc record?
        if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
            # flush this ctoc, start a new one
            # print "closing ctoc_record at 0x%X" % self._ctoc.tell()
            # print "starting new ctoc with '%-50.50s ...'" % ctoc_str
            # pad with 00
            pad = 0xfbf8 - self._ctoc.tell()
            # print "padding %d bytes of 00" % pad
            self._ctoc.write('\0' * (pad))
            self._ctoc_records.append(self._ctoc.getvalue())
            self._ctoc.truncate(0)
            self._ctoc_offset += 0x10000
            record_offset = self._ctoc_offset

        offset = self._ctoc.tell() + record_offset
        self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
        return offset
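
    # Note: decint/encint here emit MOBI variable-width integers ('vwi'):
    # the value is split into 7-bit groups, most significant group first,
    # and the final byte has its high bit (0x80) set. A minimal sketch of
    # the forward encoding, for orientation only (not part of this file):
    #
    #   def vwi_forward(n):
    #       groups = bytearray()
    #       while True:
    #           groups.append(n & 0x7f)  # low 7 bits
    #           n >>= 7
    #           if not n:
    #               break
    #       groups.reverse()             # most significant group first
    #       groups[-1] |= 0x80           # terminator bit on the last byte
    #       return bytes(groups)
    #
    #   e.g. vwi_forward(0) == b'\x80' and vwi_forward(0x81) == b'\x01\x81'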

    def _add_flat_ctoc_node(self, node, ctoc, title=None):
        # Process 'chapter' or 'article' nodes only, force either to 'chapter'
        t = node.title if title is None else title
        t = self._clean_text_value(t)
        self._last_toc_entry = t

        # Create an empty dictionary for this node
        ctoc_name_map = {}

        # article = chapter
        if node.klass == 'article' :
            ctoc_name_map['klass'] = 'chapter'
        else :
            ctoc_name_map['klass'] = node.klass

        # Add title offset to name map
        ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
        self._chapterCount += 1

        # append this node's name_map to map
        self._ctoc_map.append(ctoc_name_map)

        return

    def _add_structured_ctoc_node(self, node, ctoc, title=None):
        # Process 'periodical', 'section' and 'article'

        # Fetch the offset referencing the current ctoc_record
        if node.klass is None :
            return
        t = node.title if title is None else title
        t = self._clean_text_value(t)
        self._last_toc_entry = t

        # Create an empty dictionary for this node
        ctoc_name_map = {}

        # Add the klass of this node
        ctoc_name_map['klass'] = node.klass

        if node.klass == 'chapter':
            # Add title offset to name map
            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
            self._chapterCount += 1

        elif node.klass == 'periodical' :
            # Add title offset
            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'periodical' in _ctoc_map
            for entry in self._ctoc_map:
                if entry['klass'] == 'periodical':
                    # Use the pre-existing instance
                    ctoc_name_map['classOffset'] = entry['classOffset']
                    break
                else :
                    continue
            else:
                # class names should always be in CNCX 0 - no offset
                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            self._periodicalCount += 1

        elif node.klass == 'section' :
            # Add title offset
            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'section' in _ctoc_map
            for entry in self._ctoc_map:
                if entry['klass'] == 'section':
                    # Use the pre-existing instance
                    ctoc_name_map['classOffset'] = entry['classOffset']
                    break
                else :
                    continue
            else:
                # class names should always be in CNCX 0 - no offset
                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            self._sectionCount += 1

        elif node.klass == 'article' :
            # Add title offset/title
            ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)

            # Look for existing class entry 'article' in _ctoc_map
            for entry in self._ctoc_map:
                if entry['klass'] == 'article':
                    ctoc_name_map['classOffset'] = entry['classOffset']
                    break
                else :
                    continue
            else:
                # class names should always be in CNCX 0 - no offset
                ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)

            # Add description offset/description
            if node.description :
                d = self._clean_text_value(node.description)
                ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
            else :
                ctoc_name_map['descriptionOffset'] = None

            # Add author offset/attribution
            if node.author :
                a = self._clean_text_value(node.author)
                ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
            else :
                ctoc_name_map['authorOffset'] = None

            self._articleCount += 1

        else :
            raise NotImplementedError( \
                'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
                (node.title, node.klass, node.play_order))

        # append this node's name_map to map
        self._ctoc_map.append(ctoc_name_map)

    def _generate_ctoc(self):
        # Generate the compiled TOC strings
        # Each node has 1-4 CTOC entries:
        #   Periodical (0xDF)
        #       title, class
        #   Section (0xFF)
        #       title, class
        #   Article (0x3F)
        #       title, class, description, author
        #   Chapter (0x0F)
        #       title, class
        #   nb: Chapters don't actually have @class, so we synthesize it
        #   in reader._toc_from_navpoint

        toc = self._oeb.toc
        reduced_toc = []
        self._ctoc_map = []  # per node dictionary of {class/title/desc/author} offsets
        self._last_toc_entry = None
        #ctoc = StringIO()
        self._ctoc = StringIO()

        # Track the individual node types
        self._periodicalCount = 0
        self._sectionCount = 0
        self._articleCount = 0
        self._chapterCount = 0

        #first = True

        if self._conforming_periodical_toc :
            self._oeb.logger.info('Generating structured CTOC ...')
            for (child) in toc.iter():
                if self.opts.verbose > 2 :
                    self._oeb.logger.info("  %s" % child)
                self._add_structured_ctoc_node(child, self._ctoc)
                #first = False

        else :
            self._oeb.logger.info('Generating flat CTOC ...')
            previousOffset = -1
            currentOffset = 0
            for (i, child) in enumerate(toc.iterdescendants()):
                # Only add chapters or articles at depth==1
                # no class defaults to 'chapter'
                if child.klass is None : child.klass = 'chapter'
                if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
                    if self.opts.verbose > 2 :
                        self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
                            (child.klass, child.depth(), child) )

                    # Test to see if this child's offset is the same as the
                    # previous child's offset; if so, skip it
                    h = child.href

                    if h is None:
                        self._oeb.logger.warn('  Ignoring TOC entry with no href:',
                                child.title)
                        continue
                    if h not in self._id_offsets:
                        self._oeb.logger.warn('  Ignoring missing TOC entry:',
                                unicode(child))
                        continue

                    currentOffset = self._id_offsets[h]
                    # print "_generate_ctoc: child offset: 0x%X" % currentOffset

                    if currentOffset != previousOffset :
                        self._add_flat_ctoc_node(child, self._ctoc)
                        reduced_toc.append(child)
                        previousOffset = currentOffset
                    else :
                        self._oeb.logger.warn("  Ignoring redundant href: %s in '%s'" % (h, child.title))

                else :
                    if self.opts.verbose > 2 :
                        self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
                            (child.klass, child.depth(), i))

            # Update the TOC with our edited version
            self._oeb.toc.nodes = reduced_toc

        # Instantiate a MobiDocument(mobitype)
        if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
                not self.opts.mobi_periodical :
            mobiType = 0x002
        elif self._periodicalCount:
            pt = None
            if self._oeb.metadata.publication_type:
                x = unicode(self._oeb.metadata.publication_type[0]).split(':')
                if len(x) > 1:
                    pt = x[1]
            mobiType = {'newspaper':0x101}.get(pt, 0x103)
        else :
            raise NotImplementedError('_generate_ctoc: Unrecognized document structure')
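        # mobiType reference (cf. the MOBI-type table in writer2/main.py):
        # 0x002 book, 0x101 newspaper, 0x102 flat periodical, and
        # 0x103 other structured periodical (the default for periodicals here)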

        self._MobiDoc = MobiDocument(mobiType)

        if self.opts.verbose > 2 :
            structType = 'book'
            if mobiType > 0x100 :
                structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
            self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType) )
            if mobiType > 0x100 :
                self._oeb.logger.info("periodicalCount: %d  sectionCount: %d  articleCount: %d" % \
                    (self._periodicalCount, self._sectionCount, self._articleCount) )
            else :
                self._oeb.logger.info("chapterCount: %d" % self._chapterCount)

        # Apparently the CTOC must end with a null byte
        self._ctoc.write('\0')

        ctoc = self._ctoc.getvalue()
        rec_count = len(self._ctoc_records)
        self._oeb.logger.info("  CNCX utilization: %d %s %.0f%% full" % \
            (rec_count + 1, 'records, last record' if rec_count else 'record,',
                len(ctoc)/655) )

        return align_block(ctoc)

    # }}}

class HTMLRecordData(object):
    """ A data structure containing indexing/navigation data for an HTML record """

src/calibre/ebooks/mobi/writer2/__init__.py (new file, 16 lines)
@@ -0,0 +1,16 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'


UNCOMPRESSED = 1
PALMDOC = 2
HUFFDIC = 17480
PALM_MAX_IMAGE_SIZE = 63 * 1024
RECORD_SIZE = 0x1000  # 4096 (Text record size (uncompressed))

src/calibre/ebooks/mobi/writer2/indexer.py (new file, 856 lines)
@@ -0,0 +1,856 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)
from future_builtins import filter, map

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from struct import pack
from cStringIO import StringIO
from collections import OrderedDict, defaultdict

from calibre.ebooks.mobi.writer2 import RECORD_SIZE
from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
        encode_tbs, align_block, utf8_text)

class CNCX(object): # {{{

    '''
    Create the CNCX records. These are records containing all the strings from
    the NCX. Each record is of the form: <vwi string size><utf-8 encoded
    string>
    '''

    MAX_STRING_LENGTH = 500

    def __init__(self, toc, is_periodical):
        self.strings = OrderedDict()

        for item in toc.iterdescendants(breadth_first=True):
            self.strings[item.title] = 0
            if is_periodical:
                self.strings[item.klass] = 0
                self.strings[item.author] = self.strings[item.description] = 0

        self.records = []
        offset = 0
        buf = StringIO()
        for key in tuple(self.strings.iterkeys()):
            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH])
            l = len(utf8)
            sz_bytes = encint(l)
            raw = sz_bytes + utf8
            if 0xfbf8 - buf.tell() < 6 + len(raw):
                # Records in PDB files cannot be larger than 0x10000, so we
                # stop well before that.
                pad = 0xfbf8 - buf.tell()
                buf.write(b'\0' * pad)
                self.records.append(buf.getvalue())
                buf.truncate(0)
                offset = len(self.records) * 0x10000
            buf.write(raw)
            self.strings[key] = offset
            offset += len(raw)

        self.records.append(align_block(buf.getvalue()))

    def __getitem__(self, string):
        return self.strings[string]
# }}}
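
# Note on CNCX offsets: each record is flushed at 0xfbf8 bytes and `offset`
# then jumps to len(records) * 0x10000, so the value stored in self.strings
# encodes both the containing record (offset // 0x10000) and the position
# within it (offset % 0x10000). Consumers of these offsets must index the
# CNCX records the same way.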

class TAGX(object): # {{{

    BITMASKS = {11:0b1}
    BITMASKS.update({x:i+1 for i, x in enumerate([1, 2, 3, 4, 5, 21, 22, 23])})
    BITMASKS.update({x:i+1 for i, x in enumerate([69, 70, 71, 72, 73])})

    NUM_VALUES = defaultdict(lambda:1)
    NUM_VALUES[11] = 3
    NUM_VALUES[0] = 0

    def __init__(self):
        self.byts = bytearray()

    def add_tag(self, tag):
        buf = self.byts
        buf.append(tag)
        buf.append(self.NUM_VALUES[tag])
        # bitmask
        buf.append((1 << (self.BITMASKS[tag])) if tag else 0)
        # eof
        buf.append(0 if tag else 1)

    def header(self, control_byte_count):
        header = b'TAGX'
        # table length, control byte count
        header += pack(b'>II', 12+len(self.byts), control_byte_count)
        return header

    @property
    def periodical(self):
        '''
        TAGX block for the Primary index header of a periodical
        '''
        for tag in (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72, 73, 0):
            self.add_tag(tag)
        return self.header(2) + bytes(self.byts)

    @property
    def secondary(self):
        '''
        TAGX block for the secondary index header of a periodical
        '''
        for tag in (11, 0):
            self.add_tag(tag)
        return self.header(1) + bytes(self.byts)

    @property
    def flat_book(self):
        '''
        TAGX block for the primary index header of a flat book
        '''
        for tag in (1, 2, 3, 4, 0):
            self.add_tag(tag)
        return self.header(1) + bytes(self.byts)

# }}}
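
# Each add_tag() call above appends one 4-byte TAGX table entry:
# <tag> <number of values> <bitmask> <end-of-tags flag>. For example,
# add_tag(1) yields 0x01 0x01 0x02 0x00 (BITMASKS[1] == 1, so the mask is
# 1 << 1), while the terminator add_tag(0) yields 0x00 0x00 0x00 0x01.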

# Index Entries {{{

class IndexEntry(object):

    TAG_VALUES = {
            'offset': 1,
            'size': 2,
            'label_offset': 3,
            'depth': 4,
            'class_offset': 5,
            'secondary': 11,
            'parent_index': 21,
            'first_child_index': 22,
            'last_child_index': 23,
            'image_index': 69,
            'desc_offset': 70,
            'author_offset': 73,
            }
    RTAG_MAP = {v:k for k, v in TAG_VALUES.iteritems()}

    def __init__(self, offset, label_offset):
        self.offset, self.label_offset = offset, label_offset
        self.depth, self.class_offset = 0, None
        self.control_byte_count = 1

        self.length = 0
        self.index = 0

        self.parent_index = None
        self.first_child_index = None
        self.last_child_index = None

        self.image_index = None
        self.author_offset = None
        self.desc_offset = None

    def __repr__(self):
        return ('IndexEntry(offset=%r, depth=%r, length=%r, index=%r,'
                ' parent_index=%r)')%(self.offset, self.depth, self.length,
                        self.index, self.parent_index)

    @dynamic_property
    def size(self):
        def fget(self): return self.length
        def fset(self, val): self.length = val
        return property(fget=fget, fset=fset, doc='Alias for length')

    @property
    def next_offset(self):
        return self.offset + self.length

    @property
    def tag_nums(self):
        for i in range(1, 5):
            yield i
        for attr in ('class_offset', 'parent_index', 'first_child_index',
                'last_child_index'):
            if getattr(self, attr) is not None:
                yield self.TAG_VALUES[attr]

    @property
    def entry_type(self):
        ans = 0
        for tag in self.tag_nums:
            ans |= (1 << (TAGX.BITMASKS[tag]))  # 1 << x == 2**x
        return ans

    @property
    def bytestring(self):
        buf = StringIO()
        if isinstance(self.index, int):
            buf.write(encode_number_as_hex(self.index))
        else:
            raw = bytearray(self.index.encode('ascii'))
            raw.insert(0, len(raw))
            buf.write(bytes(raw))
        et = self.entry_type
        buf.write(bytes(bytearray([et])))

        if self.control_byte_count == 2:
            flags = 0
            for attr in ('image_index', 'desc_offset', 'author_offset'):
                val = getattr(self, attr)
                if val is not None:
                    tag = self.RTAG_MAP[attr]
                    bm = TAGX.BITMASKS[tag]
                    flags |= bm
            buf.write(bytes(bytearray([flags])))

        for tag in self.tag_nums:
            attr = self.RTAG_MAP[tag]
            val = getattr(self, attr)
            if isinstance(val, int):
                val = [val]
            for x in val:
                buf.write(encint(x))

        if self.control_byte_count == 2:
            for attr in ('image_index', 'desc_offset', 'author_offset'):
                val = getattr(self, attr)
                if val is not None:
                    buf.write(encint(val))

        ans = buf.getvalue()
        return ans
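
    # Layout of the bytestring built above (as implemented): <entry id,
    # encode_number_as_hex(index) for numeric indices or a length-prefixed
    # ascii name for secondary entries> <entry type byte> [<flags byte, only
    # when control_byte_count == 2>] <one vwi (encint) per tag in tag_nums
    # order> [<vwi image_index/desc_offset/author_offset values, if present>]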

class PeriodicalIndexEntry(IndexEntry):

    def __init__(self, offset, label_offset, class_offset, depth):
        IndexEntry.__init__(self, offset, label_offset)
        self.depth = depth
        self.class_offset = class_offset
        self.control_byte_count = 2

class SecondaryIndexEntry(IndexEntry):

    INDEX_MAP = {'author':73, 'caption':72, 'credit':71, 'description':70,
            'mastheadImage':69}

    def __init__(self, index):
        IndexEntry.__init__(self, index, 0)

        tag = self.INDEX_MAP[index]

        # The values for this index entry
        self.secondary = [len(self.INDEX_MAP) if tag == min(
            self.INDEX_MAP.itervalues()) else 0, 0, tag]

    @property
    def tag_nums(self):
        yield 11

    @property
    def entry_type(self):
        return 1

    @classmethod
    def entries(cls):
        rmap = {v:k for k,v in cls.INDEX_MAP.iteritems()}
        for tag in sorted(rmap, reverse=True):
            yield cls(rmap[tag])

# }}}

class TBS(object): # {{{

    '''
    Take the list of index nodes starting/ending on a record and calculate the
    trailing byte sequence for the record.
    '''

    def __init__(self, data, is_periodical, first=False, section_map={},
            after_first=False):
        self.section_map = section_map
        #import pprint
        #pprint.pprint(data)
        #print()
        if is_periodical:
            # The starting bytes.
            # The value is zero which I think indicates the periodical
            # index entry. The values for the various flags seem to be
            # unused. If the 0b100 is present, it means that the record
            # deals with section 1 (or is the final record with section
            # transitions).
            self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
            self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0},
                    flag_size=3)
            self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0},
                    flag_size=3)
            self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001: 0},
                    flag_size=3)

            if not data:
                byts = b''
                if after_first:
                    # This can happen if a record contains only text between
                    # the periodical start and the first section
                    byts = self.type_011
                self.bytestring = byts
            else:
                depth_map = defaultdict(list)
                for x in ('starts', 'ends', 'completes'):
                    for idx in data[x]:
                        depth_map[idx.depth].append(idx)
                for l in depth_map.itervalues():
                    l.sort(key=lambda x:x.offset)
                self.periodical_tbs(data, first, depth_map)
        else:
            if not data:
                self.bytestring = b''
            else:
                self.book_tbs(data, first)

    def periodical_tbs(self, data, first, depth_map):
        buf = StringIO()

        has_section_start = (depth_map[1] and
                set(depth_map[1]).intersection(set(data['starts'])))
        spanner = data['spans']
        parent_section_index = -1

        if depth_map[0]:
            # We have a terminal record

            # Find the first non periodical node
            first_node = None
            for nodes in (depth_map[1], depth_map[2]):
                for node in nodes:
                    if (first_node is None or (node.offset, node.depth) <
                            (first_node.offset, first_node.depth)):
                        first_node = node

            typ = (self.type_110 if has_section_start else self.type_010)

            # parent_section_index is needed for the last record
            if first_node is not None and first_node.depth > 0:
                parent_section_index = (first_node.index if first_node.depth
                        == 1 else first_node.parent_index)
            else:
                parent_section_index = max(self.section_map.iterkeys())

        else:
            # Non terminal record

            if spanner is not None:
                # record is spanned by a single article
                parent_section_index = spanner.parent_index
                typ = (self.type_110 if parent_section_index == 1 else
                        self.type_010)
            elif not depth_map[1]:
                # has only article nodes, i.e. spanned by a section
                parent_section_index = depth_map[2][0].parent_index
                typ = (self.type_111 if parent_section_index == 1 else
                        self.type_010)
            else:
                # has section transitions
                if depth_map[2]:
                    parent_section_index = depth_map[2][0].parent_index
                else:
                    parent_section_index = depth_map[1][0].index
                typ = self.type_011

        buf.write(typ)

        if typ not in (self.type_110, self.type_111) and parent_section_index > 0:
            extra = {}
            # Write starting section information
            if spanner is None:
                num_articles = len([a for a in depth_map[1] if a.parent_index
                    == parent_section_index])
                if not depth_map[1]:
                    extra = {0b0001: 0}
                if num_articles > 1:
                    extra = {0b0100: num_articles}
            buf.write(encode_tbs(parent_section_index, extra))

        if spanner is None:
            articles = depth_map[2]
            sections = set([self.section_map[a.parent_index] for a in
                articles])
            sections = sorted(sections, key=lambda x:x.offset)
            section_map = {s:[a for a in articles if a.parent_index ==
                s.index] for s in sections}
            for i, section in enumerate(sections):
                # All the articles in this record that belong to section
                articles = section_map[section]
                first_article = articles[0]
                last_article = articles[-1]
                num = len(articles)

                try:
                    next_sec = sections[i+1]
                except:
                    next_sec = None

                extra = {}
                if num > 1:
                    extra[0b0100] = num
                if False and i == 0 and next_sec is not None:
                    # Write offset to next section from start of record
                    # I can't figure out exactly when Kindlegen decides to
                    # write this so I have disabled it for now.
                    extra[0b0001] = next_sec.offset - data['offset']

                buf.write(encode_tbs(first_article.index-section.index, extra))

                if next_sec is not None:
                    buf.write(encode_tbs(last_article.index-next_sec.index,
                        {0b1000: 0}))
        else:
            buf.write(encode_tbs(spanner.index - parent_section_index,
                {0b0001: 0}))

        self.bytestring = buf.getvalue()

    def book_tbs(self, data, first):
        self.bytestring = b''
# }}}
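
# Rough key to the type_* prefixes above, inferred from how they are chosen:
# type_010 is a plain record, type_011 a record containing a section
# transition, and type_110/type_111 the corresponding variants whose 0b100
# flag marks records touching section 1 (or the final record). As the
# comments above note, the exact semantics are only partially understood.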

class Indexer(object): # {{{

    def __init__(self, serializer, number_of_text_records,
            size_of_last_text_record, masthead_offset, is_periodical,
            opts, oeb):
        self.serializer = serializer
        self.number_of_text_records = number_of_text_records
        self.text_size = (RECORD_SIZE * (self.number_of_text_records-1) +
                size_of_last_text_record)
        self.masthead_offset = masthead_offset
        self.secondary_record_offset = None

        self.oeb = oeb
        self.log = oeb.log
        self.opts = opts

        self.is_periodical = is_periodical
        if self.is_periodical and self.masthead_offset is None:
            raise ValueError('Periodicals must have a masthead')

        self.log('Generating MOBI index for a %s'%('periodical' if
            self.is_periodical else 'book'))
        self.is_flat_periodical = False
        if self.is_periodical:
            periodical_node = iter(oeb.toc).next()
            sections = tuple(periodical_node)
            self.is_flat_periodical = len(sections) == 1

        self.records = []

        if self.is_periodical:
            # Ensure all articles have an author and description before
            # creating the CNCX
            for node in oeb.toc.iterdescendants():
                if node.klass == 'article':
                    aut, desc = node.author, node.description
                    if not aut: aut = _('Unknown')
                    if not desc: desc = _('No details available')
                    node.author, node.description = aut, desc

        self.cncx = CNCX(oeb.toc, self.is_periodical)

        if self.is_periodical:
            self.indices = self.create_periodical_index()
        else:
            self.indices = self.create_book_index()

        self.records.append(self.create_index_record())
        self.records.insert(0, self.create_header())
        self.records.extend(self.cncx.records)

        if is_periodical:
            self.secondary_record_offset = len(self.records)
            self.records.append(self.create_header(secondary=True))
            self.records.append(self.create_index_record(secondary=True))

        self.calculate_trailing_byte_sequences()

    def create_index_record(self, secondary=False): # {{{
        header_length = 192
        buf = StringIO()
        indices = list(SecondaryIndexEntry.entries()) if secondary else self.indices

        # Write index entries
        offsets = []
        for i in indices:
            offsets.append(buf.tell())
            buf.write(i.bytestring)
        index_block = align_block(buf.getvalue())

        # Write offsets to index entries as an IDXT block
        idxt_block = b'IDXT'
        buf.truncate(0)
        for offset in offsets:
            buf.write(pack(b'>H', header_length+offset))
        idxt_block = align_block(idxt_block + buf.getvalue())
        body = index_block + idxt_block

        header = b'INDX'
        buf.truncate(0)
        buf.write(pack(b'>I', header_length))
        buf.write(b'\0'*4)  # Unknown
        buf.write(pack(b'>I', 1))  # Header type? Or index record number?
        buf.write(b'\0'*4)  # Unknown
        # IDXT block offset
        buf.write(pack(b'>I', header_length + len(index_block)))
        # Number of index entries
        buf.write(pack(b'>I', len(offsets)))
        # Unknown
        buf.write(b'\xff'*8)
        # Unknown
        buf.write(b'\0'*156)

        header += buf.getvalue()

        ans = header + body
        if len(ans) > 0x10000:
            raise ValueError('Too many entries (%d) in the TOC'%len(offsets))
        return ans
    # }}}

    def create_header(self, secondary=False): # {{{
        buf = StringIO()
        if secondary:
            tagx_block = TAGX().secondary
        else:
            tagx_block = (TAGX().periodical if self.is_periodical else
                        TAGX().flat_book)
        header_length = 192

        # Ident 0 - 4
        buf.write(b'INDX')

        # Header length 4 - 8
        buf.write(pack(b'>I', header_length))

        # Unknown 8-16
        buf.write(b'\0'*8)

        # Index type: 0 - normal, 2 - inflection 16 - 20
        buf.write(pack(b'>I', 2))

        # IDXT offset 20-24
        buf.write(pack(b'>I', 0))  # Filled in later

        # Number of index records 24-28
        buf.write(pack(b'>I', 1 if secondary else len(self.records)))

        # Index Encoding 28-32
        buf.write(pack(b'>I', 65001))  # utf-8

        # Unknown 32-36
        buf.write(b'\xff'*4)

        # Number of index entries 36-40
        indices = list(SecondaryIndexEntry.entries()) if secondary else self.indices
        buf.write(pack(b'>I', len(indices)))

        # ORDT offset 40-44
        buf.write(pack(b'>I', 0))

        # LIGT offset 44-48
        buf.write(pack(b'>I', 0))

        # Number of LIGT entries 48-52
        buf.write(pack(b'>I', 0))

        # Number of CNCX records 52-56
        buf.write(pack(b'>I', 0 if secondary else len(self.cncx.records)))

        # Unknown 56-180
        buf.write(b'\0'*124)

        # TAGX offset 180-184
        buf.write(pack(b'>I', header_length))

        # Unknown 184-192
        buf.write(b'\0'*8)

        # TAGX block
        buf.write(tagx_block)

        num = len(indices)

        # The index of the last entry in the NCX
        idx = indices[-1].index
        buf.write(encode_number_as_hex(idx) if isinstance(idx, int) else
                idx.encode('ascii'))

        # The number of entries in the NCX
        buf.write(pack(b'>H', num))

        # Padding
        pad = (4 - (buf.tell()%4))%4
        if pad:
            buf.write(b'\0'*pad)

        idxt_offset = buf.tell()

        buf.write(b'IDXT')
        buf.write(pack(b'>H', header_length + len(tagx_block)))
        buf.write(b'\0')
        buf.seek(20)
        buf.write(pack(b'>I', idxt_offset))

        return align_block(buf.getvalue())
    # }}}

    def create_book_index(self): # {{{
        indices = []
        seen = set()
        id_offsets = self.serializer.id_offsets

        for node in self.oeb.toc.iterdescendants():
            try:
                offset = id_offsets[node.href]
                label = self.cncx[node.title]
            except:
                self.log.warn('TOC item %s not found in document'%node.href)
                continue
            if offset in seen:
                continue
            seen.add(offset)
            index = IndexEntry(offset, label)
            indices.append(index)

        indices.sort(key=lambda x:x.offset)

        # Set lengths
        for i, index in enumerate(indices):
            try:
                next_offset = indices[i+1].offset
            except:
                next_offset = self.serializer.body_end_offset
            index.length = next_offset - index.offset

        # Remove empty nodes
        indices = [i for i in indices if i.length > 0]

        # Set index values
        for i, index in enumerate(indices):
            index.index = i

        # Set lengths again to close up any gaps left by filtering
        for i, index in enumerate(indices):
            try:
                next_offset = indices[i+1].offset
            except:
                next_offset = self.serializer.body_end_offset
            index.length = next_offset - index.offset

        return indices

    # }}}

    def create_periodical_index(self): # {{{
        periodical_node = iter(self.oeb.toc).next()
        periodical_node_offset = self.serializer.body_start_offset
        periodical_node_size = (self.serializer.body_end_offset -
                periodical_node_offset)

        normalized_sections = []

        id_offsets = self.serializer.id_offsets

        periodical = PeriodicalIndexEntry(periodical_node_offset,
                self.cncx[periodical_node.title],
                self.cncx[periodical_node.klass], 0)
        periodical.length = periodical_node_size
        periodical.first_child_index = 1
        periodical.image_index = self.masthead_offset

        seen_sec_offsets = set()
        seen_art_offsets = set()

        for sec in periodical_node:
            normalized_articles = []
            try:
                offset = id_offsets[sec.href]
                label = self.cncx[sec.title]
                klass = self.cncx[sec.klass]
            except:
                continue
            if offset in seen_sec_offsets:
                continue
            seen_sec_offsets.add(offset)
            section = PeriodicalIndexEntry(offset, label, klass, 1)
            section.parent_index = 0
            for art in sec:
                try:
                    offset = id_offsets[art.href]
                    label = self.cncx[art.title]
                    klass = self.cncx[art.klass]
                except:
                    continue
                if offset in seen_art_offsets:
                    continue
                seen_art_offsets.add(offset)
                article = PeriodicalIndexEntry(offset, label, klass, 2)
                normalized_articles.append(article)
                article.author_offset = self.cncx[art.author]
                article.desc_offset = self.cncx[art.description]

            if normalized_articles:
                normalized_articles.sort(key=lambda x:x.offset)
                normalized_sections.append((section, normalized_articles))

        normalized_sections.sort(key=lambda x:x[0].offset)

        # Set lengths
        for s, x in enumerate(normalized_sections):
            sec, normalized_articles = x
            try:
                sec.length = normalized_sections[s+1][0].offset - sec.offset
            except:
                sec.length = self.serializer.body_end_offset - sec.offset
            for i, art in enumerate(normalized_articles):
                try:
                    art.length = normalized_articles[i+1].offset - art.offset
                except:
                    art.length = sec.offset + sec.length - art.offset

        # Filter
        for i, x in list(enumerate(normalized_sections)):
            sec, normalized_articles = x
            normalized_articles = list(filter(lambda x: x.length > 0,
                normalized_articles))
            normalized_sections[i] = (sec, normalized_articles)

        normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1],
            normalized_sections))

        # Set indices
        i = 0
        for sec, articles in normalized_sections:
            i += 1
            sec.index = i
            sec.parent_index = 0

        for sec, articles in normalized_sections:
            for art in articles:
                i += 1
                art.index = i
                art.parent_index = sec.index

        for sec, normalized_articles in normalized_sections:
            sec.first_child_index = normalized_articles[0].index
            sec.last_child_index = normalized_articles[-1].index

        # Set lengths again to close up any gaps left by filtering
        for s, x in enumerate(normalized_sections):
            sec, articles = x
            try:
                next_offset = normalized_sections[s+1][0].offset
            except:
                next_offset = self.serializer.body_end_offset
            sec.length = next_offset - sec.offset

            for a, art in enumerate(articles):
                try:
                    next_offset = articles[a+1].offset
                except:
                    next_offset = sec.next_offset
                art.length = next_offset - art.offset

        # Sanity check
        for s, x in enumerate(normalized_sections):
            sec, articles = x
            try:
                next_sec = normalized_sections[s+1][0]
            except:
                if (sec.length == 0 or sec.next_offset !=
                        self.serializer.body_end_offset):
                    raise ValueError('Invalid section layout')
            else:
                if next_sec.offset != sec.next_offset or sec.length == 0:
                    raise ValueError('Invalid section layout')
            for a, art in enumerate(articles):
                try:
                    next_art = articles[a+1]
                except:
                    if (art.length == 0 or art.next_offset !=
                            sec.next_offset):
                        raise ValueError('Invalid article layout')
                else:
                    if art.length == 0 or art.next_offset != next_art.offset:
                        raise ValueError('Invalid article layout')

        # Flatten
        indices = [periodical]
        for sec, articles in normalized_sections:
            indices.append(sec)
            periodical.last_child_index = sec.index

        for sec, articles in normalized_sections:
            for a in articles:
                indices.append(a)

        return indices
    # }}}

    # TBS {{{
    def calculate_trailing_byte_sequences(self):
        self.tbs_map = {}
        found_node = False
        sections = [i for i in self.indices if i.depth == 1]
        section_map = OrderedDict((i.index, i) for i in
                sorted(sections, key=lambda x:x.offset))

        deepest = max(i.depth for i in self.indices)

        for i in xrange(self.number_of_text_records):
            offset = i * RECORD_SIZE
            next_offset = offset + RECORD_SIZE
            data = {'ends':[], 'completes':[], 'starts':[],
                    'spans':None, 'offset':offset, 'record_number':i+1}

            for index in self.indices:
                if index.offset >= next_offset:
                    # Node starts after current record
                    if index.depth == deepest:
                        break
                    else:
                        continue
                if index.next_offset <= offset:
                    # Node ends before current record
                    continue
                if index.offset >= offset:
                    # Node starts in current record
                    if index.next_offset <= next_offset:
                        # Node ends in current record
                        data['completes'].append(index)
                    else:
                        data['starts'].append(index)
                else:
                    # Node starts before current record
                    if index.next_offset <= next_offset:
                        # Node ends in current record
                        data['ends'].append(index)
                    elif index.depth == deepest:
                        data['spans'] = index

            if (data['ends'] or data['completes'] or data['starts'] or
                    data['spans'] is not None):
                self.tbs_map[i+1] = TBS(data, self.is_periodical, first=not
                        found_node, section_map=section_map)
                found_node = True
            else:
                self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False,
                        after_first=found_node, section_map=section_map)

    def get_trailing_byte_sequence(self, num):
        return self.tbs_map[num].bytestring
    # }}}

# }}}

src/calibre/ebooks/mobi/writer2/main.py (new file, 590 lines)
@@ -0,0 +1,590 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re, random, time
from cStringIO import StringIO
from struct import pack

from calibre.ebooks import normalize, generate_masthead
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.mobi.writer2.serializer import Serializer
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
from calibre.ebooks.mobi.utils import (rescale_image, encint,
        encode_trailing_data, align_block, detect_periodical)
from calibre.ebooks.mobi.writer2.indexer import Indexer

EXTH_CODES = {
    'creator': 100,
    'publisher': 101,
    'description': 103,
    'identifier': 104,
    'subject': 105,
    'pubdate': 106,
    'review': 107,
    'contributor': 108,
    'rights': 109,
    'type': 111,
    'source': 112,
    'versionnumber': 114,
    'startreading': 116,
    'coveroffset': 201,
    'thumboffset': 202,
    'hasfakecover': 203,
    'lastupdatetime': 502,
    'title': 503,
}
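
# Each EXTH record written by build_exth() below has the layout <4-byte
# code><4-byte length including this 8-byte header><data>, so e.g. the
# 4-byte 'EBOK' cdetype is written as code 501, length 12, b'EBOK'.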

# Disabled as I don't care about uncrossable breaks
WRITE_UNCROSSABLE_BREAKS = False

MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)

class MobiWriter(object):
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

    def __init__(self, opts, write_page_breaks_after_item=True):
        self.opts = opts
        self.write_page_breaks_after_item = write_page_breaks_after_item
        self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
        self.prefer_author_sort = opts.prefer_author_sort
        self.last_text_record_idx = 1

    def __call__(self, oeb, path_or_stream):
        self.log = oeb.log
        if hasattr(path_or_stream, 'write'):
            return self.dump_stream(oeb, path_or_stream)
        with open(path_or_stream, 'w+b') as stream:
            return self.dump_stream(oeb, stream)

    def write(self, *args):
        for datum in args:
            self.stream.write(datum)

    def tell(self):
        return self.stream.tell()

    def dump_stream(self, oeb, stream):
        self.oeb = oeb
        self.stream = stream
        self.records = [None]
        self.generate_content()
        self.generate_record0()
        self.write_header()
        self.write_content()

    def generate_content(self):
        self.is_periodical = detect_periodical(self.oeb.toc, self.oeb.log)
        # Image records are stored in their own list, they are merged into the
        # main record list at the end
        self.generate_images()
        self.generate_text()
        # The uncrossable breaks trailing entries come before the indexing
        # trailing entries
        self.write_uncrossable_breaks()
        # Index records come after text records
        self.generate_index()

    # Indexing {{{
    def generate_index(self):
        self.primary_index_record_idx = None
        try:
            self.indexer = Indexer(self.serializer, self.last_text_record_idx,
                    len(self.records[self.last_text_record_idx]),
                    self.masthead_offset, self.is_periodical,
                    self.opts, self.oeb)
        except:
            self.log.exception('Failed to generate MOBI index:')
        else:
            self.primary_index_record_idx = len(self.records)
            for i in xrange(len(self.records)):
                if i == 0: continue
                tbs = self.indexer.get_trailing_byte_sequence(i)
                self.records[i] += encode_trailing_data(tbs)
            self.records.extend(self.indexer.records)

    # }}}

    def write_uncrossable_breaks(self): # {{{
        '''
        Write information about uncrossable breaks (non linear items in
        the spine).
        '''
        if not WRITE_UNCROSSABLE_BREAKS:
            return

        breaks = self.serializer.breaks

        for i in xrange(1, self.last_text_record_idx+1):
            offset = i * RECORD_SIZE
            pbreak = 0
            running = offset

            buf = StringIO()

            while breaks and (breaks[0] - offset) < RECORD_SIZE:
                pbreak = (breaks.pop(0) - running) >> 3
                encoded = encint(pbreak)
                buf.write(encoded)
                running += pbreak << 3
            encoded = encode_trailing_data(buf.getvalue())
            self.records[i] += encoded
    # }}}

    # Images {{{

    def generate_images(self):
        oeb = self.oeb
        oeb.logger.info('Serializing images...')
        self.image_records = []

        mh_href = self.masthead_offset = None
        if 'masthead' in oeb.guide:
            mh_href = oeb.guide['masthead'].href
        elif self.is_periodical:
            # Generate a default masthead
            data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
            self.image_records.append(data)
            self.masthead_offset = 0

        cover_href = self.cover_offset = self.thumbnail_offset = None
        if (oeb.metadata.cover and
                unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
            cover_id = unicode(oeb.metadata.cover[0])
            item = oeb.manifest.ids[cover_id]
            cover_href = item.href

        for item in self.oeb.manifest.values():
            if item.media_type not in OEB_RASTER_IMAGES: continue
            try:
                data = rescale_image(item.data)
            except:
                oeb.logger.warn('Bad image file %r' % item.href)
                continue
            else:
                # Every successfully scaled image becomes a record; remember
                # the masthead/cover offsets as they are appended
                self.image_records.append(data)
                if item.href == mh_href:
                    self.masthead_offset = len(self.image_records) - 1
                elif item.href == cover_href:
                    self.cover_offset = len(self.image_records) - 1
                    try:
                        data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
                                maxsizeb=MAX_THUMB_SIZE)
                    except:
                        oeb.logger.warn('Failed to generate thumbnail')
                    else:
                        self.image_records.append(data)
                        self.thumbnail_offset = len(self.image_records) - 1
            finally:
                item.unload_data_from_memory()

    # }}}

    # Text {{{

    def generate_text(self):
        self.oeb.logger.info('Serializing markup content...')
        self.serializer = Serializer(self.oeb, self.images,
                write_page_breaks_after_item=self.write_page_breaks_after_item)
        text = self.serializer()
        self.text_length = len(text)
        text = StringIO(text)
        nrecords = 0
        records_size = 0

        if self.compression != UNCOMPRESSED:
            self.oeb.logger.info('  Compressing markup content...')

        while text.tell() < self.text_length:
            data, overlap = self.read_text_record(text)
            if self.compression == PALMDOC:
                data = compress_doc(data)

            data += overlap
            data += pack(b'>B', len(overlap))

            self.records.append(data)
            records_size += len(data)
            nrecords += 1

        self.last_text_record_idx = nrecords
        self.first_non_text_record_idx = nrecords + 1
        # Pad so that the next record starts at a 4 byte boundary
        if records_size % 4 != 0:
            self.records.append(b'\x00'*(records_size % 4))
            self.first_non_text_record_idx += 1
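
    # Each text record thus ends up as: [PalmDoc-compressed text of up to
    # RECORD_SIZE bytes][overlap bytes completing a split multibyte char]
    # [one byte giving the overlap length], before any trailing index data
    # is appended by generate_index().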

    def read_text_record(self, text):
        '''
        Return a Palmdoc record of size RECORD_SIZE from the text file object.
        In case the record ends in the middle of a multibyte character return
        the overlap as well.

        Returns data, overlap: where both are byte strings. overlap is the
        extra bytes needed to complete the truncated multibyte character.
        '''
        opos = text.tell()
        text.seek(0, 2)
        # npos is the position of the next record
        npos = min((opos + RECORD_SIZE, text.tell()))
        # Number of bytes from the next record needed to complete the last
        # character in this record
        extra = 0

        last = b''
        while not last.decode('utf-8', 'ignore'):
            # last contains no valid utf-8 characters
            size = len(last) + 1
            text.seek(npos - size)
            last = text.read(size)

        # last now has one valid utf-8 char and possibly some bytes that belong
        # to a truncated char

        try:
            last.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            # There are some truncated bytes in last
            prev = len(last)
            while True:
                text.seek(npos - prev)
                last = text.read(len(last) + 1)
                try:
                    last.decode('utf-8')
                except UnicodeDecodeError:
                    pass
                else:
                    break
            extra = len(last) - prev

        text.seek(opos)
        data = text.read(RECORD_SIZE)
        overlap = text.read(extra)
        text.seek(npos)

        return data, overlap

    # }}}
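
    # Example: if the RECORD_SIZE boundary falls inside the 3-byte UTF-8
    # sequence for U+20AC (b'\xe2\x82\xac'), data ends with the partial
    # b'\xe2\x82' and overlap is the b'\xac' needed to complete it; since the
    # next record starts at npos, those overlap bytes are stored twice by
    # design.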
|
||||
|
||||
def generate_record0(self): # MOBI header {{{
|
||||
metadata = self.oeb.metadata
|
||||
exth = self.build_exth()
|
||||
first_image_record = None
|
||||
if self.image_records:
|
||||
first_image_record = len(self.records)
|
||||
self.records.extend(self.image_records)
|
||||
last_content_record = len(self.records) - 1
|
||||
|
||||
# FCIS/FLIS (Seems to serve no purpose)
|
||||
flis_number = len(self.records)
|
||||
self.records.append(
|
||||
b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+
|
||||
b'\xff'*4)
|
||||
fcis = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
|
||||
fcis += pack(b'>I', self.text_length)
|
||||
fcis += b'\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
|
||||
fcis_number = len(self.records)
|
||||
self.records.append(fcis)
|
||||
|
||||
# EOF record
|
||||
self.records.append(b'\xE9\x8E\x0D\x0A')
|
||||
|
||||
record0 = StringIO()
|
||||
# The MOBI Header
|
||||
record0.write(pack(b'>HHIHHHH',
|
||||
self.compression, # compression type # compression type
|
||||
0, # Unused
|
||||
self.text_length, # Text length
|
||||
self.last_text_record_idx, # Number of text records or last tr idx
|
||||
RECORD_SIZE, # Text record size
|
||||
0, # Unused
|
||||
0 # Unused
|
||||
)) # 0 - 15 (0x0 - 0xf)
|
||||
uid = random.randint(0, 0xffffffff)
|
||||
title = normalize(unicode(metadata.title[0])).encode('utf-8')
|
||||
|
||||
# 0x0 - 0x3
|
||||
record0.write(b'MOBI')
|
||||
|
||||
# 0x4 - 0x7 : Length of header
|
||||
# 0x8 - 0x11 : MOBI type
|
||||
# type meaning
|
||||
# 0x002 MOBI book (chapter - chapter navigation)
|
||||
# 0x101 News - Hierarchical navigation with sections and articles
|
||||
# 0x102 News feed - Flat navigation
|
||||
# 0x103 News magazine - same as 0x101
|
||||
# 0xC - 0xF : Text encoding (65001 is utf-8)
|
||||
# 0x10 - 0x13 : UID
|
||||
# 0x14 - 0x17 : Generator version
|
||||
|
||||
bt = 0x002
|
||||
if self.primary_index_record_idx is not None:
|
||||
if self.indexer.is_flat_periodical:
|
||||
bt = 0x102
|
||||
elif self.indexer.is_periodical:
|
||||
bt = 0x101
|
||||
|
||||
record0.write(pack(b'>IIIII',
|
||||
0xe8, bt, 65001, uid, 6))
|
||||
|
||||
# 0x18 - 0x1f : Unknown
|
||||
record0.write(b'\xff' * 8)
|
||||
|
||||
# 0x20 - 0x23 : Secondary index record
|
||||
sir = 0xffffffff
|
||||
if (self.primary_index_record_idx is not None and
|
||||
self.indexer.secondary_record_offset is not None):
|
||||
sir = (self.primary_index_record_idx +
|
||||
self.indexer.secondary_record_offset)
|
||||
record0.write(pack(b'>I', sir))
|
||||
|
||||
# 0x24 - 0x3f : Unknown
|
||||
record0.write(b'\xff' * 28)
|
||||
|
||||
# 0x40 - 0x43 : Offset of first non-text record
|
||||
record0.write(pack(b'>I',
|
||||
self.first_non_text_record_idx))
|
||||
|
||||
# 0x44 - 0x4b : title offset, title length
|
||||
record0.write(pack(b'>II',
|
||||
0xe8 + 16 + len(exth), len(title)))
|
||||
|
||||
# 0x4c - 0x4f : Language specifier
|
||||
record0.write(iana2mobi(
|
||||
str(metadata.language[0])))
|
||||
|
||||
# 0x50 - 0x57 : Input language and Output language
|
||||
record0.write(b'\0' * 8)
|
||||
|
||||
# 0x58 - 0x5b : Format version
|
||||
# 0x5c - 0x5f : First image record number
|
||||
record0.write(pack(b'>II',
|
||||
6, first_image_record if first_image_record else len(self.records)))
|
||||
|
||||
# 0x60 - 0x63 : First HUFF/CDIC record number
|
||||
# 0x64 - 0x67 : Number of HUFF/CDIC records
|
||||
# 0x68 - 0x6b : First DATP record number
|
||||
# 0x6c - 0x6f : Number of DATP records
|
||||
record0.write(b'\0' * 16)
|
||||
|
||||
# 0x70 - 0x73 : EXTH flags
|
||||
# Bit 6 (0b1000000) being set indicates the presence of an EXTH header
|
||||
# The purpose of the other bits is unknown
|
||||
exth_flags = 0b1010000
|
||||
if self.is_periodical:
|
||||
exth_flags |= 0b1000
|
||||
record0.write(pack(b'>I', exth_flags))
|
||||
|
||||
# 0x74 - 0x93 : Unknown
|
||||
record0.write(b'\0' * 32)
|
||||
|
||||
# 0x94 - 0x97 : DRM offset
|
||||
# 0x98 - 0x9b : DRM count
|
||||
# 0x9c - 0x9f : DRM size
|
||||
# 0xa0 - 0xa3 : DRM flags
|
||||
record0.write(pack(b'>IIII',
|
||||
0xffffffff, 0xffffffff, 0, 0))
|
||||
|
||||
|
||||
# 0xa4 - 0xaf : Unknown
|
||||
record0.write(b'\0'*12)
|
||||
|
||||
# 0xb0 - 0xb1 : First content record number
|
||||
# 0xb2 - 0xb3 : last content record number
|
||||
# (Includes Image, DATP, HUFF, DRM)
|
||||
record0.write(pack(b'>HH', 1, last_content_record))
|
||||
|
||||
# 0xb4 - 0xb7 : Unknown
|
||||
record0.write(b'\0\0\0\x01')
|
||||
|
||||
# 0xb8 - 0xbb : FCIS record number
|
||||
record0.write(pack(b'>I', fcis_number))
|
||||
|
||||
# 0xbc - 0xbf : Unknown (FCIS record count?)
|
||||
record0.write(pack(b'>I', 1))
|
||||
|
||||
# 0xc0 - 0xc3 : FLIS record number
|
||||
record0.write(pack(b'>I', flis_number))
|
||||
|
||||
# 0xc4 - 0xc7 : Unknown (FLIS record count?)
|
||||
record0.write(pack(b'>I', 1))
|
||||
|
||||
# 0xc8 - 0xcf : Unknown
|
||||
record0.write(b'\0'*8)
|
||||
|
||||
# 0xd0 - 0xdf : Unknown
|
||||
record0.write(pack(b'>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))
|
||||
|
||||
# 0xe0 - 0xe3 : Extra record data
|
||||
# Extra record data flags:
|
||||
# - 0b1 : <extra multibyte bytes><size>
|
||||
# - 0b10 : <TBS indexing description of this HTML record><size>
|
||||
# - 0b100: <uncrossable breaks><size>
|
||||
# Setting bit 2 (0x2) disables <guide><reference type="start"> functionality
|
||||
extra_data_flags = 0b1 # Has multibyte overlap bytes
|
||||
if self.primary_index_record_idx is not None:
|
||||
extra_data_flags |= 0b10
|
||||
if WRITE_UNCROSSABLE_BREAKS:
|
||||
extra_data_flags |= 0b100
|
||||
record0.write(pack(b'>I', extra_data_flags))
|
||||
|
||||
# 0xe4 - 0xe7 : Primary index record
|
||||
record0.write(pack(b'>I', 0xffffffff if self.primary_index_record_idx
|
||||
is None else self.primary_index_record_idx))
|
||||
|
||||
record0.write(exth)
|
||||
record0.write(title)
|
||||
record0 = record0.getvalue()
|
||||
# Add some buffer so that Amazon can add encryption information if this
|
||||
# MOBI is submitted for publication
|
||||
record0 += (b'\0' * (1024*8))
|
||||
self.records[0] = align_block(record0)
|
||||
# }}}
|
||||
|
||||
def build_exth(self): # EXTH Header {{{
|
||||
oeb = self.oeb
|
||||
exth = StringIO()
|
||||
nrecs = 0
|
||||
for term in oeb.metadata:
|
||||
if term not in EXTH_CODES: continue
|
||||
code = EXTH_CODES[term]
|
||||
items = oeb.metadata[term]
|
||||
if term == 'creator':
|
||||
if self.prefer_author_sort:
|
||||
creators = [normalize(unicode(c.file_as or c)) for c in items]
|
||||
else:
|
||||
creators = [normalize(unicode(c)) for c in items]
|
||||
items = ['; '.join(creators)]
|
||||
for item in items:
|
||||
data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
|
||||
if term == 'identifier':
|
||||
if data.lower().startswith('urn:isbn:'):
|
||||
data = data[9:]
|
||||
elif item.scheme.lower() == 'isbn':
|
||||
pass
|
||||
else:
|
||||
continue
|
||||
data = data.encode('utf-8')
|
||||
exth.write(pack(b'>II', code, len(data) + 8))
|
||||
exth.write(data)
|
||||
nrecs += 1
|
||||
if term == 'rights' :
|
||||
try:
|
||||
rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
|
||||
except:
|
||||
rights = b'Unknown'
|
||||
exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
|
||||
exth.write(rights)
|
||||
nrecs += 1
|
||||
|
||||
# Write UUID as ASIN
|
||||
uuid = None
|
||||
from calibre.ebooks.oeb.base import OPF
|
||||
for x in oeb.metadata['identifier']:
|
||||
if (x.get(OPF('scheme'), None).lower() == 'uuid' or
|
||||
unicode(x).startswith('urn:uuid:')):
|
||||
uuid = unicode(x).split(':')[-1]
|
||||
break
|
||||
if uuid is None:
|
||||
from uuid import uuid4
|
||||
uuid = str(uuid4())
|
||||
|
||||
if isinstance(uuid, unicode):
|
||||
uuid = uuid.encode('utf-8')
|
||||
exth.write(pack(b'>II', 113, len(uuid) + 8))
|
||||
exth.write(uuid)
|
||||
nrecs += 1
|
||||
|
||||
# Write cdetype
|
||||
if self.is_periodical:
|
||||
data = b'NWPR'
|
||||
else:
|
||||
data = b'EBOK'
|
||||
exth.write(pack(b'>II', 501, len(data)+8))
|
||||
exth.write(data)
|
||||
nrecs += 1

        # Add a publication date entry
        datestr = None  # initialize so the check below cannot raise NameError
        if oeb.metadata['date']:
            datestr = str(oeb.metadata['date'][0])
        elif oeb.metadata['timestamp']:
            datestr = str(oeb.metadata['timestamp'][0])

        if datestr is None:
            raise ValueError("missing date or timestamp")

        datestr = bytes(datestr)
        exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1
        if self.is_periodical:
            exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
            exth.write(datestr)
            nrecs += 1
        exth.write(pack(b'>III', EXTH_CODES['versionnumber'], 12, 7))
        nrecs += 1

        if self.is_periodical:
            # Pretend to be amazon's super secret periodical generator
            vals = {204:201, 205:2, 206:0, 207:101}
        else:
            # Pretend to be kindlegen 1.2
            vals = {204:201, 205:1, 206:2, 207:33307}
        # Iterate over (code, value) pairs; iterating the dict directly would
        # yield bare keys and fail to unpack
        for code, val in vals.iteritems():
            exth.write(pack(b'>III', code, 12, val))
            nrecs += 1

        if self.cover_offset is not None:
            exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
                self.cover_offset))
            exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
            nrecs += 2
        if self.thumbnail_offset is not None:
            exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
                self.thumbnail_offset))
            nrecs += 1

        if self.serializer.start_offset is not None:
            exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
                self.serializer.start_offset))
            nrecs += 1

        exth = exth.getvalue()
        trail = len(exth) % 4
        pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
        exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
        return b''.join(exth)
    # }}}
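Each EXTH record emitted above is a (code, length, data) triple whose length field includes its own 8 header bytes, all wrapped in a b'EXTH' + total-length + record-count preamble. A minimal sketch of walking that structure back (illustrative only, not part of this commit):

from struct import unpack

def iter_exth_records(exth):
    # exth: the bytestring returned by build_exth() above
    assert exth[:4] == b'EXTH'
    total_length, nrecs = unpack(b'>II', exth[4:12])
    pos = 12
    for _ in xrange(nrecs):
        code, length = unpack(b'>II', exth[pos:pos+8])
        yield code, exth[pos+8:pos+length]  # length includes the 8 header bytes
        pos += length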

    def write_header(self): # PalmDB header {{{
        '''
        Write the PalmDB header
        '''
        title = ascii_filename(unicode(self.oeb.metadata.title[0])).replace(
                ' ', '_')[:32]  # truncate so the padding below cannot go negative
        title = title + (b'\0' * (32 - len(title)))
        now = int(time.time())
        nrecords = len(self.records)
        self.write(title, pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
            b'BOOK', b'MOBI', pack(b'>IIH', (2*nrecords)-1, 0, nrecords))
        offset = self.tell() + (8 * nrecords) + 2
        for i, record in enumerate(self.records):
            self.write(pack(b'>I', offset), b'\0', pack(b'>I', 2*i)[1:])
            offset += len(record)
        self.write(b'\0\0')
    # }}}
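write_header puts the record count at bytes 76-78 (a 32-byte name, 28 bytes of attributes and timestamps, 8 bytes of type/creator, then 10 more bytes ending in nrecords), followed by one 8-byte entry per record whose first four bytes are that record's absolute file offset. A sketch of reading the table back, under exactly those layout assumptions (illustrative, not part of this commit):

from struct import unpack

def read_palmdb_offsets(data):
    nrecords, = unpack(b'>H', data[76:78])
    offsets = []
    for i in xrange(nrecords):
        entry = data[78 + 8*i:78 + 8*i + 8]  # 4-byte offset, 1 flag byte, 3-byte id
        offsets.append(unpack(b'>I', entry[:4])[0])
    return offsets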

    def write_content(self):
        for record in self.records:
            self.write(record)

src/calibre/ebooks/mobi/writer2/serializer.py (new file, 306 lines)
@@ -0,0 +1,306 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
        namespace, prefixname, urlnormalize)
from calibre.ebooks.mobi.mobiml import MBP_NS

from collections import defaultdict
from urlparse import urldefrag
from cStringIO import StringIO


class Serializer(object):
    NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

    def __init__(self, oeb, images, write_page_breaks_after_item=True):
        '''
        Write all the HTML markup in oeb into a single in memory buffer
        containing a single html document with links replaced by offsets into
        the buffer.

        :param oeb: OEBBook object that encapsulates the document to be
        processed.

        :param images: Mapping of image hrefs (urlnormalized) to image record
        indices.

        :param write_page_breaks_after_item: If True a MOBIpocket pagebreak tag
        is written after every element of the spine in ``oeb``.
        '''
        self.oeb = oeb
        self.images = images
        self.logger = oeb.logger
        self.write_page_breaks_after_item = write_page_breaks_after_item

        # If not None, this is a number pointing to the location at which to
        # open the MOBI file on the Kindle
        self.start_offset = None

        # Mapping of hrefs (urlnormalized) to the offset in the buffer where
        # the resource pointed to by the href lives. Used at the end to fill in
        # the correct values into all filepos="..." links.
        self.id_offsets = {}

        # Mapping of hrefs (urlnormalized) to a list of offsets into the buffer
        # where filepos="..." elements are written corresponding to links that
        # point to the href. This is used at the end to fill in the correct values.
        self.href_offsets = defaultdict(list)

        # List of offsets in the buffer of non linear items in the spine. These
        # become uncrossable breaks in the MOBI
        self.breaks = []

        self.find_blocks()

    def find_blocks(self):
        '''
        Mark every item in the spine if it is the start/end of a
        section/article, so that it can be wrapped in divs appropriately.
        '''
        for item in self.oeb.spine:
            item.is_section_start = item.is_section_end = False
            item.is_article_start = item.is_article_end = False

        def spine_item(tocitem):
            href = urldefrag(tocitem.href)[0]
            for item in self.oeb.spine:
                if item.href == href:
                    return item

        for item in self.oeb.toc.iterdescendants():
            if item.klass == 'section':
                articles = list(item)
                if not articles: continue
                spine_item(item).is_section_start = True
                for i, article in enumerate(articles):
                    si = spine_item(article)
                    si.is_article_start = True

        items = list(self.oeb.spine)
        in_sec = in_art = False
        for i, item in enumerate(items):
            try:
                prev_item = items[i-1]
            except:
                prev_item = None
            if in_art and item.is_article_start == True:
                prev_item.is_article_end = True
                in_art = False
            if in_sec and item.is_section_start == True:
                prev_item.is_section_end = True
                in_sec = False
            if item.is_section_start: in_sec = True
            if item.is_article_start: in_art = True

        item.is_section_end = item.is_article_end = True

    def __call__(self):
        '''
        Return the document serialized as a single UTF-8 encoded bytestring.
        '''
        buf = self.buf = StringIO()
        buf.write(b'<html>')
        self.serialize_head()
        self.serialize_body()
        buf.write(b'</html>')
        self.fixup_links()
        return buf.getvalue()

    def serialize_head(self):
        buf = self.buf
        buf.write(b'<head>')
        if len(self.oeb.guide) > 0:
            self.serialize_guide()
        buf.write(b'</head>')

    def serialize_guide(self):
        '''
        The Kindle decides where to open a book based on the presence of
        an item in the guide that looks like
        <reference type="text" title="Start" href="chapter-one.xhtml"/>

        Similarly an item with type="toc" controls where the Goto Table of
        Contents operation on the kindle goes.
        '''

        buf = self.buf
        hrefs = self.oeb.manifest.hrefs
        buf.write(b'<guide>')
        for ref in self.oeb.guide.values():
            path = urldefrag(ref.href)[0]
            if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
                continue

            buf.write(b'<reference type="')
            if ref.type.startswith('other.'):
                self.serialize_text(ref.type.replace('other.',''), quot=True)
            else:
                self.serialize_text(ref.type, quot=True)
            buf.write(b'" ')
            if ref.title is not None:
                buf.write(b'title="')
                self.serialize_text(ref.title, quot=True)
                buf.write(b'" ')
                if ref.title == 'start':
                    self._start_href = ref.href
            self.serialize_href(ref.href)
            # Space required or won't work, I kid you not
            buf.write(b' />')

        buf.write(b'</guide>')
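For a typical start reference this emits markup of roughly the following shape, inferred from the writes above (shown as a comment because filepos is still the ten-digit placeholder at this point; fixup_links patches it later):

# Illustrative output of serialize_guide, before fixup_links has run:
#
#   <guide><reference type="text" title="Start" filepos=0000000000 /></guide>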

    def serialize_href(self, href, base=None):
        '''
        Serialize the href attribute of an <a> or <reference> tag. It is
        serialized as filepos=0000000000 and a pointer to its location is
        stored in self.href_offsets so that the correct value can be filled in
        at the end.
        '''
        hrefs = self.oeb.manifest.hrefs
        path, frag = urldefrag(urlnormalize(href))
        if path and base:
            path = base.abshref(path)
        if path and path not in hrefs:
            return False
        buf = self.buf
        item = hrefs[path] if path else None
        if item and item.spine_position is None:
            return False
        path = item.href if item else base.href
        href = '#'.join((path, frag)) if frag else path
        buf.write(b'filepos=')
        self.href_offsets[href].append(buf.tell())
        buf.write(b'0000000000')
        return True

    def serialize_body(self):
        '''
        Serialize all items in the spine of the document. Non linear items are
        moved to the end.
        '''
        buf = self.buf
        self.anchor_offset = buf.tell()
        buf.write(b'<body>')
        self.body_start_offset = buf.tell()
        spine = [item for item in self.oeb.spine if item.linear]
        spine.extend([item for item in self.oeb.spine if not item.linear])
        for item in spine:
            self.serialize_item(item)
        self.body_end_offset = buf.tell()
        buf.write(b'</body>')

    def serialize_item(self, item):
        '''
        Serialize an individual item from the spine of the input document.
        The offset of this item is stored in self.id_offsets
        '''
        buf = self.buf
        if not item.linear:
            self.breaks.append(buf.tell() - 1)
        self.id_offsets[urlnormalize(item.href)] = buf.tell()
        if item.is_section_start:
            buf.write(b'<div>')
        if item.is_article_start:
            buf.write(b'<div>')
        for elem in item.data.find(XHTML('body')):
            self.serialize_elem(elem, item)
        if item.is_article_end:
            # Kindle periodical article end marker
            buf.write(b'<div></div>')
        if self.write_page_breaks_after_item:
            buf.write(b'<mbp:pagebreak/>')
        if item.is_article_end:
            buf.write(b'</div>')
        if item.is_section_end:
            buf.write(b'</div>')
        self.anchor_offset = None

    def serialize_elem(self, elem, item, nsrmap=NSRMAP):
        buf = self.buf
        if not isinstance(elem.tag, basestring) or \
                namespace(elem.tag) not in nsrmap:
            return
        tag = prefixname(elem.tag, nsrmap)
        # Previous layers take care of @name
        id_ = elem.attrib.pop('id', None)
        if id_:
            href = '#'.join((item.href, id_))
            offset = self.anchor_offset or buf.tell()
            self.id_offsets[urlnormalize(href)] = offset
        if self.anchor_offset is not None and \
                tag == 'a' and not elem.attrib and \
                not len(elem) and not elem.text:
            return
        self.anchor_offset = buf.tell()
        buf.write(b'<')
        buf.write(tag.encode('utf-8'))
        if elem.attrib:
            for attr, val in elem.attrib.items():
                if namespace(attr) not in nsrmap:
                    continue
                attr = prefixname(attr, nsrmap)
                buf.write(b' ')
                if attr == 'href':
                    if self.serialize_href(val, item):
                        continue
                elif attr == 'src':
                    href = urlnormalize(item.abshref(val))
                    if href in self.images:
                        index = self.images[href]
                        buf.write(b'recindex="%05d"' % index)
                        continue
                buf.write(attr.encode('utf-8'))
                buf.write(b'="')
                self.serialize_text(val, quot=True)
                buf.write(b'"')
        buf.write(b'>')
        if elem.text or len(elem) > 0:
            if elem.text:
                self.anchor_offset = None
                self.serialize_text(elem.text)
            for child in elem:
                self.serialize_elem(child, item)
                if child.tail:
                    self.anchor_offset = None
                    self.serialize_text(child.tail)
        buf.write(b'</%s>' % tag.encode('utf-8'))

    def serialize_text(self, text, quot=False):
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        text = text.replace(u'\u00AD', '') # Soft-hyphen
        if quot:
            text = text.replace('"', '&quot;')
        self.buf.write(text.encode('utf-8'))

    def fixup_links(self):
        '''
        Fill in the correct values for all filepos="..." links with the offsets
        of the linked to content (as stored in id_offsets).
        '''
        buf = self.buf
        id_offsets = self.id_offsets
        for href, hoffs in self.href_offsets.items():
            is_start = (href and href == getattr(self, '_start_href', None))
            # Iterate over all filepos items
            if href not in id_offsets:
                self.logger.warn('Hyperlink target %r not found' % href)
                # Link to the top of the document, better than just ignoring
                href, _ = urldefrag(href)
            if href in self.id_offsets:
                ioff = self.id_offsets[href]
                if is_start:
                    self.start_offset = ioff
                for hoff in hoffs:
                    buf.seek(hoff)
                    buf.write(b'%010d' % ioff)
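fixup_links depends on every placeholder being exactly ten ASCII digits, so patching a link in place never shifts any other offset in the buffer. A toy round trip of the same trick (illustrative only, not part of this commit):

from cStringIO import StringIO

buf = StringIO()
buf.write(b'<a filepos=')
patch_at = buf.tell()            # remember where the placeholder starts
buf.write(b'0000000000')         # fixed width, as in serialize_href()
buf.write(b'>jump</a> filler ')
target = buf.tell()              # offset the link should point at
buf.write(b'<p id="x">target</p>')
buf.seek(patch_at)
buf.write(b'%010d' % target)     # same-length overwrite, as in fixup_links()
print(buf.getvalue())
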
@@ -1180,8 +1180,9 @@ class Manifest(object):
            if memory is None:
                from calibre.ptempfile import PersistentTemporaryFile
                pt = PersistentTemporaryFile(suffix='_oeb_base_mem_unloader.img')
                with pt:
                    pt.write(self._data)
                pt.close()
                self.oeb._temp_files.append(pt.name)
                def loader(*args):
                    with open(pt.name, 'rb') as f:
                        ans = f.read()
@@ -1196,8 +1197,6 @@ class Manifest(object):
            self._loader = loader2
            self._data = None

        def __str__(self):
            data = self.data
            if isinstance(data, etree._Element):
@@ -1681,8 +1680,15 @@ class TOC(object):
                return True
        return False

    def iterdescendants(self):
    def iterdescendants(self, breadth_first=False):
        """Iterate over all descendant nodes, depth-first by default or
        breadth-first when breadth_first is True."""
        if breadth_first:
            for child in self.nodes:
                yield child
            for child in self.nodes:
                for node in child.iterdescendants(breadth_first=True):
                    yield node
        else:
            for child in self.nodes:
                for node in child.iter():
                    yield node
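A toy model of the new breadth_first flag (illustrative only; Node mimics the iteration contract of TOC above): with children B (containing D) and C under a root, depth-first yields B, D, C while breadth_first=True yields the siblings B, C before descending to D.

class Node(object):
    def __init__(self, name, nodes=()):
        self.name = name
        self.nodes = list(nodes)

    def iter(self):  # depth-first, node itself included, as in TOC
        yield self
        for child in self.nodes:
            for node in child.iter():
                yield node

    def iterdescendants(self, breadth_first=False):
        if breadth_first:
            for child in self.nodes:
                yield child
            for child in self.nodes:
                for node in child.iterdescendants(breadth_first=True):
                    yield node
        else:
            for child in self.nodes:
                for node in child.iter():
                    yield node

root = Node('A', [Node('B', [Node('D')]), Node('C')])
print([n.name for n in root.iterdescendants()])                     # B, D, C
print([n.name for n in root.iterdescendants(breadth_first=True)])   # B, C, D
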
@@ -1913,6 +1919,14 @@ class OEBBook(object):
        self.toc = TOC()
        self.pages = PageList()
        self.auto_generated_toc = True
        self._temp_files = []

    def clean_temp_files(self):
        for path in self._temp_files:
            try:
                os.remove(path)
            except:
                pass

    @classmethod
    def generate(cls, opts):
@@ -92,7 +92,7 @@ class EbookIterator(object):
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        text = text.lower()
@@ -163,6 +163,8 @@ class OEBReader(object):
            if item.media_type in check:
                try:
                    item.data
                except KeyboardInterrupt:
                    raise
                except:
                    self.logger.exception('Failed to parse content in %s'%
                            item.href)
@@ -186,8 +188,13 @@ class OEBReader(object):
            href, _ = urldefrag(href)
            if not href:
                continue
            try:
                href = item.abshref(urlnormalize(href))
                scheme = urlparse(href).scheme
            except:
                self.oeb.log.exception(
                        'Skipping invalid href: %r'%href)
                continue
            if not scheme and href not in known:
                new.add(href)
        elif item.media_type in OEB_STYLES:
@@ -318,7 +318,8 @@ class CSSFlattener(object):
                for edge in ('top', 'bottom'):
                    cssdict['%s-%s'%(prop, edge)] = '0pt'
            if self.context.insert_blank_line:
                cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em'
                cssdict['margin-top'] = cssdict['margin-bottom'] = \
                    '%fem'%self.context.insert_blank_line_size
            if self.context.remove_paragraph_spacing:
                cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size
@@ -36,5 +36,8 @@ class Clean(object):
            href = urldefrag(self.oeb.guide[x].href)[0]
            if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc',
                    'title-page', 'copyright-page', 'start'):
                item = self.oeb.guide[x]
                if item.title and item.title.lower() == 'start':
                    continue
                self.oeb.guide.remove(x)
@@ -45,9 +45,10 @@ body > .calibre_toc_block {
}

class HTMLTOCAdder(object):
    def __init__(self, title=None, style='nested'):
    def __init__(self, title=None, style='nested', position='end'):
        self.title = title
        self.style = style
        self.position = position

    @classmethod
    def config(cls, cfg):
@@ -98,7 +99,10 @@ class HTMLTOCAdder(object):
        self.add_toc_level(body, oeb.toc)
        id, href = oeb.manifest.generate('contents', 'contents.xhtml')
        item = oeb.manifest.add(id, href, XHTML_MIME, data=contents)
        if self.position == 'end':
            oeb.spine.add(item, linear=False)
        else:
            oeb.spine.insert(0, item, linear=True)
        oeb.guide.add('toc', 'Table of Contents', href)

    def add_toc_level(self, elem, toc):
@@ -47,15 +47,19 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
        m.add('series', mi.series)
    elif override_input_metadata:
        m.clear('series')
    if not mi.is_null('isbn'):
    identifiers = mi.get_identifiers()
    set_isbn = False
    for typ, val in identifiers.iteritems():
        has = False
        if typ.lower() == 'isbn':
            set_isbn = True
        for x in m.identifier:
            if x.scheme.lower() == 'isbn':
                x.content = mi.isbn
            if x.scheme.lower() == typ.lower():
                x.content = val
                has = True
        if not has:
            m.add('identifier', mi.isbn, scheme='ISBN')
    elif override_input_metadata:
            m.add('identifier', val, scheme=typ.upper())
    if override_input_metadata and not set_isbn:
        m.filter('identifier', lambda x: x.scheme.lower() == 'isbn')
    if not mi.is_null('language'):
        m.clear('language')
@@ -47,7 +47,10 @@ class ManifestTrimmer(object):
                    item.data is not None:
                hrefs = [r[2] for r in iterlinks(item.data)]
                for href in hrefs:
                    try:
                        href = item.abshref(urlnormalize(href))
                    except:
                        continue
                    if href in oeb.manifest.hrefs:
                        found = oeb.manifest.hrefs[href]
                        if found not in used:
@@ -165,6 +165,7 @@ class PDFWriter(QObject): # {{{
        printer = get_pdf_printer(self.opts)
        printer.setOutputFileName(item_path)
        self.view.print_(printer)
        printer.abort()
        self._render_book()

    def _delete_tmpdir(self):
@@ -186,6 +187,7 @@ class PDFWriter(QObject): # {{{
            draw_image_page(printer, painter, p,
                preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
            printer.abort()

    def _write(self):