Sync to trunk.

Changelog.yaml (249 lines changed)
@@ -4,6 +4,255 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.

- version: 0.7.8
  date: 2010-07-09

  new features:
    - title: "New tool to help prepare EPUBs for publication"
      type: major
      description: >
        "calibre now contains a new command line tool called epub-fix that can automatically fix
        common problems in EPUB files that cause them to be rejected by poorly designed publishing services.
        The tool is plugin based, so its functionality can be extended in the future. Currently, it can fix unmanifested files
        and work around the date and svg preserveAspectRatio bugs of epubcheck."

    - title: "New icons for the toolbar buttons by Kamil Tatara"

    - title: "Display rating (when available) in cover browser"

    - title: "Clicking on the central cover in the cover browser now opens that book in the viewer"

    - title: "Use the status bar instead of the area to the right of the location view to display status information"

    - title: "Driver for the Pandigital Novel e-book reader"

  bug fixes:
    - title: "News download: Do not specify a font family for article descriptions"

    - title: "News download: Fix regression introduced in 0.7.0 that broke download of some embedded content feeds"

    - title: "MOBI Output: Partial support for nested superscripts and subscripts."
      tickets: [6132]

    - title: "CHM Input: Fix handling of buggy CHM files with no .hhc"
      tickets: [6087]

    - title: "EPUB Input: Fix bug in unzipping EPUB files that have been zipped in depth-first order."
      tickets: [6127]

    - title: "TXT Input: Convert HTML entities to characters."
      tickets: [6114]

    - title: "LRF Input: Handle LRF files with random null bytes in the text"
      tickets: [6097]

    - title: "Kobo driver: Fix detection of txt/html files on the device"

    - title: "Fix opening of books when the calibre library is on an unmapped network share on Windows"

    - title: "SONY driver: Only update the timestamp in the XML db for newly added books"

    - title: "Cover browser: Fix rendering of the central cover when the width of the cover browser is less than the width of a single cover"

    - title: "Cover browser: Correct fix for the setPixel out-of-bounds warning that caused UI slowdown in calibre"

  new recipes:
    - title: "evz.ro"
      author: Darko Miletic

    - title: "Anchorage Daily News, China Economic Net, BBC Chinese and Singtao Daily"
      author: rty

    - title: Big Oven
      author: Starson17

  improved recipes:
    - Haaretz
    - Editor and Publisher
    - Estadao

- version: 0.7.7
  date: 2010-07-02

  new features:
    - title: "Support for the Nokia E52"

    - title: "Searching on the size column"

    - title: "iTunes driver: Add option to disable cover fetching, to speed up the fetching of large book collections"

  bug fixes:
    - title: "SONY driver: Only update metadata when books are sent to the device."

    - title: "TXT Input: Ensure the generated html is splittable"
      tickets: [5904]

    - title: "Fix infinite loop in default cover generation."
      tickets: [6061]

    - title: "HTML Input: Fix a parsing bug that was triggered in rare conditions"
      tickets: [6064]

    - title: "HTML2Zip plugin: Do not replace ligatures"
      tickets: [6019]

    - title: "iTunes driver: Fix transmission of non-integral series numbers"
      tickets: [6046]

    - title: "Simplify implementation of cover caching and ensure the cover browser is updated when covers are changed"

    - title: "PDF metadata: Fix last character corrupted when setting metadata in encrypted files."

    - title: "PDF metadata: Update the version of PoDoFo used to set metadata to 0.8.1. Hopefully that means more PDF files will work"

    - title: "Device drivers: Speedup for dumping the metadata cache to devices on Windows XP"

    - title: "EPUB Output: Ensure that the language setting is conformant to the specs"

    - title: "MOBI Output: Fix a memory leak and a crash in the palmdoc compression routine"

    - title: "Metadata download: Fix a regression that resulted in a failed download for some books"

  new recipes:
    - title: "Foreign Policy and Alo!"
      author: Darko Miletic

    - title: Statesman and ifzm
      author: rty

  improved recipes:
    - Akter
    - The Old New Thing

- version: 0.7.6
  date: 2010-06-28

  new features:
    - title: "Add support for the new firmware of the Azbooka"
      tickets: [5994]

    - title: "A few speedups for calibre startup; these should shave a few seconds off startup time on slower machines"

    - title: "Support for the Sweem MM300"

    - title: "Add keyboard shortcut for Download metadata and covers"

  bug fixes:
    - title: "Fix regression in 0.7.5 that broke conversion of malformed HTML files (like those Microsoft Word outputs)"
      type: major
      tickets: [5991]

    - title: "Don't download tags from LibraryThing, as the tagging there is not very good"

    - title: "Add mimetype for FB2 so that it can be served by the content server"
      tickets: [6011]

    - title: "Ensure the cover is not resized to less than the available space in the Edit Meta Information dialog"
      tickets: [6001]

    - title: "SONY driver: Only update collections when sending a book to the device for the first time"

    - title: "calibre should now work on Windows when the location of the library contains non-ascii characters"
      tickets: [5983]

    - title: "Cover browser once again distorts instead of cropping covers that have an incorrect aspect ratio"

    - title: "ISBNDb metadata plugin: Fix bug causing only the first page of results to be fetched"

    - title: "Move the iTunes driver to the bottom so that it doesn't interfere with device detection for people who have iPhones and an ereader plugged in"

  improved recipes:
    - Houston Chronicle
    - Hindu
    - Times of India
    - New York Times

  new recipes:
    - title: Winnipeg Sun
      author: rty

- version: 0.7.5
  date: 2010-06-25

  new features:
    - title: "New driver for the Kobo featuring closer integration with the device."

    - title: "Support for the Dell Streak, Eken Android tablet and the Astak Mentor EB600"

    - title: "New series type custom column"

    - title: "Add option in the Send to device menu to connect to iTunes without any iDevice (experimental)"

    - title: "iPad driver: Make setting the iTunes Category from series optional. News downloads are now optimized for iPad output."

    - title: "Add option to disable the book cover animation"
      tickets: [5909]

    - title: "Edit meta information dialog: Remember last used size and splitter position."
      tickets: [5908]

    - title: "Metadata download: If any results have a published date, ensure they all do"

    - title: "SONY driver: Add a preference setting in Preferences->Add/Save->Send to device to control how collections are managed on the device by calibre"

    - title: "Metadata download: Filter out non-book results. Also sort results by availability of covers for the isbn"
      tickets: [5946]

    - title: "Bulk editing for device collections in the device view via the context menu"

  bug fixes:
    - title: "When converting books using the calibre GUI, set the language of the output book to be the same as the language of the User Interface, instead of undefined. Fixes use of the dictionary in iBooks"

    - title: "PDF Output: Fix setting of top/bottom margins having no effect"

    - title: "Conversion pipeline: Fix typo causing the remove footer regex to always fail"

    - title: "Handle the device being yanked with queued device jobs gracefully"

    - title: "Conversion pipeline: Handle deeply nested XML structures"
      tickets: [5931]

    - title: "Conversion pipeline: Fix handling of lists with a specified left margin"
      tickets: [5877]

    - title: "Restore workaround for ADE's buggy rendering of anchors as links. However, make it overridable by extra CSS"

    - title: "Fix LibraryThing metadata download plugin"

    - title: "Fix multiple ratings displayed in the Tag Browser for some legacy databases"

    - title: "Fix invocation of postprocess file type plugins"

    - title: "HTML Input: Handle @import directives in linked css files."
      tickets: [5135]

    - title: "HTML Input: Handle absolute paths in resource links on Windows correctly."
      tickets: [3031]

    - title: "E-book viewer: Handle font-face rules that specify multiple families to be substituted"

    - title: "Cover browser: Set aspect ratio of covers to 3:4 instead of 2:3. Crop rather than distort covers whose aspect ratio differs from this. Antialias the rendering of the central cover"

    - title: "Reset the Tag browser if the text in the search box is edited"

    - title: "Fix detection of the SD card in the Samsung Galaxy Windows driver"

  new recipes:
    - title: "L'Osservatore Romano"
      author: Darko Miletic

    - title: China Press, London Free Press, People Daily
      author: rty

  improved recipes:
    - Zaobao
    - New Scientist
    - National Post
    - London review of books

- version: 0.7.4
  date: 2010-06-19
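The 0.7.8 entry above introduces the epub-fix command line tool without showing an invocation. A minimal sketch of driving it from Python, assuming only what the changelog states (an epub-fix executable that takes an EPUB path; its flags are not documented here):

    # Run the new epub-fix tool on one file. Assumption: 'epub-fix' is on
    # PATH and accepts the EPUB path as its argument; no other options are
    # documented in this changelog entry.
    import subprocess

    subprocess.check_call(['epub-fix', 'mybook.epub'])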
resources/images/default_cover.svg (new file, 3191 lines, 105 KiB)
resources/images/devices/itunes.png (new binary file, 25 KiB)
@@ -1752,7 +1752,7 @@
          sodipodi:cy="93.331604"
          sodipodi:cx="-166.53223"
          id="path6082"
-         style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
+         style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
          sodipodi:type="arc" /></clipPath><radialGradient
          inkscape:collect="always"
          xlink:href="#linearGradient5990"
@@ -2513,7 +2513,7 @@
          transform="matrix(-1.7332269,0,0,1.7332269,-228.13814,-101.76485)"
          clip-path="none" /><path
          sodipodi:type="arc"
-         style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
+         style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
          id="path3915"
          sodipodi:cx="-166.53223"
          sodipodi:cy="93.331604"
@@ -2901,22 +2901,8 @@
        id="g133">
         <defs
    id="defs135" />
-        <use
-   id="use138"
-   x="0"
-   y="0"
-   width="121"
-   height="120" />
         <clipPath
    id="XMLID_215_">
-        <use
-   id="use141"
-   x="0"
-   y="0"
-   width="121"
-   height="120" />
         </clipPath>
         <g
    clip-path="url(#XMLID_215_)"
resources/images/dialog_question.svg (new file, 269 lines, 8.4 KiB)
@@ -0,0 +1,269 @@
[SVG source omitted: an Adobe Illustrator/Inkscape export (sodipodi docname "system-help.svgz") of a 128x128 question-mark help icon: a blue radial-gradient circle (#2A94EC to #0057AE) with a white question mark and Gaussian-blur drop shadow.]
resources/images/edit_copy.svg (new file, 4298 lines, 133 KiB)
resources/images/help.svg (new file, 203 lines, 6.3 KiB)
@@ -0,0 +1,203 @@
[SVG source omitted: a cleaned Inkscape re-export of the same 128x128 question-mark icon as dialog_question.svg, with the sodipodi/inkscape metadata stripped, colors lowercased, and sRGB color-interpolation filters added.]
resources/images/news/alo_novine.png (new binary file, 753 B)
resources/images/news/elpais_impreso.png (new binary file, 717 B)
resources/images/news/evz.ro.png (new binary file, 836 B)
resources/images/news/haaretz.png (new binary file, 1.2 KiB)
resources/images/news/lrb.png (new binary file, 315 B)
resources/images/news/lrb_payed.png (new binary file, 315 B)
@@ -15,7 +15,7 @@ class Akter(BasicNewsRecipe):
     category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
     oldest_article = 8
     max_articles_per_feed = 100
-    no_stylesheets = False
+    no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf-8'
     masthead_url = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png'
@@ -23,9 +23,9 @@ class Akter(BasicNewsRecipe):
     publication_type = 'magazine'
     remove_empty_feeds = True
     PREFIX = 'http://www.akter.co.rs'
-    extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+    extra_css = """
                     @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
-                    .article_description,body,.lokacija{font-family: Arial,Helvetica,sans1,sans-serif}
+                    .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
                     .color-2{display:block; margin-bottom: 10px; padding: 5px, 10px;
                     border-left: 1px solid #D00000; color: #D00000}
                     img{margin-bottom: 0.8em} """
resources/recipes/alo_novine.recipe (new file, 65 lines)
@@ -0,0 +1,65 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'

'''
www.alo.rs
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class Alo_Novine(BasicNewsRecipe):
    title                 = 'Alo!'
    __author__            = 'Darko Miletic'
    description           = "News Portal from Serbia"
    publisher             = 'Alo novine d.o.o.'
    category              = 'news, politics, Serbia'
    oldest_article        = 2
    max_articles_per_feed = 100
    delay                 = 4
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'sr'
    extra_css             = """
                            @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                            .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
                            .lead {font-size: 1.3em}
                            h1{color: #DB0700}
                            .article_uvod{font-style: italic; font-size: 1.2em}
                            img{margin-bottom: 0.8em} """

    conversion_options = {
                          'comment'  : description
                        , 'tags'     : category
                        , 'publisher': publisher
                        , 'language' : language
                        }

    # Substitute U+0110 (D with stroke) with the visually identical U+00D0 (Eth)
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags       = [dict(name=['object','link','embed'])]
    remove_attributes = ['height','width']

    feeds = [
              (u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti')
             ,(u'Politika'         , u'http://www.alo.rs/rss/politika')
             ,(u'Vesti'            , u'http://www.alo.rs/rss/vesti')
             ,(u'Sport'            , u'http://www.alo.rs/rss/sport')
             ,(u'Ljudi'            , u'http://www.alo.rs/rss/ljudi')
             ,(u'Saveti'           , u'http://www.alo.rs/rss/saveti')
            ]

    def preprocess_html(self, soup):
        # Strip inline styles so extra_css controls the rendering
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        # The article id is the next-to-last path segment; use it to build the print URL
        artl  = url.rpartition('/')[0]
        artid = artl.rpartition('/')[2]
        return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + artid

    def image_url_processor(self, baseurl, url):
        # Collapse the doubled slash the site emits in image URLs
        return url.replace('alo.rs//','alo.rs/')
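As an illustration of the print_version transformation above, here is a trace on a made-up article URL (the /section/id/slug path shape is an assumption inferred from the two rpartition calls; real Alo! URLs may differ):

    # Illustration only: what print_version computes for a hypothetical URL.
    url   = 'http://www.alo.rs/vesti/12345/neki-naslov'   # made-up example
    artl  = url.rpartition('/')[0]     # 'http://www.alo.rs/vesti/12345'
    artid = artl.rpartition('/')[2]    # '12345'
    print('http://www.alo.rs/resources/templates/tools/print.php?id=' + artid)
    # -> http://www.alo.rs/resources/templates/tools/print.php?id=12345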
resources/recipes/anchorage_daily.recipe (new file, 40 lines)
@@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1278347258(BasicNewsRecipe):
    title          = u'Anchorage Daily News'
    __author__     = 'rty'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [(u'Alaska News', u'http://www.adn.com/news/alaska/index.xml'),
             (u'Business', u'http://www.adn.com/money/index.xml'),
             (u'Sports', u'http://www.adn.com/sports/index.xml'),
             (u'Politics', u'http://www.adn.com/politics/index.xml'),
             (u'Lifestyles', u'http://www.adn.com/life/index.xml'),
             (u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
            ]
    description = '''Alaska's Newspaper'''
    publisher   = 'http://www.adn.com'
    category    = 'news, Alaska, Anchorage'
    language    = 'en'
    extra_css   = '''
                  p{font-weight: normal;text-align: justify}
                  '''
    remove_javascript    = True
    use_embedded_content = False
    no_stylesheets       = True
    encoding             = 'latin-1'
    conversion_options   = {'linearize_tables':True}
    masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'

    keep_only_tags = [
                      dict(name='div', attrs={'class':'left_col story_mainbar'}),
                     ]
    remove_tags = [
                   dict(name='div', attrs={'class':'story_tools'}),
                   dict(name='p', attrs={'class':'ad_label'}),
                  ]
    remove_tags_after = [
                         dict(name='div', attrs={'class':'advertisement'}),
                        ]
@@ -12,9 +12,9 @@ class AssociatedPress(BasicNewsRecipe):
     max_articles_per_feed = 15
     html2lrf_options = ['--force-page-break-before-tag="chapter"']


     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
         [
             (r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
             (r'<body class="apple-rss-no-unread-mode" onLoad="setup(null)">.*?<!-- start Entries -->', lambda match : '<body>'),
@@ -25,10 +25,10 @@ class AssociatedPress(BasicNewsRecipe):
             (r'<p class="ap-story-p">', lambda match : '<p>'),
             (r'Learn more about our <a href="http://apdigitalnews.com/privacy.html">Privacy Policy</a>.*?</body>', lambda match : '</body>'),
         ]
     ]


     feeds = [ ('AP Headlines', 'http://hosted.ap.org/lineups/TOPHEADS-rss_2.0.xml?SITE=ORAST&SECTION=HOME'),
               ('AP US News', 'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml?SITE=CAVIC&SECTION=HOME'),
               ('AP World News', 'http://hosted.ap.org/lineups/WORLDHEADS-rss_2.0.xml?SITE=SCAND&SECTION=HOME'),
@@ -38,4 +38,4 @@ class AssociatedPress(BasicNewsRecipe):
               ('AP Health News', 'http://hosted.ap.org/lineups/HEALTHHEADS-rss_2.0.xml?SITE=FLDAY&SECTION=HOME'),
               ('AP Science News', 'http://hosted.ap.org/lineups/SCIENCEHEADS-rss_2.0.xml?SITE=OHCIN&SECTION=HOME'),
               ('AP Strange News', 'http://hosted.ap.org/lineups/STRANGEHEADS-rss_2.0.xml?SITE=WCNC&SECTION=HOME'),
             ]
resources/recipes/bbc_chinese.recipe (new file, 39 lines)
@@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'BBC Chinese'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [
             (u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
             (u'\u56fd\u9645\u65b0\u95fb', u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
             (u'\u4e24\u5cb8\u4e09\u5730', u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
             (u'\u91d1\u878d\u8d22\u7ecf', u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
             (u'\u7f51\u4e0a\u4e92\u52a8', u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
             (u'\u97f3\u89c6\u56fe\u7247', u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
             (u'\u5206\u6790\u8bc4\u8bba', u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml')
            ]
    extra_css = '''
                @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
                body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
                h1 {font-family: 'DroidFont', serif;}\n
                .articledescription {font-family: 'DroidFont', serif;}
                '''
    __author__  = 'rty'
    __version__ = '1.0'
    language    = 'zh'
    publisher   = 'British Broadcasting Corporation'
    description = 'BBC news in Chinese'
    category    = 'News, Chinese'
    remove_javascript    = True
    use_embedded_content = False
    no_stylesheets       = True
    encoding             = 'UTF-8'
    conversion_options   = {'linearize_tables':True}
    masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'
    keep_only_tags = [
                      dict(name='h1'),
                      dict(name='p', attrs={'class':['primary-topic','summary']}),
                      dict(name='div', attrs={'class':['bodytext','datestamp']}),
                     ]
resources/recipes/big_oven.recipe (new file, 64 lines)
@@ -0,0 +1,64 @@
from calibre.web.feeds.news import BasicNewsRecipe

class BigOven(BasicNewsRecipe):
    title       = 'BigOven'
    __author__  = 'Starson17'
    description = 'Recipes for the Foodie in us all. Registration is free. A fake username and password just gives smaller photos.'
    language    = 'en'
    category    = 'news, food, recipes, gourmet'
    publisher   = 'Starson17'
    use_embedded_content = False
    no_stylesheets       = True
    oldest_article       = 24
    remove_javascript    = True
    remove_empty_feeds   = True
    cover_url = 'http://www.software.com/images/products/BigOven%20Logo_177_216.JPG'
    max_articles_per_feed = 30
    needs_subscription    = True

    conversion_options = {'linearize_tables' : True
                        , 'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    def get_browser(self):
        # Log in with the credentials the user configured for this recipe
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.bigoven.com/')
            br.select_form(name='form1')
            br['TopMenu_bo1$email']    = self.username
            br['TopMenu_bo1$password'] = self.password
            br.submit()
        return br

    remove_attributes = ['style', 'font']

    keep_only_tags = [dict(name='h1')
                     ,dict(name='div', attrs={'class':'img'})
                     ,dict(name='div', attrs={'id':'intro'})
                     ]

    remove_tags = [dict(name='div', attrs={'style':["overflow: visible;"]})
                  ,dict(name='div', attrs={'class':['ctas']})
                  #,dict(name='a', attrs={'class':['edit']})
                  ,dict(name='p', attrs={'class':['byline']})
                  ]

    feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]

    def preprocess_html(self, soup):
        # Drop "edit" links together with their parents, and flatten definition links to plain text
        for tag in soup.findAll(name='a', attrs={'class':['edit']}):
            tag.parent.extract()
        for tag in soup.findAll(name='a', attrs={'class':['deflink']}):
            tag.replaceWith(tag.string)
        return soup

    extra_css = '''
                h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:medium;}
                p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                '''
resources/recipes/china_economic_net.recipe (new file, 39 lines)
@@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1278162597(BasicNewsRecipe):
    __author__ = 'rty'
    title      = u'China Economic Net'
    oldest_article = 7
    max_articles_per_feed = 100

    publisher   = 'www.ce.cn - China Economic net - Beijing'
    description = 'China Economic Net Magazine'
    category    = 'Economic News Magazine, Chinese, China'
    feeds = [
             (u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'),
             (u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'),
             (u'Health 健康', u'http://www.ce.cn/health/index_6294.xml'),
             (u'Technology 科技', u'http://sci.ce.cn/mainpage/index_6307.xml'),
             (u'Domestic Politics 国内时政', u'http://www.ce.cn/xwzx/gnsz/index_6273.xml')
            ]
    masthead_url = 'http://finance.ce.cn/images/08mdy_logo.gif'
    extra_css = '''
                @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
                body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
                h1 {font-family: 'DroidFont', serif;}\n
                .articledescription {font-family: 'DroidFont', serif;}
                '''
    remove_javascript    = True
    use_embedded_content = False
    no_stylesheets       = True
    language = 'zh-cn'
    encoding = 'gb2312'
    conversion_options = {'linearize_tables':True}

    keep_only_tags = [
                      dict(name='h1', attrs={'id':'articleTitle'}),
                      dict(name='div', attrs={'class':'laiyuan'}),
                      dict(name='div', attrs={'id':'articleText'}),
                     ]
resources/recipes/china_press.recipe (new file, 71 lines)
@@ -0,0 +1,71 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277228948(BasicNewsRecipe):
    title = u'China Press USA'
    oldest_article = 7
    max_articles_per_feed = 100

    __author__  = 'rty'
    __version__ = '1.0'
    language    = 'zh'
    publisher   = 'www.chinapressusa.com'
    description = 'Overseas Chinese Network Newspaper in the USA'
    category    = 'News in Chinese, USA'
    remove_javascript    = True
    use_embedded_content = False
    no_stylesheets       = True
    #encoding = 'GB2312'
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.chinapressusa.com/common/images/logo.gif'
    extra_css = '''
                @font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
                body {
                      margin-right: 8pt;
                      font-family: 'DroidFont', serif;}
                h1 {font-family: 'DroidFont', serif, sans-serif}
                .show {font-family: 'DroidFont', serif, sans-serif}
                '''
    feeds = [
             (u'\u65b0\u95fb\u9891\u9053', u'http://news.uschinapress.com/news.xml'),
             (u'\u534e\u4eba\u9891\u9053', u'http://chinese.uschinapress.com/chinese.xml'),
             (u'\u8bc4\u8bba\u9891\u9053', u'http://review.uschinapress.com/review.xml'),
            ]
    keep_only_tags = [
                      dict(name='div', attrs={'class':'show'}),
                     ]
    remove_tags = [
                   # dict(name='table', attrs={'class':'xle'}),
                   dict(name='div', attrs={'class':'time'}),
                  ]
    remove_tags_after = [
                         dict(name='div', attrs={'class':'bank17'}),
                         # dict(name='a', attrs={'class':'ab12'}),
                        ]

    def append_page(self, soup, appendtag, position):
        # Follow the "next page" pager recursively and splice each page's
        # article body (div.show) into the first page.
        pager = soup.find('div',attrs={'id':'displaypagenum'})
        if pager:
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class':'show'})
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            self.append_page(soup2,texttag,newpos)
            texttag.extract()
            appendtag.insert(position,texttag)

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="zh-CN"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div',attrs={'id':'displaypagenum'})
        if pager:
            pager.extract()
        return soup
@@ -1,14 +1,29 @@
-import re
+#!/usr/bin/env python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010 elsuave'
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class EandP(BasicNewsRecipe):
     title          = u'Editor and Publisher'
-    __author__     = u'Xanthan Gum'
+    __author__     = u'elsuave (modified from Xanthan Gum)'
     description    = 'News about newspapers and journalism.'
+    publisher      = 'Editor and Publisher'
+    category       = 'news, journalism, industry'
     language       = 'en'
-    no_stylesheets = True
-    oldest_article = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 25
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding       = 'utf8'
+    cover_url      = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'
+    remove_javascript = True
+
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
 
     # Font formatting code borrowed from kwetal
@@ -18,17 +33,21 @@ class EandP(BasicNewsRecipe):
        h2{font-size: large;}
     '''
 
-    # Delete everything before the article
+    # Keep only div:itemmgap
 
-    remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
+    keep_only_tags = [
+                       dict(name='div', attrs={'class':'itemmgap'})
+                     ]
 
-    # Delete everything after the article
+    # Remove commenting/social media links
 
-    preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
-    lambda match: '</body>'),]
+    remove_tags_after = [dict(name='div', attrs={'class':'clear'})]
 
-    feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
-             (u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
-             (u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
-             (u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
-             (u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]
+    feeds = [(u'Breaking News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx'),
+             (u'Business News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=2'),
+             (u'Ad/Circ News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=3'),
+             (u'Newsroom', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=4'),
+             (u'Technology News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=5'),
+             (u'Syndicates News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=7')]
resources/recipes/elpais_impreso.recipe (new file, 86 lines)
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.elpais.com/diario/
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElPaisImpresa(BasicNewsRecipe):
+    title                = 'El País - edicion impresa'
+    __author__           = 'Darko Miletic'
+    description          = 'el periodico global en Español'
+    publisher            = 'EDICIONES EL PAIS, S.L.'
+    category             = 'news, politics,Spain,actualidad,noticias,informacion,videos,fotografias,audios,graficos,nacional,internacional,deportes,economia,tecnologia,cultura,gente,television,sociedad,opinion,blogs,foros,chats,encuestas,entrevistas,participacion'
+    no_stylesheets       = True
+    encoding             = 'latin1'
+    use_embedded_content = False
+    language             = 'es'
+    publication_type     = 'newspaper'
+    masthead_url         = 'http://www.elpais.com/im/tit_logo_global.gif'
+    index                = 'http://www.elpais.com/diario/'
+    extra_css            = ' p{text-align: justify} body{ text-align: left; font-family: Georgia,"Times New Roman",Times,serif } h2{font-family: Arial,Helvetica,sans-serif} img{margin-bottom: 0.4em} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    feeds = [
+              (u'Internacional'     , index + u'internacional/'     )
+             ,(u'España'            , index + u'espana/'            )
+             ,(u'Economia'          , index + u'economia/'          )
+             ,(u'Opinion'           , index + u'opinion/'           )
+             ,(u'Viñetas'           , index + u'vineta/'            )
+             ,(u'Sociedad'          , index + u'sociedad/'          )
+             ,(u'Cultura'           , index + u'cultura/'           )
+             ,(u'Tendencias'        , index + u'tendencias/'        )
+             ,(u'Gente'             , index + u'gente/'             )
+             ,(u'Obituarios'        , index + u'obituarios/'        )
+             ,(u'Deportes'          , index + u'deportes/'          )
+             ,(u'Pantallas'         , index + u'radioytv/'          )
+             ,(u'Ultima'            , index + u'ultima/'            )
+             ,(u'Educacion'         , index + u'educacion/'         )
+             ,(u'Salud'             , index + u'salud/'             )
+             ,(u'Ciberpais'         , index + u'ciberpais/'         )
+             ,(u'EP3'               , index + u'ep3/'               )
+             ,(u'Cine'              , index + u'cine/'              )
+             ,(u'Babelia'           , index + u'babelia/'           )
+             ,(u'El viajero'        , index + u'viajero/'           )
+             ,(u'Negocios'          , index + u'negocios/'          )
+             ,(u'Domingo'           , index + u'domingo/'           )
+             ,(u'El Pais semanal'   , index + u'eps/'               )
+             ,(u'Quadern Catalunya' , index + u'quadern-catalunya/' )
+            ]
+
+    keep_only_tags=[dict(attrs={'class':['cabecera_noticia','contenido_noticia']})]
+    remove_attributes=['width','height']
+    remove_tags=[dict(name='link')]
+
+    def parse_index(self):
+        totalfeeds = []
+        lfeeds = self.get_feeds()
+        for feedobj in lfeeds:
+            feedtitle, feedurl = feedobj
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            for item in soup.findAll('a',attrs={'class':['g19r003','g19i003','g17r003','g17i003']}):
+                url   = 'http://www.elpais.com' + item['href'].rpartition('/')[0]
+                title = self.tag_to_string(item)
+                date  = strftime(self.timefmt)
+                articles.append({
+                                  'title'      :title
+                                 ,'date'       :date
+                                 ,'url'        :url
+                                 ,'description':''
+                                })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+
+    def print_version(self, url):
+        return url + '?print=1'
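parse_index above shows the pattern used by recipes that build their article list by scraping section pages instead of consuming RSS: it must return a list of (section title, article list) pairs, where each article is a dict with title, url, date and description keys. Stripped to its required shape (the titles and URLs below are placeholders, not from the recipe):

    def parse_index(self):
        # Minimal legal return value for BasicNewsRecipe.parse_index
        articles = [{
             'title'      : 'Example headline',       # placeholder
             'url'        : 'http://example.com/a1',  # placeholder
             'date'       : '',
             'description': '',
        }]
        return [('Example section', articles)]

The same skeleton recurs in the Haaretz, Singtao and Hindu recipes later in this commit.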
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010, elsuave'
 '''
 estadao.com.br
 '''
@@ -10,12 +10,12 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class Estadao(BasicNewsRecipe):
     title = 'O Estado de S. Paulo'
-    __author__ = 'Darko Miletic'
+    __author__ = 'elsuave (modified from Darko Miletic)'
     description = 'News from Brasil in Portuguese'
     publisher = 'O Estado de S. Paulo'
     category = 'news, politics, Brasil'
     oldest_article = 2
-    max_articles_per_feed = 100
+    max_articles_per_feed = 25
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf8'
@@ -30,13 +30,14 @@ class Estadao(BasicNewsRecipe):
 
     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
 
-    keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
+    keep_only_tags = [
+                       dict(name='div', attrs={'class':['bb-md-noticia','c5']})
+                     ]
 
     remove_tags = [
                     dict(name=['script','object','form','ul'])
-                   ,dict(name='div', attrs={'id':['votacao','estadaohoje']})
-                   ,dict(name='p', attrs={'id':'ctrl_texto'})
-                   ,dict(name='p', attrs={'class':'texto'})
+                   ,dict(name='div', attrs={'class':['fnt2 Color_04 bold','right fnt2 innerTop15 dvTmFont','™_01 right outerLeft15','tituloBox','tags']})
+                   ,dict(name='div', attrs={'id':['bb-md-noticia-subcom']})
                   ]
 
     feeds = [
@@ -51,13 +52,12 @@ class Estadao(BasicNewsRecipe):
            ,(u'Vida &', u'http://www.estadao.com.br/rss/vidae.xml')
            ]
 
-    def preprocess_html(self, soup):
-        ifr = soup.find('iframe')
-        if ifr:
-            ifr.extract()
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
-
     language = 'pt'
+
+    def get_article_url(self, article):
+        url = BasicNewsRecipe.get_article_url(self, article)
+        if '/Multimidia/' not in url:
+            return url
resources/recipes/evz.ro.recipe (new file, 52 lines)
@@ -0,0 +1,52 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+evz.ro
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class EVZ_Ro(BasicNewsRecipe):
+    title                 = 'evz.ro'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Romania'
+    publisher             = 'evz.ro'
+    category              = 'news, politics, Romania'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'ro'
+    masthead_url          = 'http://www.evz.ro/fileadmin/images/logo.gif'
+    extra_css             = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    preprocess_regexps = [
+                           (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
+                          ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
+                         ]
+
+    remove_tags = [
+                    dict(name=['form','embed','iframe','object','base','link','script','noscript'])
+                   ,dict(attrs={'class':['section','statsInfo','email il']})
+                   ,dict(attrs={'id' :'gallery'})
+                  ]
+
+    remove_tags_after = dict(attrs={'class':'section'})
+    keep_only_tags    = [dict(attrs={'class':'single'})]
+    remove_attributes = ['height','width']
+
+    feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
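The two preprocess_regexps in evz.ro throw away everything in <head> except the title before the HTML is parsed, which keeps broken or heavy header markup away from the converter. Roughly, on raw page source (the sample HTML is invented for the example):

    import re

    regexps = [
        (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),
         lambda match: '<head><title>'),
        (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),
         lambda match: '</title></head>'),
    ]
    raw = '<head><meta a="b"><title>EVZ</title><script>x()</script></head>'
    for pat, func in regexps:
        raw = pat.sub(func, raw)
    print raw  # -> <head><title>EVZ</title></head>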
resources/recipes/foreign_policy.recipe (new file, 45 lines)
@@ -0,0 +1,45 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.foreignpolicy.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ForeignPolicy(BasicNewsRecipe):
+    title                 = 'Foreign Policy'
+    __author__            = 'Darko Miletic'
+    description           = 'International News'
+    publisher             = 'Washingtonpost.Newsweek Interactive, LLC'
+    category              = 'news, politics, USA'
+    oldest_article        = 31
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    extra_css             = ' body{font-family: Georgia,"Times New Roman",Times,serif } img{margin-bottom: 0.4em} h1,h2,h3,h4,h5,h6{font-family: Arial,Helvetica,sans-serif} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    keep_only_tags = [dict(attrs={'id':['art-mast','art-body','auth-bio']})]
+    remove_tags = [dict(name='iframe'),dict(attrs={'id':['share-box','base-ad']})]
+    remove_attributes = ['height','width']
+
+    feeds = [(u'Articles', u'http://www.foreignpolicy.com/node/feed')]
+
+    def print_version(self, url):
+        return url + '?print=yes&page=full'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
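Foreign Policy is fetched through its single-page print layout: print_version is called on every article URL before download, so the query string appended here decides which variant of the page the converter actually sees. For instance (the URL is a placeholder):

    url = 'http://www.foreignpolicy.com/articles/example'  # placeholder
    print url + '?print=yes&page=full'
    # -> http://www.foreignpolicy.com/articles/example?print=yes&page=full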
@@ -1,56 +1,95 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
-haaretz.com
+www.haaretz.com
 '''
 
+import re
+from calibre import strftime
+from time import gmtime
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class Haaretz_en(BasicNewsRecipe):
-    title                 = 'Haaretz in English'
+class HaaretzPrint_en(BasicNewsRecipe):
+    title                 = 'Haaretz - print edition'
     __author__            = 'Darko Miletic'
-    description           = 'Haaretz.com, the online edition of Haaretz Newspaper in Israel, and analysis from Israel and the Middle East. Haaretz.com provides extensive and in-depth coverage of Israel, the Jewish World and the Middle East, including defense, diplomacy, the Arab-Israeli conflict, the peace process, Israeli politics, Jerusalem affairs, international relations, Iran, Iraq, Syria, Lebanon, the Palestinian Authority, the West Bank and the Gaza Strip, the Israeli business world and Jewish life in Israel and the Diaspora. '
-    publisher             = 'haaretz.com'
-    category              = 'news, politics, Israel'
+    description           = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
+    publisher             = 'Haaretz'
+    category              = "news, Haaretz, Israel news, Israel newspapers, Israel business news, Israel financial news, Israeli news,Israeli newspaper, Israeli newspapers, news from Israel, news in Israel, news Israel, news on Israel, newspaper Israel, Israel sports news, Israel diplomacy news"
     oldest_article        = 2
     max_articles_per_feed = 200
     no_stylesheets        = True
-    encoding              = 'cp1252'
+    encoding              = 'utf8'
     use_embedded_content  = False
     language              = 'en_IL'
     publication_type      = 'newspaper'
-    remove_empty_feeds    = True
-    masthead_url          = 'http://www.haaretz.com/images/logos/logoGrey.gif'
+    PREFIX                = 'http://www.haaretz.com'
+    masthead_url          = PREFIX + '/images/logos/logoGrey.gif'
     extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } '
 
+    preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')]
+
     conversion_options = {
                           'comment'  : description
                         , 'tags'     : category
                         , 'publisher': publisher
                         , 'language' : language
                         }
 
-    remove_tags = [dict(name='div', attrs={'class':['rightcol']}),dict(name='table')]
-    remove_tags_before = dict(name='h1')
-    remove_tags_after  = dict(attrs={'id':'innerArticle'})
-    keep_only_tags     = [dict(attrs={'id':'content'})]
+    keep_only_tags    = [dict(attrs={'id':'threecolumns'})]
+    remove_attributes = ['width','height']
+    remove_tags = [
+                    dict(name=['iframe','link','object','embed'])
+                   ,dict(name='div',attrs={'class':'rightcol'})
+                  ]
 
     feeds = [
-             (u'Opinion'              , u'http://www.haaretz.com/cmlink/opinion-rss-1.209234?localLinksEnabled=false' )
-            ,(u'Defense and diplomacy', u'http://www.haaretz.com/cmlink/defense-and-diplomacy-rss-1.208894?localLinksEnabled=false')
-            ,(u'National'             , u'http://www.haaretz.com/cmlink/national-rss-1.208896?localLinksEnabled=false' )
-            ,(u'International'        , u'http://www.haaretz.com/cmlink/international-rss-1.208898?localLinksEnabled=false' )
-            ,(u'Jewish World'         , u'http://www.haaretz.com/cmlink/jewish-world-rss-1.209085?localLinksEnabled=false' )
-            ,(u'Business'             , u'http://www.haaretz.com/cmlink/business-print-rss-1.264904?localLinksEnabled=false' )
-            ,(u'Real Estate'          , u'http://www.haaretz.com/cmlink/real-estate-print-rss-1.264977?localLinksEnabled=false' )
-            ,(u'Features'             , u'http://www.haaretz.com/cmlink/features-print-rss-1.264912?localLinksEnabled=false' )
-            ,(u'Arts and leisure'     , u'http://www.haaretz.com/cmlink/arts-and-leisure-rss-1.286090?localLinksEnabled=false' )
-            ,(u'Books'                , u'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false' )
-            ,(u'Food and Wine'        , u'http://www.haaretz.com/cmlink/food-and-wine-print-rss-1.265034?localLinksEnabled=false' )
-            ,(u'Sports'               , u'http://www.haaretz.com/cmlink/sports-rss-1.286092?localLinksEnabled=false' )
+             (u'News'          , PREFIX + u'/print-edition/news'        )
+            ,(u'Opinion'       , PREFIX + u'/print-edition/opinion'     )
+            ,(u'Business'      , PREFIX + u'/print-edition/business'    )
+            ,(u'Real estate'   , PREFIX + u'/print-edition/real-estate' )
+            ,(u'Sports'        , PREFIX + u'/print-edition/sports'      )
+            ,(u'Travel'        , PREFIX + u'/print-edition/travel'      )
+            ,(u'Books'         , PREFIX + u'/print-edition/books'       )
+            ,(u'Food & Wine'   , PREFIX + u'/print-edition/food-wine'   )
+            ,(u'Arts & Leisure', PREFIX + u'/print-edition/arts-leisure')
+            ,(u'Features'      , PREFIX + u'/print-edition/features'    )
            ]
 
+    def print_version(self, url):
+        article = url.rpartition('/')[2]
+        return 'http://www.haaretz.com/misc/article-print-page/' + article
+
+    def parse_index(self):
+        totalfeeds = []
+        lfeeds = self.get_feeds()
+        for feedobj in lfeeds:
+            feedtitle, feedurl = feedobj
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            for item in soup.findAll(attrs={'class':'text'}):
+                sp = item.find('span',attrs={'class':'h3 font-weight-normal'})
+                desc = item.find('p')
+                description = ''
+                if sp:
+                    if desc:
+                        description = self.tag_to_string(desc)
+                    link  = sp.a
+                    url   = self.PREFIX + link['href']
+                    title = self.tag_to_string(link)
+                    times = strftime('%a, %d %b %Y %H:%M:%S +0000',gmtime())
+                    articles.append({
+                                      'title'      :title
+                                     ,'date'       :times
+                                     ,'url'        :url
+                                     ,'description':description
+                                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
@@ -2,7 +2,7 @@ from __future__ import with_statement
 __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 
-import re
+import time
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class TheHindu(BasicNewsRecipe):
@@ -10,45 +10,41 @@ class TheHindu(BasicNewsRecipe):
     language = 'en_IN'
 
     oldest_article = 7
-    __author__ = 'Kovid Goyal and Sujata Raman'
+    __author__ = 'Kovid Goyal'
     max_articles_per_feed = 100
     no_stylesheets = True
 
-    remove_tags_before = {'name':'font', 'class':'storyhead'}
-    preprocess_regexps = [
-        (re.compile(r'<!-- story ends -->.*', re.DOTALL),
-         lambda match: '</body></html>'),
-        ]
-    extra_css = '''
-        .storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
-        body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
-        '''
-    feeds = [
-        (u'Main - Front Page', u'http://www.hindu.com/rss/01hdline.xml'),
-        (u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
-        (u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
-        (u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
-        (u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
-        (u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
-        (u'Main - Weather / Religion / Crossword / Cartoon',
-         u'http://www.hindu.com/rss/10hdline.xml'),
-        (u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
-        (u'Supplement - Literary Review',
-         u'http://www.hindu.com/rss/lrhdline.xml'),
-        (u'Supplement - Sunday Magazine',
-         u'http://www.hindu.com/rss/maghdline.xml'),
-        (u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
-        (u'Supplement - Business Review',
-         u'http://www.hindu.com/rss/bizhdline.xml'),
-        (u'Supplement - Book Review',
-         u'http://www.hindu.com/rss/brhdline.xml'),
-        (u'Supplement - Science & Technology',
-         u'http://www.hindu.com/rss/setahdline.xml')
-        ]
+    keep_only_tags = [dict(id='content')]
+    remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
+                   dict(id=['email-section', 'right-column', 'printfooter'])]
+
+    extra_css = '.photo-caption { font-size: smaller }'
 
     def postprocess_html(self, soup, first_fetch):
         for t in soup.findAll(['table', 'tr', 'td','center']):
             t.name = 'div'
         return soup
 
+    def parse_index(self):
+        today = time.strftime('%Y-%m-%d')
+        soup = self.index_to_soup(
+            'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
+        div = soup.find(id='left-column')
+        feeds = []
+        current_section = None
+        current_articles = []
+        for x in div.findAll(['h3', 'div']):
+            if current_section and x.get('class', '') == 'tpaper':
+                a = x.find('a', href=True)
+                if a is not None:
+                    current_articles.append({'url':a['href']+'?css=print',
+                        'title':self.tag_to_string(a), 'date': '',
+                        'description':''})
+            if x.name == 'h3':
+                if current_section and current_articles:
+                    feeds.append((current_section, current_articles))
+                current_section = self.tag_to_string(x)
+                current_articles = []
+        return feeds
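The Hindu's new parse_index walks a flat run of h3 section headings and div.tpaper article entries, carrying the current section along as state. The same state-machine shape recurs in several recipes in this commit; a self-contained rendering of it (the markup is invented, the class names come from the diff):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = '''<div id="left-column">
    <h3>National</h3>
    <div class="tpaper"><a href="/a1">Story one</a></div>
    <h3>Sport</h3>
    <div class="tpaper"><a href="/a2">Story two</a></div>
    </div>'''

    soup = BeautifulSoup(html)
    feeds, section, articles = [], None, []
    for x in soup.find(id='left-column').findAll(['h3', 'div']):
        if section and x.get('class', '') == 'tpaper':
            a = x.find('a', href=True)
            if a is not None:
                articles.append((a['href'], a.string))
        if x.name == 'h3':
            if section and articles:
                feeds.append((section, articles))
            section, articles = x.string, []
    if section and articles:   # flush the final section
        feeds.append((section, articles))
    print feeds
    # -> [(u'National', [(u'/a1', u'Story one')]), (u'Sport', [(u'/a2', u'Story two')])]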
@@ -1,12 +1,15 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 
+import string, pprint
+
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class HoustonChronicle(BasicNewsRecipe):
 
     title       = u'The Houston Chronicle'
     description = 'News from Houston, Texas'
-    __author__  = 'Kovid Goyal and Sujata Raman'
+    __author__  = 'Kovid Goyal'
     language    = 'en'
     timefmt     = ' [%a, %d %b, %Y]'
     no_stylesheets = True
@@ -38,54 +41,23 @@ class HoustonChronicle(BasicNewsRecipe):
 
     def parse_index(self):
-        soup = self.index_to_soup('http://www.chron.com/news/')
-        container = soup.find('table', attrs={'class':'body-columns'})
+        categories = ['news', 'sports', 'business', 'entertainment', 'life',
+                      'travel']
         feeds = []
-        current_section = 'Top Stories'
-        current_articles = []
-        self.log('\tFound section:', current_section)
-        for div in container.findAll('div'):
-            if div.get('class', None) == 'module-mast':
-                t = self.tag_to_string(div).replace(u'\xbb', '').strip()
-                if t and 'interactives' not in t:
-                    if current_section and current_articles:
-                        feeds.append((current_section, current_articles))
-                    current_section = t
-                    current_articles = []
-                    self.log('\tFound section:', current_section)
-            elif div.get('storyid', False):
-                a = div.find('a', href=True)
-                if a:
-                    title = self.tag_to_string(a)
-                    url = a.get('href')
-                    if title and url:
-                        if url.startswith('/'):
-                            url = 'http://www.chron.com'+url
-                        self.log('\t\tFound article:', title)
-                        self.log('\t\t\t', url)
-                        current_articles.append({'title':title, 'url':url,
-                            'date':'', 'description':''})
-            elif div.get('class', None) == 'columnbox' and \
-                    'special' in current_section.lower():
-                a = div.find('a')
-                if a:
-                    title = self.tag_to_string(a)
-                    url = a.get('href')
-                    if title and url:
-                        if not url.startswith('/'): continue
-                        url = 'http://www.chron.com'+url
-                        self.log('\t\tFound article:', title)
-                        self.log('\t\t\t', url)
-                        a.extract()
-                        desc = self.tag_to_string(div)
-                        current_articles.append({'title':title, 'url':url,
-                            'date':'', 'description':desc})
-
-        if current_section and current_articles:
-            feeds.append((current_section, current_articles))
+        for cat in categories:
+            articles = []
+            soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
+            for elem in soup.findAll(comptype='story', storyid=True):
+                a = elem.find('a', href=True)
+                if a is None: continue
+                url = a['href']
+                if not url.startswith('http://'):
+                    url = 'http://www.chron.com'+url
+                articles.append({'title':self.tag_to_string(a), 'url':url,
+                    'description':'', 'date':''})
+                pprint.pprint(articles[-1])
+            if articles:
+                feeds.append((string.capwords(cat), articles))
         return feeds
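The rewritten Houston Chronicle index leans on BeautifulSoup's keyword filters: findAll(comptype='story', storyid=True) matches tags that carry comptype="story" and any storyid attribute at all, which is much more robust than walking layout tables. Illustrated on invented markup:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = ('<div comptype="story" storyid="42"><a href="/x">Hit</a></div>'
            '<div comptype="promo" storyid="43"><a href="/y">Miss</a></div>')
    soup = BeautifulSoup(html)
    for elem in soup.findAll(comptype='story', storyid=True):
        print elem.a['href']   # prints only /x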
resources/recipes/ifzm.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1277305250(BasicNewsRecipe):
+    title = u'infzm - China Southern Weekly'
+    oldest_article = 14
+    max_articles_per_feed = 100
+
+    feeds = [(u'\u5357\u65b9\u5468\u672b-\u70ed\u70b9\u65b0\u95fb', u'http://www.infzm.com/rss/home/rss2.0.xml'),
+             (u'\u5357\u65b9\u5468\u672b-\u7ecf\u6d4e\u65b0\u95fb', u'http://www.infzm.com/rss/economic.xml'),
+             (u'\u5357\u65b9\u5468\u672b-\u6587\u5316\u65b0\u95fb', u'http://www.infzm.com/rss/culture.xml'),
+             (u'\u5357\u65b9\u5468\u672b-\u751f\u6d3b\u65f6\u5c1a', u'http://www.infzm.com/rss/lifestyle.xml'),
+             (u'\u5357\u65b9\u5468\u672b-\u89c2\u70b9', u'http://www.infzm.com/rss/opinion.xml')
+            ]
+    __author__ = 'rty'
+    __version__ = '1.0'
+    language = 'zh'
+    publisher = 'http://www.infzm.com'
+    description = 'Chinese Weekly Tabloid'
+    category = 'News, China'
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+    #encoding = 'GB2312'
+    encoding = 'UTF-8'
+    conversion_options = {'linearize_tables':True}
+    masthead_url = 'http://i50.tinypic.com/2qmfb7l.jpg'
+
+    extra_css = '''
+        @font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }
+        body {
+            margin-right: 8pt;
+            font-family: 'DroidFont', serif;}
+        .detailContent {font-family: 'DroidFont', serif, sans-serif}
+        '''
+
+    keep_only_tags = [
+        dict(name='div', attrs={'id':'detailContent'}),
+        ]
+    remove_tags = [
+        dict(name='div', attrs={'id':['detailTools', 'detailSideL', 'pageNum']}),
+        ]
+    remove_tags_after = [
+        dict(name='div', attrs={'id':'pageNum'}),
+        ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(color=True):
+            del item['color']
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
@@ -1,6 +1,6 @@
 __license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lrb.co.uk
 '''
@@ -8,32 +8,38 @@ lrb.co.uk
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class LondonReviewOfBooks(BasicNewsRecipe):
-    title                 = u'London Review of Books'
-    __author__            = u'Darko Miletic'
-    description           = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
-    category              = 'news, literature, England'
-    publisher             = 'London Review of Books'
-    oldest_article        = 7
+    title                 = 'London Review of Books (free)'
+    __author__            = 'Darko Miletic'
+    description           = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
+    category              = 'news, literature, UK'
+    publisher             = 'LRB ltd.'
+    oldest_article        = 15
     max_articles_per_feed = 100
     language              = 'en_GB'
     no_stylesheets        = True
     use_embedded_content  = False
     encoding              = 'utf-8'
+    publication_type      = 'magazine'
+    masthead_url          = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif'
+    extra_css             = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
 
     conversion_options = {
                           'comments'  : description
                          ,'tags'      : category
                          ,'language'  : language
                          ,'publisher' : publisher
                          }
 
-    keep_only_tags = [dict(name='div' , attrs={'id':'main'})]
-    remove_tags = [
-                    dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
-                   ,dict(name='div' , attrs={'id'   :['mainmenu','precontent','otherarticles'] })
-                   ,dict(name='span', attrs={'class':['inlineright','article-icons']})
-                   ,dict(name='ul'  , attrs={'class':'article-controls'})
-                   ,dict(name='p'   , attrs={'class':'meta-info' })
-                  ]
+    keep_only_tags    = [dict(attrs={'class':['article-body indent','letters','article-list']})]
+    remove_attributes = ['width','height']
 
     feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
+
+    def get_cover_url(self):
+        cover_url = None
+        soup = self.index_to_soup('http://www.lrb.co.uk/')
+        cover_item = soup.find('p',attrs={'class':'cover'})
+        if cover_item:
+            cover_url = 'http://www.lrb.co.uk' + cover_item.a.img['src']
+        return cover_url
resources/recipes/lrb_payed.recipe (new file, 75 lines)
@@ -0,0 +1,75 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+lrb.co.uk
+'''
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LondonReviewOfBooksPayed(BasicNewsRecipe):
+    title                 = 'London Review of Books'
+    __author__            = 'Darko Miletic'
+    description           = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
+    category              = 'news, literature, UK'
+    publisher             = 'LRB Ltd.'
+    max_articles_per_feed = 100
+    language              = 'en_GB'
+    no_stylesheets        = True
+    delay                 = 1
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    INDEX                 = 'http://www.lrb.co.uk'
+    LOGIN                 = INDEX + '/login'
+    masthead_url          = INDEX + '/assets/images/lrb_logo_big.gif'
+    needs_subscription    = True
+    publication_type      = 'magazine'
+    extra_css             = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open(self.LOGIN)
+            br.select_form(nr=1)
+            br['username'] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
+    def parse_index(self):
+        articles = []
+        soup = self.index_to_soup(self.INDEX)
+        cover_item = soup.find('p',attrs={'class':'cover'})
+        lrbtitle = self.title
+        if cover_item:
+            self.cover_url = self.INDEX + cover_item.a.img['src']
+            content = self.INDEX + cover_item.a['href']
+            soup2 = self.index_to_soup(content)
+            sitem = soup2.find(attrs={'class':'article-list'})
+            lrbtitle = soup2.head.title.string
+            for item in sitem.findAll('a',attrs={'class':'title'}):
+                description = u''
+                title_prefix = u''
+                feed_link = item
+                if feed_link.has_key('href'):
+                    url = self.INDEX + feed_link['href']
+                    title = title_prefix + self.tag_to_string(feed_link)
+                    date = strftime(self.timefmt)
+                    articles.append({
+                                      'title'      :title
+                                     ,'date'       :date
+                                     ,'url'        :url
+                                     ,'description':description
+                                    })
+        return [(lrbtitle, articles)]
+
+    conversion_options = {
+                          'comments'  : description
+                         ,'tags'      : category
+                         ,'language'  : language
+                         ,'publisher' : publisher
+                         }
+
+    keep_only_tags = [dict(name='div' , attrs={'class':['article-body indent','letters']})]
+    remove_attributes = ['width','height']
@@ -7,18 +7,18 @@ class NYTimes(BasicNewsRecipe):
     __author__  = 'Krittika Goyal'
     description = 'Canadian national newspaper'
     timefmt     = ' [%d %b, %Y]'
-    needs_subscription = False
     language    = 'en_CA'
+    needs_subscription = False
 
     no_stylesheets = True
     #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
-    #remove_tags_after = dict(name='td', attrs={'class':'newptool1'})
+    remove_tags_after = dict(name='div', attrs={'class':'npStoryTools npWidth1-6 npRight npTxtStrong'})
     remove_tags = [
        dict(name='iframe'),
-       dict(name='div', attrs={'class':'story-tools'}),
+       dict(name='div', attrs={'class':['story-tools', 'npStoryTools npWidth1-6 npRight npTxtStrong']}),
        #dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
        #dict(name='form', attrs={'onsubmit':''}),
-       #dict(name='table', attrs={'cellspacing':'0'}),
+       dict(name='ul', attrs={'class':'npTxtAlt npGroup npTxtCentre npStoryShare npTxtStrong npTxtDim'}),
     ]
 
     # def preprocess_html(self, soup):
@@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
     def parse_index(self):
         soup = self.nejm_get_index()
 
-        div = soup.find(id='LegoText4')
+        div = soup.find(id='npContentMain')
 
         current_section = None
         current_articles = []
@@ -50,7 +50,7 @@ class NYTimes(BasicNewsRecipe):
                 current_section = self.tag_to_string(x)
                 current_articles = []
                 self.log('\tFound section:', current_section)
-            if current_section is not None and x.name == 'h3':
+            if current_section is not None and x.name == 'h5':
                 # Article found
                 title = self.tag_to_string(x)
                 a = x.find('a', href=lambda x: x and 'story' in x)
@@ -59,8 +59,8 @@ class NYTimes(BasicNewsRecipe):
                 url = a.get('href', False)
                 if not url or not title:
                     continue
-                if url.startswith('story'):
-                    url = 'http://www.nationalpost.com/todays-paper/'+url
+                #if url.startswith('story'):
+                url = 'http://www.nationalpost.com/todays-paper/'+url
                 self.log('\t\tFound article:', title)
                 self.log('\t\t\t', url)
                 current_articles.append({'title': title, 'url':url,
@@ -70,28 +70,11 @@ class NYTimes(BasicNewsRecipe):
             feeds.append((current_section, current_articles))
 
         return feeds
 
     def preprocess_html(self, soup):
-        story = soup.find(name='div', attrs={'class':'triline'})
-        page2_link = soup.find('p','pagenav')
-        if page2_link:
-            atag = page2_link.find('a',href=True)
-            if atag:
-                page2_url = atag['href']
-                if page2_url.startswith('story'):
-                    page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
-                elif page2_url.startswith( '/todays-paper/story.html'):
-                    page2_url = 'http://www.nationalpost.com/'+page2_url
-                page2_soup = self.index_to_soup(page2_url)
-                if page2_soup:
-                    page2_content = page2_soup.find('div','story-content')
-                    if page2_content:
-                        full_story = BeautifulSoup('<div></div>')
-                        full_story.insert(0,story)
-                        full_story.insert(1,page2_content)
-                        story = full_story
+        story = soup.find(name='div', attrs={'id':'npContentMain'})
+        ##td = heading.findParent(name='td')
+        ##td.extract()
         soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
         body = soup.find(name='body')
         body.insert(0, story)
         return soup
@@ -32,15 +32,16 @@ class NewScientist(BasicNewsRecipe):
     }
     preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]
 
-    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})]
+    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]
 
     remove_tags = [
                     dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
-                   ,dict(name='div' , attrs={'id'   :['compnl','artIssueInfo','artTools']})
+                   ,dict(name='div' , attrs={'id'   :['compnl','artIssueInfo','artTools','comments','blgsocial']})
                    ,dict(name='p'   , attrs={'class':['marker','infotext' ]})
                    ,dict(name='meta', attrs={'name' :'description' })
+                   ,dict(name='a'   , attrs={'rel'  :'tag' })
                   ]
-    remove_tags_after = dict(attrs={'class':'nbpcopy'})
+    remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
     remove_attributes = ['height','width']
 
     feeds = [
@@ -17,7 +17,7 @@ class NYTimes(BasicNewsRecipe):
     title = 'New York Times Top Stories'
     __author__ = 'GRiker'
     language = 'en'
-    requires_version = (0, 7, 3)
+    requires_version = (0, 7, 5)
     description = 'Top Stories from the New York Times'
 
     # List of sections typically included in Top Stories. Use a keyword from the
@@ -79,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
        'doubleRule',
        'dottedLine',
        'entry-meta',
+       'entry-response module',
        'icon enlargeThis',
        'leftNavTabs',
        'module box nav',
@@ -88,6 +89,7 @@ class NYTimes(BasicNewsRecipe):
        'relatedSearchesModule',
        'side_tool',
        'singleAd',
+       'subNavigation clearfix',
        'subNavigation tabContent active',
        'subNavigation tabContent active clearfix',
        ]}),
@@ -110,6 +112,7 @@ class NYTimes(BasicNewsRecipe):
        'navigation',
        'portfolioInline',
        'relatedArticles',
+       'respond',
        'side_search',
        'side_index',
        'side_tool',
@@ -458,8 +461,10 @@ class NYTimes(BasicNewsRecipe):
             if mp_off >= 0:
                 c = c[:mp_off]
             emTag.insert(0, c)
-            hrTag = Tag(soup, 'hr')
-            #hrTag['style'] = "margin-top:0em;margin-bottom:0em"
+            #hrTag = Tag(soup, 'hr')
+            #hrTag['class'] = 'caption_divider'
+            hrTag = Tag(soup, 'div')
+            hrTag['class'] = 'divider'
             emTag.insert(1, hrTag)
             caption.replaceWith(emTag)
@@ -13,14 +13,14 @@ Story
 import re, string, time
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag
 
 class NYTimes(BasicNewsRecipe):
 
     title = 'The New York Times'
     __author__ = 'GRiker'
     language = 'en'
-    requires_version = (0, 7, 3)
+    requires_version = (0, 7, 5)
 
     description = 'Daily news from the New York Times (subscription version)'
     allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials',
@@ -66,6 +66,7 @@ class NYTimes(BasicNewsRecipe):
        'doubleRule',
        'dottedLine',
        'entry-meta',
+       'entry-response module',
        'icon enlargeThis',
        'leftNavTabs',
        'module box nav',
@@ -75,6 +76,7 @@ class NYTimes(BasicNewsRecipe):
        'relatedSearchesModule',
        'side_tool',
        'singleAd',
+       'subNavigation clearfix',
        'subNavigation tabContent active',
        'subNavigation tabContent active clearfix',
        ]}),
@@ -97,6 +99,7 @@ class NYTimes(BasicNewsRecipe):
        'navigation',
        'portfolioInline',
        'relatedArticles',
+       'respond',
        'side_search',
        'side_index',
        'side_tool',
@@ -333,7 +336,7 @@ class NYTimes(BasicNewsRecipe):
             self.log(">>> No class:'columnGroup first' found <<<")
         # Change class="kicker" to <h3>
         kicker = soup.find(True, {'class':'kicker'})
-        if kicker and kicker.contents[0]:
+        if kicker and kicker.contents and kicker.contents[0]:
             h3Tag = Tag(soup, "h3")
             h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
                          use_alt=False)))
@@ -348,8 +351,10 @@ class NYTimes(BasicNewsRecipe):
             if mp_off >= 0:
                 c = c[:mp_off]
             emTag.insert(0, c)
-            hrTag = Tag(soup, 'hr')
-            #hrTag['style'] = "margin-top:0em;margin-bottom:0em"
+            #hrTag = Tag(soup, 'hr')
+            #hrTag['class'] = 'caption_divider'
+            hrTag = Tag(soup, 'div')
+            hrTag['class'] = 'divider'
            emTag.insert(1, hrTag)
            caption.replaceWith(emTag)
@@ -417,12 +422,11 @@ class NYTimes(BasicNewsRecipe):
 
         return soup
 
-    def postprocess_book(self, oeb, opts, log) :
-        print "\npostprocess_book()\n"
-
-        def extract_byline(href) :
-            # <meta name="byline" content=
-            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
+    def populate_article_metadata(self,article,soup,first):
+        '''
+        Extract author and description from article, add to article metadata
+        '''
+        def extract_author(soup):
             byline = soup.find('meta',attrs={'name':['byl','CLMST']})
             if byline :
                 author = byline['content']
@@ -432,50 +436,34 @@ class NYTimes(BasicNewsRecipe):
             if byline:
                 author = byline.renderContents()
             else:
-                print "couldn't find byline in %s" % href
                 print soup.prettify()
                 return None
-            # Kill commas - Kindle switches to '&'
-            return re.sub(',','',author)
+            return author
 
-        def extract_description(href) :
-            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
+        def extract_description(soup):
             description = soup.find('meta',attrs={'name':['description','description ']})
             if description :
-                # print repr(description['content'])
-                # print self.massageNCXText(description['content'])
                 return self.massageNCXText(description['content'])
             else:
                 # Take first paragraph of article
-                articleBody = soup.find('div',attrs={'id':'articleBody'})
-                if not articleBody:
+                articlebody = soup.find('div',attrs={'id':'articlebody'})
+                if not articlebody:
                     # Try again with class instead of id
-                    articleBody = soup.find('div',attrs={'class':'articleBody'})
-                if not articleBody:
-                    print 'postprocess_book.extract_description(): Did not find <div id="articleBody">:'
+                    articlebody = soup.find('div',attrs={'class':'articlebody'})
+                if not articlebody:
+                    print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
                     print soup.prettify()
                     return None
-                paras = articleBody.findAll('p')
+                paras = articlebody.findAll('p')
                 for p in paras:
                     if p.renderContents() > '' :
                         return self.massageNCXText(self.tag_to_string(p,use_alt=False))
                 return None
 
-        # Method entry point here
-        # Single section toc looks different than multi-section tocs
-        if oeb.toc.depth() == 2 :
-            for article in oeb.toc :
-                if article.author is None :
-                    article.author = extract_byline(article.href)
-                if article.description is None :
-                    article.description = extract_description(article.href).decode('utf-8')
-        elif oeb.toc.depth() == 3 :
-            for section in oeb.toc :
-                for article in section :
-                    if article.author is None :
-                        article.author = extract_byline(article.href)
-                    if article.description is None :
-                        article.description = extract_description(article.href)
+        if not article.author:
+            article.author = extract_author(soup)
+        if not article.summary:
+            article.summary = article.text_summary = extract_description(soup)
 
     def strip_anchors(self,soup):
         paras = soup.findAll(True)
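The change from postprocess_book to populate_article_metadata (and the matching requires_version bump to 0.7.5) moves author/description extraction from a pass over the assembled book to a per-article hook: calibre now calls the recipe with each article's parsed soup as soon as it is downloaded, so there is no need to re-parse manifest items. The hook's shape, reduced to its essentials:

    # Called once per article right after download; 'article' is the feed
    # entry being filled in, 'soup' the parsed page, 'first' is True for
    # the first page of a multi-page article.
    def populate_article_metadata(self, article, soup, first):
        byline = soup.find('meta', attrs={'name':'byl'})
        if byline is not None and not article.author:
            article.author = byline['content']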
@@ -28,7 +28,7 @@ class OldNewThing(BasicNewsRecipe):
     }
 
     remove_attributes = ['width','height']
-    keep_only_tags = [dict(attrs={'class':['postsub','comment']})]
+    keep_only_tags = [dict(attrs={'class':'full-post'})]
+    remove_tags = [dict(attrs={'class':['post-attributes','post-tags','post-actions']})]
     feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')]
resources/recipes/singtao_daily.recipe (new file, 79 lines)
@@ -0,0 +1,79 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1278063072(BasicNewsRecipe):
+    title = u'Singtao Daily - Canada'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__ = 'rty'
+    description = 'Toronto Canada Chinese Newspaper'
+    publisher = 'news.singtao.ca'
+    category = 'Chinese, News, Canada'
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+    language = 'zh'
+    conversion_options = {'linearize_tables':True}
+    masthead_url = 'http://news.singtao.ca/i/site_2009/logo.jpg'
+    extra_css = '''
+        @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }
+        body {text-align: justify; margin-right: 8pt; font-family: 'DroidFont', serif;}
+        h1 {font-family: 'DroidFont', serif;}
+        .articledescription {font-family: 'DroidFont', serif;}
+        '''
+    keep_only_tags = [
+        dict(name='div', attrs={'id':['title','storybody']}),
+        dict(name='div', attrs={'class':'content'})
+        ]
+
+    def parse_index(self):
+        feeds = []
+        for title, url in [
+            ('Editorial',
+             'http://news.singtao.ca/toronto/editorial.html'),
+            ('Toronto \xe5\x9f\x8e\xe5\xb8\x82/\xe7\xa4\xbe\xe5\x8d\x80'.decode('utf-8'),
+             'http://news.singtao.ca/toronto/city.html'),
+            ('Canada \xe5\x8a\xa0\xe5\x9c\x8b'.decode('utf-8'),
+             'http://news.singtao.ca/toronto/canada.html'),
+            ('Entertainment',
+             'http://news.singtao.ca/toronto/entertainment.html'),
+            ('World',
+             'http://news.singtao.ca/toronto/world.html'),
+            ('Finance \xe5\x9c\x8b\xe9\x9a\x9b\xe8\xb2\xa1\xe7\xb6\x93'.decode('utf-8'),
+             'http://news.singtao.ca/toronto/finance.html'),
+            ('Sports', 'http://news.singtao.ca/toronto/sports.html'),
+            ]:
+            articles = self.parse_section(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def parse_section(self, url):
+        soup = self.index_to_soup(url)
+        div = soup.find(attrs={'class': ['newslist paddingL10T10','newslist3 paddingL10T10']})
+        #date = div.find(attrs={'class': 'underlineBLK'})
+        current_articles = []
+        for li in div.findAll('li'):
+            a = li.find('a', href = True)
+            if a is None:
+                continue
+            title = self.tag_to_string(a)
+            url = a.get('href', False)
+            if not url or not title:
+                continue
+            if url.startswith('/'):
+                url = 'http://news.singtao.ca'+url
+            # self.log('Found article:', title)
+            # self.log('\t', url)
+            current_articles.append({'title': title, 'url': url, 'description':''})
+        return current_articles
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll(width=True):
+            del item['width']
+        return soup
resources/recipes/statesman.recipe (new file, 35 lines)
@@ -0,0 +1,35 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1278049615(BasicNewsRecipe):
+    title = u'Statesman'
+    publisher = 'http://www.statesman.com/'
+    description = 'Austin Texas Daily Newspaper'
+    category = 'News, Austin, Texas'
+    __author__ = 'rty'
+    oldest_article = 3
+    max_articles_per_feed = 100
+
+    feeds = [(u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
+             (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
+             (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
+             (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
+             (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
+            ]
+    masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
+    #temp_files = []
+    #articles_are_obfuscated = True
+
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+    language = 'en'
+    encoding = 'utf-8'
+    conversion_options = {'linearize_tables':True}
+    remove_tags = [
+        dict(name='div', attrs={'id':'cxArticleOptions'}),
+        ]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'cxArticleHeader'}),
+        dict(name='div', attrs={'id':'cxArticleBodyText'}),
+        ]
@@ -1,21 +1,16 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 
 class TimesOfIndia(BasicNewsRecipe):
     title = u'Times of India'
     language = 'en_IN'
-    __author__ = 'Krittika Goyal'
+    __author__ = 'Kovid Goyal'
     oldest_article = 1 #days
     max_articles_per_feed = 25
 
-    remove_stylesheets = True
+    no_stylesheets = True
+    keep_only_tags = [dict(attrs={'class':'prttabl'})]
     remove_tags = [
-        dict(name='iframe'),
-        dict(name='td', attrs={'class':'newptool1'}),
-        dict(name='div', attrs={'id':'newptool'}),
-        dict(name='ul', attrs={'class':'newtabcontent_tabs_new'}),
-        dict(name='b', text='Topics'),
-        dict(name='span', text=':'),
+        dict(style=lambda x: x and 'float' in x)
     ]
 
     feeds = [
@@ -42,13 +37,8 @@ class TimesOfIndia(BasicNewsRecipe):
        ('Most Read',
         'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
        ]
 
+    def print_version(self, url):
+        return url + '?prtpage=1'
+
     def preprocess_html(self, soup):
-        heading = soup.find(name='h1', attrs={'class':'heading'})
-        td = heading.findParent(name='td')
-        td.extract()
-        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
-        body = soup.find(name='body')
-        body.insert(0, td)
-        td.name = 'div'
        return soup
35
resources/recipes/winnipeg_sun.recipe
Normal file
@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277647803(BasicNewsRecipe):
    title = u'Winnipeg Sun'
    __author__ = 'rty'
    __version__ = '1.0'
    oldest_article = 2
    publisher = 'www.winnipegsun.com'
    description = 'Winnipeg Newspaper'
    category = 'News, Winnipeg, Canada'
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'UTF-8'
    remove_javascript = True
    use_embedded_content = False
    language = 'en_CA'

    feeds = [
        (u'News', u'http://www.winnipegsun.com/news/rss.xml'),
        (u'Columnists', u'http://www.winnipegsun.com/columnists/rss.xml'),
        (u'Editorial', u'http://www.winnipegsun.com/comment/editorial/rss.xml'),
        (u'Entertainment', u'http://www.winnipegsun.com/entertainment/rss.xml'),
        (u'Life', u'http://www.winnipegsun.com/life/rss.xml'),
        (u'Money', u'http://www.winnipegsun.com/money/rss.xml')
    ]
    keep_only_tags = [
        dict(name='div', attrs={'id':'article'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['leftBox','bottomBox clear']}),
        dict(name='ul', attrs={'class':'tabs dl contentSwap'}),
        dict(name='div', attrs={'id':'commentsBottom'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class':'bottomBox clear'})
    ]
@ -15,22 +15,22 @@ class ZAOBAO(BasicNewsRecipe):
     no_stylesheets = True
     recursions = 1
     language = 'zh'

     encoding = 'gbk'
     # multithreaded_fetch = True

     keep_only_tags = [
-        dict(name='table', attrs={'cellpadding':'9'}),
-        dict(name='table', attrs={'class':'cont'}),
-        dict(name='div', attrs={'id':'content'}),
+        dict(name='td', attrs={'class':'text'}),
         dict(name='span', attrs={'class':'page'}),
+        dict(name='div', attrs={'id':'content'})
     ]

     remove_tags = [
         dict(name='table', attrs={'cellspacing':'9'}),
+        dict(name='fieldset'),
+        dict(name='div', attrs={'width':'30%'}),
     ]

-    extra_css = '\
+    extra_css = '\n\
         @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n\
         body{font-family: serif1, serif}\n\
         .article_description{font-family: serif1, serif}\n\
@ -41,7 +41,10 @@ class ZAOBAO(BasicNewsRecipe):
         .article {font-size:medium}\n\
         .navbar {font-size: small}\n\
         .feed{font-size: medium}\n\
-        .small{font-size: small; padding-right: 8%}\n'
+        .small{font-size: small;padding-right: 8pt}\n\
+        .text{padding-right: 8pt}\n\
+        p{text-indent: 0cm}\n\
+        div#content{padding-right: 10pt}'

     INDEXES = [
         (u'\u65b0\u95fb\u56fe\u7247', u'http://www.zaobao.com/photoweb/photoweb_idx.shtml')
@ -51,27 +54,35 @@ class ZAOBAO(BasicNewsRecipe):
     DESC_SENSE = u'\u8054\u5408\u65e9\u62a5\u7f51'

     feeds = [
         (u'\u5373\u65f6\u62a5\u9053', u'http://realtime.zaobao.com/news.xml'),
         (u'\u4e2d\u56fd\u65b0\u95fb', u'http://www.zaobao.com/zg/zg.xml'),
         (u'\u56fd\u9645\u65b0\u95fb', u'http://www.zaobao.com/gj/gj.xml'),
         (u'\u4e16\u754c\u62a5\u520a\u6587\u8403', u'http://www.zaobao.com/wencui/wencui.xml'),
         (u'\u4e1c\u5357\u4e9a\u65b0\u95fb', u'http://www.zaobao.com/yx/yx.xml'),
         (u'\u65b0\u52a0\u5761\u65b0\u95fb', u'http://www.zaobao.com/sp/sp.xml'),
         (u'\u4eca\u65e5\u89c2\u70b9', u'http://www.zaobao.com/yl/yl.xml'),
         (u'\u4e2d\u56fd\u8d22\u7ecf', u'http://www.zaobao.com/cz/cz.xml'),
         (u'\u72ee\u57ce\u8d22\u7ecf', u'http://www.zaobao.com/cs/cs.xml'),
         (u'\u5168\u7403\u8d22\u7ecf', u'http://www.zaobao.com/cg/cg.xml'),
         (u'\u65e9\u62a5\u4f53\u80b2', u'http://www.zaobao.com/ty/ty.xml'),
         (u'\u65e9\u62a5\u526f\u520a', u'http://www.zaobao.com/fk/fk.xml'),
     ]

+    def preprocess_html(self, soup):
+        for tag in soup.findAll(name='a'):
+            if tag.has_key('href'):
+                tag_url = tag['href']
+                if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1:
+                    del tag['href']
+        return soup
+
     def postprocess_html(self, soup, first):
         for tag in soup.findAll(name=['table', 'tr', 'td']):
             tag.name = 'div'
         return soup

     def parse_feeds(self):
-        self.log.debug('ZAOBAO overrided parse_feeds()')
+        self.log_debug(_('ZAOBAO overrided parse_feeds()'))
         parsed_feeds = BasicNewsRecipe.parse_feeds(self)

         for id, obj in enumerate(self.INDEXES):
@ -88,7 +99,7 @@ class ZAOBAO(BasicNewsRecipe):
                 a_title = self.tag_to_string(a)
                 date = ''
                 description = ''
-                self.log.debug('adding %s at %s'%(a_title,a_url))
+                self.log_debug(_('adding %s at %s')%(a_title,a_url))
                 articles.append({
                     'title':a_title,
                     'date':date,
@ -97,26 +108,25 @@ class ZAOBAO(BasicNewsRecipe):
                 })

             pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
-                max_articles_per_feed=self.max_articles_per_feed,
-                log=self.log)
+                max_articles_per_feed=self.max_articles_per_feed)

-            self.log.debug('adding %s to feed'%(title))
+            self.log_debug(_('adding %s to feed')%(title))
             for feed in pfeeds:
-                self.log.debug('adding feed: %s'%(feed.title))
+                self.log_debug(_('adding feed: %s')%(feed.title))
                 feed.description = self.DESC_SENSE
                 parsed_feeds.append(feed)
                 for a, article in enumerate(feed):
-                    self.log.debug('added article %s from %s'%(article.title, article.url))
-                self.log.debug('added feed %s'%(feed.title))
+                    self.log_debug(_('added article %s from %s')%(article.title, article.url))
+                self.log_debug(_('added feed %s')%(feed.title))

         for i, feed in enumerate(parsed_feeds):
             # workaorund a strange problem: Somethimes the xml encoding is not apllied correctly by parse()
             weired_encoding_detected = False
             if not isinstance(feed.description, unicode) and self.encoding and feed.description:
-                self.log.debug('Feed %s is not encoded correctly, manually replace it'%(feed.title))
+                self.log_debug(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
                 feed.description = feed.description.decode(self.encoding, 'replace')
             elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
-                self.log.debug('Feed %s is strangely encoded, manually redo all'%(feed.title))
+                self.log_debug(_('Feed %s is weired encoded, manually redo all')%(feed.title))
                 feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
                 weired_encoding_detected = True

@ -138,7 +148,7 @@ class ZAOBAO(BasicNewsRecipe):
                     article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')

                 if article.title == "Untitled article":
-                    self.log.debug('Removing empty article %s from %s'%(article.title, article.url))
+                    self.log_debug(_('Removing empty article %s from %s')%(article.title, article.url))
                     # remove the article
                     feed.articles[a:a+1] = []
         return parsed_feeds
@ -406,3 +406,8 @@ img, object, svg|svg {
     width: auto;
     height: auto;
 }
+
+/* These are needed because ADE renders anchors the same as links */
+
+a { text-decoration: inherit; color: inherit; cursor: inherit }
+a[href] { text-decoration: underline; color: blue; cursor: pointer }
@ -40,19 +40,20 @@ class LinuxFreeze(Command):
             '/usr/bin/pdftohtml',
             '/usr/lib/libwmflite-0.2.so.7',
             '/usr/lib/liblcms.so.1',
+            '/usr/lib/liblcms2.so.2',
+            '/usr/lib/libstlport.so.5.1',
             '/tmp/calibre-mount-helper',
             '/usr/lib/libunrar.so',
             '/usr/lib/libchm.so.0',
             '/usr/lib/libsqlite3.so.0',
             '/usr/lib/libsqlite3.so.0',
             '/usr/lib/libmng.so.1',
-            '/usr/lib/libpodofo.so.0.6.99',
+            '/usr/lib/libpodofo.so.0.8.1',
             '/lib/libz.so.1',
             '/lib/libuuid.so.1',
-            '/usr/lib/libtiff.so.3',
+            '/usr/lib/libtiff.so.5',
             '/lib/libbz2.so.1',
-            '/usr/lib/libpoppler.so.5',
-            '/usr/lib/libpoppler-qt4.so.3',
+            '/usr/lib/libpoppler.so.6',
             '/usr/lib/libxml2.so.2',
             '/usr/lib/libopenjpeg.so.2',
             '/usr/lib/libxslt.so.1',
@ -61,10 +62,10 @@ class LinuxFreeze(Command):
             '/usr/lib/libgthread-2.0.so.0',
             stdcpp,
             ffi,
-            '/usr/lib/libpng12.so.0',
+            '/usr/lib/libpng14.so.14',
             '/usr/lib/libexslt.so.0',
-            '/usr/lib/libMagickWand.so.2',
-            '/usr/lib/libMagickCore.so.2',
+            '/usr/lib/libMagickWand.so.3',
+            '/usr/lib/libMagickCore.so.3',
             '/usr/lib/libgcrypt.so.11',
             '/usr/lib/libgpg-error.so.0',
             '/usr/lib/libphonon.so.4',
@ -265,6 +265,9 @@ class Py2App(object):
     @flush
     def get_local_dependencies(self, path_to_lib):
         for x in self.get_dependencies(path_to_lib):
+            if x.startswith('libpodofo'):
+                yield x, x
+                continue
             for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/',
                     '/opt/local/lib/',
                     '/Library/Frameworks/Python.framework/', SW+'/freetype/lib/'):
@ -397,7 +400,7 @@ class Py2App(object):
     @flush
     def add_podofo(self):
         info('\nAdding PoDoFo')
-        pdf = join(SW, 'lib', 'libpodofo.0.6.99.dylib')
+        pdf = join(SW, 'lib', 'libpodofo.0.8.1.dylib')
         self.install_dylib(pdf)

     @flush
@ -13,7 +13,7 @@ from setup import Command, modules, functions, basenames, __version__, \
 from setup.build_environment import msvc, MT, RC
 from setup.installer.windows.wix import WixMixIn

-QT_DIR = 'C:\\Qt\\4.6.0'
+QT_DIR = 'C:\\Qt\\4.6.3'
 QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
 LIBUSB_DIR = 'C:\\libusb'
 LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
@ -162,9 +162,50 @@
 SET(PODOFO_BUILD_SHARED TRUE)
 SET(PODOFO_BUILD_STATIC FALSE)

-cp build/podofo-0.7.0/build/src/Release/podofo.dll bin/
-cp build/podofo-0.7.0/build/src/Release/podofo.lib lib/
-cp build/podofo-0.7.0/build/src/Release/podofo.exp lib/
+cp build/podofo/build/src/Release/podofo.dll bin/
+cp build/podofo/build/src/Release/podofo.lib lib/
+cp build/podofo/build/src/Release/podofo.exp lib/
+
+cp build/podofo/build/podofo_config.h include/podofo/
+cp -r build/podofo/src/* include/podofo/
+
+The following patch was required to get it to compile:
+
+Index: src/PdfImage.cpp
+===================================================================
+--- src/PdfImage.cpp	(revision 1261)
++++ src/PdfImage.cpp	(working copy)
+@@ -627,7 +627,7 @@
+
+     long lLen = static_cast<long>(pInfo->rowbytes * height);
+     char* pBuffer = static_cast<char*>(malloc(sizeof(char) * lLen));
+-    png_bytep pRows[height];
++    png_bytepp pRows = static_cast<png_bytepp>(malloc(sizeof(png_bytep)*height));
+     for(int y=0; y<height; y++)
+     {
+         pRows[y] = reinterpret_cast<png_bytep>(pBuffer + (y * pInfo->rowbytes));
+@@ -672,6 +672,7 @@
+     this->SetImageData( width, height, pInfo->bit_depth, &stream );
+
+     free(pBuffer);
++    free(pRows);
+ }
+ #endif // PODOFO_HAVE_PNG_LIB
+
+Index: src/PdfFiltersPrivate.cpp
+===================================================================
+--- src/PdfFiltersPrivate.cpp	(revision 1261)
++++ src/PdfFiltersPrivate.cpp	(working copy)
+@@ -1019,7 +1019,7 @@
+ /*
+  * Prepare for input from a memory buffer.
+  */
+-GLOBAL(void)
++void
+ jpeg_memory_src (j_decompress_ptr cinfo, const JOCTET * buffer, size_t bufsize)
+ {
+     my_src_ptr src;
+
+
 ImageMagick
 --------------
@ -154,6 +154,10 @@
         <CustomAction Id="LaunchApplication" BinaryKey="WixCA"
             DllEntry="WixShellExec" Impersonate="yes"/>

+        <InstallUISequence>
+            <FileCost Suppress="yes" />
+        </InstallUISequence>
+
     </Product>
 </Wix>
@ -30,6 +30,7 @@ mimetypes.add_type('application/epub+zip', '.epub')
 mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
 mimetypes.add_type('application/xhtml+xml', '.xhtml')
 mimetypes.add_type('image/svg+xml', '.svg')
+mimetypes.add_type('text/fb2+xml', '.fb2')
 mimetypes.add_type('application/x-sony-bbeb', '.lrf')
 mimetypes.add_type('application/x-sony-bbeb', '.lrx')
 mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
@ -43,6 +44,7 @@ mimetypes.add_type('application/x-mobipocket-ebook', '.prc')
 mimetypes.add_type('application/x-mobipocket-ebook', '.azw')
 mimetypes.add_type('application/x-cbz', '.cbz')
 mimetypes.add_type('application/x-cbr', '.cbr')
+mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
 mimetypes.add_type('image/wmf', '.wmf')
 guess_type = mimetypes.guess_type
 import cssutils
@ -340,13 +342,6 @@ def detect_ncpus():
     return ans

-
-def launch(path_or_url):
-    from PyQt4.QtCore import QUrl
-    from PyQt4.QtGui import QDesktopServices
-    if os.path.exists(path_or_url):
-        path_or_url = 'file:'+path_or_url
-    QDesktopServices.openUrl(QUrl(path_or_url))

 relpath = os.path.relpath
 _spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE)
 def english_sort(x, y):
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = 'calibre'
-__version__ = '0.7.4'
+__version__ = '0.7.8'
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata

+# To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
     name = 'HTML to ZIP'
     author = 'Kovid Goyal'
@ -30,6 +31,7 @@ every time you add an HTML file to the library.\

         with TemporaryDirectory('_plugin_html2zip') as tdir:
             recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)]
+            recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
             if self.site_customization and self.site_customization.strip():
                 recs.append(['input_encoding', self.site_customization.strip(),
                     OptionRecommendation.HIGH])
@ -81,7 +83,9 @@ class PML2PMLZ(FileTypePlugin):

         return of.name

+# }}}
+
+# Metadata reader plugins {{{
 class ComicMetadataReader(MetadataReaderPlugin):

     name = 'Read comic metadata'
@ -319,7 +323,9 @@ class ZipMetadataReader(MetadataReaderPlugin):
     def get_metadata(self, stream, ftype):
         from calibre.ebooks.metadata.zip import get_metadata
         return get_metadata(stream)
+# }}}

+# Metadata writer plugins {{{
 class EPUBMetadataWriter(MetadataWriterPlugin):

@ -395,6 +401,7 @@ class TOPAZMetadataWriter(MetadataWriterPlugin):
         from calibre.ebooks.metadata.topaz import set_metadata
         set_metadata(stream, mi)

+# }}}

 from calibre.ebooks.comic.input import ComicInput
 from calibre.ebooks.epub.input import EPUBInput
@ -436,7 +443,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
 from calibre.devices.cybook.driver import CYBOOK
 from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
     POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
-    BOOQ, ELONEX, POCKETBOOK301
+    BOOQ, ELONEX, POCKETBOOK301, MENTOR
 from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
 from calibre.devices.jetbook.driver import JETBOOK
@ -444,7 +451,7 @@ from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
 from calibre.devices.nook.driver import NOOK
 from calibre.devices.prs505.driver import PRS505
 from calibre.devices.android.driver import ANDROID, S60
-from calibre.devices.nokia.driver import N770, N810, E71X
+from calibre.devices.nokia.driver import N770, N810, E71X, E52
 from calibre.devices.eslick.driver import ESLICK, EBK52
 from calibre.devices.nuut2.driver import NUUT2
 from calibre.devices.iriver.driver import IRIVER_STORY
@ -453,7 +460,7 @@ from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
 from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS
 from calibre.devices.sne.driver import SNE
-from calibre.devices.misc import PALMPRE, AVANT
+from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO

@ -461,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
     LibraryThing
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.library.catalog import CSV_XML, EPUB_MOBI
+from calibre.ebooks.epub.fix.unmanifested import Unmanifested
+from calibre.ebooks.epub.fix.epubcheck import Epubcheck

 plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-    LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI]
+    LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck]
 plugins += [
     ComicInput,
     EPUBInput,
@ -499,7 +509,6 @@ plugins += [
 ]
 # Order here matters. The first matched device is the one used.
 plugins += [
-    ITUNES,
     HANLINV3,
     HANLINV5,
     BLACKBERRY,
@ -520,6 +529,7 @@ plugins += [
     S60,
     N770,
     E71X,
+    E52,
     N810,
     COOL_ER,
     ESLICK,
@ -550,6 +560,10 @@ plugins += [
     AZBOOKA,
     FOLDER_DEVICE_FOR_CONFIG,
     AVANT,
+    MENTOR,
+    SWEEX,
+    PDNOVEL,
+    ITUNES,
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
     x.__name__.endswith('MetadataReader')]
@ -36,7 +36,7 @@ class Plugin(_Plugin):
         self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
         self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)

+# Input profiles {{{
 class InputProfile(Plugin):

     author = 'Kovid Goyal'
@ -218,6 +218,8 @@ input_profiles = [InputProfile, SonyReaderInput, SonyReader300Input,

 input_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))

+# }}}
+
 class OutputProfile(Plugin):

     author = 'Kovid Goyal'
@ -237,11 +239,12 @@ class OutputProfile(Plugin):
     # If True the MOBI renderer on the device supports MOBI indexing
     supports_mobi_indexing = False

-    # Device supports displaying a nested TOC
-    supports_nested_toc = True
-
     # If True output should be optimized for a touchscreen interface
     touchscreen = False
+    touchscreen_news_css = ''
+    # A list of extra (beyond CSS 2.1) modules supported by the device
+    # Format is a cssutils profile dictionary (see iPad for example)
+    extra_css_modules = []

     @classmethod
     def tags_to_string(cls, tags):
@ -256,8 +259,151 @@ class iPadOutput(OutputProfile):
     screen_size = (768, 1024)
     comic_screen_size = (768, 1024)
     dpi = 132.0
-    supports_nested_toc = False
+    extra_css_modules = [
+        {
+            'name':'webkit',
+            'props': { '-webkit-border-bottom-left-radius':'{length}',
+                '-webkit-border-bottom-right-radius':'{length}',
+                '-webkit-border-top-left-radius':'{length}',
+                '-webkit-border-top-right-radius':'{length}',
+                '-webkit-border-radius': r'{border-width}(\s+{border-width}){0,3}|inherit',
+            },
+            'macros': {'border-width': '{length}|medium|thick|thin'}
+        }
+    ]
     touchscreen = True
+    # touchscreen_news_css {{{
+    touchscreen_news_css = u'''
+        /* hr used in articles */
+        .article_articles_list {
+            width:18%;
+        }
+        .article_link {
+            color: #593f29;
+            font-style: italic;
+        }
+        .article_next {
+            -webkit-border-top-right-radius:4px;
+            -webkit-border-bottom-right-radius:4px;
+            font-style: italic;
+            width:32%;
+        }
+
+        .article_prev {
+            -webkit-border-top-left-radius:4px;
+            -webkit-border-bottom-left-radius:4px;
+            font-style: italic;
+            width:32%;
+        }
+        .article_sections_list {
+            width:18%;
+        }
+        .articles_link {
+            font-weight: bold;
+        }
+        .sections_link {
+            font-weight: bold;
+        }
+
+
+        .caption_divider {
+            border:#ccc 1px solid;
+        }
+
+        .touchscreen_navbar {
+            background:#c3bab2;
+            border:#ccc 0px solid;
+            border-collapse:separate;
+            border-spacing:1px;
+            margin-left: 5%;
+            margin-right: 5%;
+            width: 90%;
+            -webkit-border-radius:4px;
+        }
+        .touchscreen_navbar td {
+            background:#fff;
+            font-family:Helvetica;
+            font-size:80%;
+            /* UI touchboxes use 8px padding */
+            padding: 6px;
+            text-align:center;
+        }
+
+        .touchscreen_navbar td a:link {
+            color: #593f29;
+            text-decoration: none;
+        }
+
+        /* Index formatting */
+        .publish_date {
+            text-align:center;
+        }
+        .divider {
+            border-bottom:1em solid white;
+            border-top:1px solid gray;
+        }
+
+        hr.caption_divider {
+            border-color:black;
+            border-style:solid;
+            border-width:1px;
+        }
+
+        /* Feed summary formatting */
+        .article_summary {
+            display:inline-block;
+        }
+        .feed {
+            font-family:sans-serif;
+            font-weight:bold;
+            font-size:larger;
+        }
+
+        .feed_link {
+            font-style: italic;
+        }
+
+        .feed_next {
+            -webkit-border-top-right-radius:4px;
+            -webkit-border-bottom-right-radius:4px;
+            font-style: italic;
+            width:40%;
+        }
+
+        .feed_prev {
+            -webkit-border-top-left-radius:4px;
+            -webkit-border-bottom-left-radius:4px;
+            font-style: italic;
+            width:40%;
+        }
+
+        .feed_title {
+            text-align: center;
+            font-size: 160%;
+        }
+
+        .feed_up {
+            font-weight: bold;
+            width:20%;
+        }
+
+        .summary_headline {
+            font-weight:bold;
+            text-align:left;
+        }
+
+        .summary_byline {
+            text-align:left;
+            font-family:monospace;
+        }
+
+        .summary_text {
+            text-align:left;
+        }
+
+    '''
+    # }}}
+

 class SonyReaderOutput(OutputProfile):
@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.fetch import MetadataSource
 from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
         plugin_dir, OptionParser, prefs
+from calibre.ebooks.epub.fix import ePubFixer

 platform = 'linux'
@ -151,13 +152,13 @@ def reread_filetype_plugins():


 def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
-    occasion = {'import':_on_import, 'preprocess':_on_preprocess,
+    occasion_plugins = {'import':_on_import, 'preprocess':_on_preprocess,
             'postprocess':_on_postprocess}[occasion]
     customization = config['plugin_customization']
     if ft is None:
         ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '')
     nfp = path_to_file
-    for plugin in occasion.get(ft, []):
+    for plugin in occasion_plugins.get(ft, []):
         if is_disabled(plugin):
             continue
         plugin.site_customization = customization.get(plugin.name, '')
@ -194,7 +195,6 @@ def plugin_customization(plugin):

 # }}}

-
 # Input/Output profiles {{{
 def input_profiles():
     for plugin in _initialized_plugins:
@ -444,6 +444,14 @@ def device_plugins(): # {{{
             yield plugin
 # }}}

+# epub fixers {{{
+def epub_fixers():
+    for plugin in _initialized_plugins:
+        if isinstance(plugin, ePubFixer):
+            if not is_disabled(plugin):
+                if platform in plugin.supported_platforms:
+                    yield plugin
+# }}}

 # Initialize plugins {{{
@ -34,6 +34,12 @@ class ANDROID(USBMS):

             # Acer
             0x502 : { 0x3203 : [0x0100]},

+            # Dell
+            0x413c : { 0xb007 : [0x0100]},
+
+            # Eken?
+            0x040d : { 0x0851 : [0x0001]},
+
             }
     EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
     EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -42,11 +48,12 @@ class ANDROID(USBMS):
     EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)

     VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
-            'GT-I5700', 'SAMSUNG']
+            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX']
     WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
             '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD',
-            'PROD_GT-I9000']
-    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'PROD_GT-I9000_CARD']
+            'GT-I9000', 'FILE-STOR_GADGET']
+    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD',
+            'FILE-STOR_GADGET']

     OSX_MAIN_MEM = 'HTC Android Phone Media'

@ -63,6 +70,16 @@ class ANDROID(USBMS):
         dirs = [x.strip() for x in dirs.split(',')]
         self.EBOOK_DIR_MAIN = dirs

+    def get_main_ebook_dir(self, for_upload=False):
+        dirs = self.EBOOK_DIR_MAIN
+        if not for_upload:
+            def aldiko_tweak(x):
+                return 'eBooks' if x == 'eBooks/import' else x
+            if isinstance(dirs, basestring):
+                dirs = [dirs]
+            dirs = list(map(aldiko_tweak, dirs))
+        return dirs
+
 class S60(USBMS):

     name = 'S60 driver'
@ -186,6 +186,15 @@ class BOOQ(EB600):
     WINDOWS_MAIN_MEM = 'EB600'
     WINDOWS_CARD_A_MEM = 'EB600'

+class MENTOR(EB600):
+
+    name = 'Astak Mentor EB600'
+    gui_name = 'Mentor'
+    description = _('Communicate with the Astak Mentor EB600')
+    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'pdf', 'txt']
+
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MENTOR'
+
 class ELONEX(EB600):

     name = 'Elonex 600EB'
@ -66,7 +66,7 @@ class FOLDER_DEVICE(USBMS):
             detected_device=None):
         pass

-    def disconnect_from_folder(self):
+    def unmount_device(self):
         self._main_prefix = ''
         self.is_connected = False
@ -106,9 +106,11 @@ class BOOX(HANLINV3):
     description = _('Communicate with the BOOX eBook reader.')
     author = 'Jesus Manuel Marinho Valcarce'
     supported_platforms = ['windows', 'osx', 'linux']
+    METADATA_CACHE = '.metadata.calibre'

     # Ordered list of supported formats
-    FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi', 'prc', 'chm']
+    FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi',
+            'prc', 'chm', 'doc']

     VENDOR_ID = [0x0525]
     PRODUCT_ID = [0xa4a5]
@ -24,7 +24,7 @@ class N516(USBMS):

     VENDOR_ID = [0x0525]
     PRODUCT_ID = [0xa4a5]
-    BCD = [0x323, 0x326]
+    BCD = [0x323, 0x326, 0x327]

     VENDOR_NAME = 'INGENIC'
     WINDOWS_MAIN_MEM = '_FILE-STOR_GADGE'
@ -59,7 +59,7 @@ class DevicePlugin(Plugin):
             return cls.__name__
         return cls.name

+    # Device detection {{{
     def test_bcd_windows(self, device_id, bcd):
         if bcd is None or len(bcd) == 0:
             return True
@ -152,6 +152,7 @@ class DevicePlugin(Plugin):
                 return True, dev
         return False, None
+    # }}}

     def reset(self, key='-1', log_packets=False, report_progress=None,
             detected_device=None) :
@ -372,14 +373,12 @@ class DevicePlugin(Plugin):
     @classmethod
     def settings(cls):
         '''
-        Should return an opts object. The opts object should have one attribute
+        Should return an opts object. The opts object should have at least one attribute
         `format_map` which is an ordered list of formats for the device.
         '''
         raise NotImplementedError()

-
-
 class BookList(list):
     '''
     A list of books. Each Book object must have the fields:
@ -213,7 +213,7 @@ class KINDLE_DX(KINDLE2):
     PRODUCT_ID = [0x0003]
     BCD = [0x0100]

-class Bookmark():
+class Bookmark(): # {{{
     '''
     A simple class fetching bookmark data
     Kindle-specific
@ -429,6 +429,7 @@ class Bookmark():
             entries, = unpack('>I', data[9:13])
             current_entry = 0
             e_base = 0x0d
+            self.pdf_page_offset = 0
             while current_entry < entries:
                 '''
                 location, = unpack('>I', data[e_base+2:e_base+6])
@ -516,3 +517,6 @@ class Bookmark():

         else:
             print "unsupported bookmark_extension: %s" % self.bookmark_extension

+
+# }}}
116
src/calibre/devices/kobo/books.py
Normal file
@ -0,0 +1,116 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com>'
'''
'''

import os
import re
import time

from calibre.ebooks.metadata import MetaInformation
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre import isbytestring

class Book(MetaInformation):

    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']

    JSON_ATTRS = [
        'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
        'title_sort', 'comments', 'category', 'publisher', 'series',
        'series_index', 'rating', 'isbn', 'language', 'application_id',
        'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
        'uuid',
    ]

    def __init__(self, prefix, lpath, title, authors, mime, date, ContentType, thumbnail_name, other=None):

        MetaInformation.__init__(self, '')
        self.device_collections = []
        self._new_book = False

        self.path = os.path.join(prefix, lpath)
        if os.sep == '\\':
            self.path = self.path.replace('/', '\\')
            self.lpath = lpath.replace('\\', '/')
        else:
            self.lpath = lpath

        self.title = title
        if not authors:
            self.authors = ['']
        else:
            self.authors = [authors]
        self.mime = mime
        try:
            self.size = os.path.getsize(self.path)
        except OSError:
            self.size = 0
        try:
            if ContentType == '6':
                self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
            else:
                self.datetime = time.gmtime(os.path.getctime(self.path))
        except:
            self.datetime = time.gmtime()

        if thumbnail_name is not None:
            self.thumbnail = ImageWrapper(thumbnail_name)
        self.tags = []
        if other:
            self.smart_update(other)

    def __eq__(self, other):
        return self.path == getattr(other, 'path', None)

    @dynamic_property
    def db_id(self):
        doc = '''The database id in the application database that this file corresponds to'''
        def fget(self):
            match = re.search(r'_(\d+)$', self.lpath.rpartition('.')[0])
            if match:
                return int(match.group(1))
            return None
        return property(fget=fget, doc=doc)

    @dynamic_property
    def title_sorter(self):
        doc = '''String to sort the title. If absent, title is returned'''
        def fget(self):
            return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
        return property(doc=doc, fget=fget)

    @dynamic_property
    def thumbnail(self):
        return None

    def smart_update(self, other):
        '''
        Merge the information in C{other} into self. In case of conflicts, the information
        in C{other} takes precedence, unless the information in C{other} is NULL.
        '''
        MetaInformation.smart_update(self, other)

        for attr in self.BOOK_ATTRS:
            if hasattr(other, attr):
                val = getattr(other, attr, None)
                setattr(self, attr, val)

    def to_json(self):
        json = {}
        for attr in self.JSON_ATTRS:
            val = getattr(self, attr)
            if isbytestring(val):
                enc = filesystem_encoding if attr == 'lpath' else preferred_encoding
                val = val.decode(enc, 'replace')
            elif isinstance(val, (list, tuple)):
                val = [x.decode(preferred_encoding, 'replace') if
                        isbytestring(x) else x for x in val]
            json[attr] = val
        return json

class ImageWrapper(object):
    def __init__(self, image_path):
        self.image_path = image_path
@ -2,17 +2,26 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sqlite3 as sqlite
|
||||||
|
|
||||||
|
from calibre.devices.usbms.books import BookList
|
||||||
|
from calibre.devices.kobo.books import Book
|
||||||
|
from calibre.devices.kobo.books import ImageWrapper
|
||||||
|
from calibre.devices.mime import mime_type_ext
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
|
from calibre import prints
|
||||||
|
|
||||||
class KOBO(USBMS):
|
class KOBO(USBMS):
|
||||||
|
|
||||||
name = 'Kobo Reader Device Interface'
|
name = 'Kobo Reader Device Interface'
|
||||||
gui_name = 'Kobo Reader'
|
gui_name = 'Kobo Reader'
|
||||||
description = _('Communicate with the Kobo Reader')
|
description = _('Communicate with the Kobo Reader')
|
||||||
author = 'Kovid Goyal'
|
author = 'Timothy Legge and Kovid Goyal'
|
||||||
|
version = (1, 0, 4)
|
||||||
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
|
||||||
@ -29,3 +38,320 @@ class KOBO(USBMS):
|
|||||||
EBOOK_DIR_MAIN = ''
|
EBOOK_DIR_MAIN = ''
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
def initialize(self):
|
||||||
|
USBMS.initialize(self)
|
||||||
|
self.book_class = Book
|
||||||
|
|
||||||
|
def books(self, oncard=None, end_session=True):
|
||||||
|
from calibre.ebooks.metadata.meta import path_to_ext
|
||||||
|
|
||||||
|
dummy_bl = BookList(None, None, None)
|
||||||
|
|
||||||
|
if oncard == 'carda' and not self._card_a_prefix:
|
||||||
|
self.report_progress(1.0, _('Getting list of books on device...'))
|
||||||
|
return dummy_bl
|
||||||
|
elif oncard == 'cardb' and not self._card_b_prefix:
|
||||||
|
self.report_progress(1.0, _('Getting list of books on device...'))
|
||||||
|
return dummy_bl
|
||||||
|
elif oncard and oncard != 'carda' and oncard != 'cardb':
|
||||||
|
self.report_progress(1.0, _('Getting list of books on device...'))
|
||||||
|
return dummy_bl
|
||||||
|
|
||||||
|
prefix = self._card_a_prefix if oncard == 'carda' else \
|
||||||
|
self._card_b_prefix if oncard == 'cardb' \
|
||||||
|
else self._main_prefix
|
||||||
|
|
||||||
|
# get the metadata cache
|
||||||
|
bl = self.booklist_class(oncard, prefix, self.settings)
|
||||||
|
need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)
|
||||||
|
|
||||||
|
# make a dict cache of paths so the lookup in the loop below is faster.
|
||||||
|
bl_cache = {}
|
||||||
|
for idx,b in enumerate(bl):
|
||||||
|
bl_cache[b.lpath] = idx
|
||||||
|
|
||||||
|
def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID):
|
||||||
|
changed = False
|
||||||
|
# if path_to_ext(path) in self.FORMATS:
|
||||||
|
try:
|
||||||
|
lpath = path.partition(self.normalize_path(prefix))[2]
|
||||||
|
if lpath.startswith(os.sep):
|
||||||
|
lpath = lpath[len(os.sep):]
|
||||||
|
lpath = lpath.replace('\\', '/')
|
||||||
|
# print "LPATH: " + lpath
|
||||||
|
|
||||||
|
path = self.normalize_path(path)
|
||||||
|
# print "Normalized FileName: " + path
|
||||||
|
|
||||||
|
idx = bl_cache.get(lpath, None)
|
||||||
|
if idx is not None:
|
||||||
|
if ImageID is not None:
|
||||||
|
imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
|
||||||
|
#print "Image name Normalized: " + imagename
|
||||||
|
if imagename is not None:
|
||||||
|
bl[idx].thumbnail = ImageWrapper(imagename)
|
||||||
|
bl_cache[lpath] = None
|
||||||
|
if ContentType != '6':
|
||||||
|
if self.update_metadata_item(bl[idx]):
|
||||||
|
# print 'update_metadata_item returned true'
|
||||||
|
changed = True
|
||||||
|
else:
|
||||||
|
book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
|
||||||
|
# print 'Update booklist'
|
||||||
|
if bl.add_book(book, replace_metadata=False):
|
||||||
|
changed = True
|
||||||
|
except: # Probably a path encoding error
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
return changed
|
||||||
|
|
||||||
|
connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
|
||||||
|
cursor = connection.cursor()
|
||||||
|
|
||||||
|
#query = 'select count(distinct volumeId) from volume_shortcovers'
|
||||||
|
#cursor.execute(query)
|
||||||
|
#for row in (cursor):
|
||||||
|
# numrows = row[0]
|
||||||
|
#cursor.close()
|
||||||
|
|
||||||
|
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
|
||||||
|
'ImageID from content where BookID is Null'
|
||||||
|
|
||||||
|
cursor.execute (query)
|
||||||
|
|
||||||
|
changed = False
|
||||||
|
for i, row in enumerate(cursor):
|
||||||
|
# self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
|
||||||
|
|
||||||
|
path = self.path_from_contentid(row[3], row[5], oncard)
|
||||||
|
mime = mime_type_ext(path_to_ext(row[3]))
|
||||||
|
|
||||||
|
if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
|
||||||
|
changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
|
||||||
|
# print "shortbook: " + path
|
||||||
|
elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"):
|
||||||
|
changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
|
||||||
|
|
||||||
|
if changed:
|
||||||
|
need_sync = True
|
||||||
|
|
||||||
|
cursor.close()
|
||||||
|
connection.close()
|
||||||
|
|
||||||
|
# Remove books that are no longer in the filesystem. Cache contains
|
||||||
|
# indices into the booklist if book not in filesystem, None otherwise
|
||||||
|
# Do the operation in reverse order so indices remain valid
|
||||||
|
for idx in sorted(bl_cache.itervalues(), reverse=True):
|
||||||
|
if idx is not None:
|
||||||
|
need_sync = True
|
||||||
|
del bl[idx]
|
||||||
|
|
||||||
|
#print "count found in cache: %d, count of files in metadata: %d, need_sync: %s" % \
|
||||||
|
# (len(bl_cache), len(bl), need_sync)
|
||||||
|
if need_sync: #self.count_found_in_bl != len(bl) or need_sync:
|
||||||
|
if oncard == 'cardb':
|
||||||
|
self.sync_booklists((None, None, bl))
|
||||||
|
elif oncard == 'carda':
|
||||||
|
self.sync_booklists((None, bl, None))
|
||||||
|
else:
|
||||||
|
self.sync_booklists((bl, None, None))
|
||||||
|
|
||||||
|
self.report_progress(1.0, _('Getting list of books on device...'))
|
||||||
|
return bl
|
||||||
|
|
||||||
|
    def delete_via_sql(self, ContentID, ContentType):
        # Delete order:
        #    1) shortcover_page
        #    2) volume_shortcovers
        #    3) content

        connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
        cursor = connection.cursor()
        t = (ContentID,)
        cursor.execute('select ImageID from content where ContentID = ?', t)

        ImageID = None
        for row in cursor:
            # First get the ImageID to delete the images
            ImageID = row[0]
        cursor.close()

        cursor = connection.cursor()
        if ContentType == 6:
            # Delete the shortcover_pages first
            cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)

        # Delete the volume_shortcovers second
        cursor.execute('delete from volume_shortcovers where volumeid = ?', t)

        # Delete the chapters associated with the book next
        t = (ContentID, ContentID,)
        cursor.execute('delete from content where BookID = ? or ContentID = ?', t)

        connection.commit()
        cursor.close()

        if ImageID is None:
            print "Error condition ImageID was not found"
            print "You likely tried to delete a book that the kobo has not yet added to the database"

        connection.close()
        # If all this succeeds we need to delete the image files via the ImageID
        return ImageID

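The delete order above mirrors the parent/child links in KoboReader.sqlite: shortcover_page rows point at content rows via BookID, and volume_shortcovers rows point at the volume's ContentID. A read-only sketch for inspecting what a delete would touch, using the same table and column names (the mount point is an assumption; adjust it to where your reader is mounted):

    import sqlite3

    db = '/media/KOBOeReader/.kobo/KoboReader.sqlite'   # assumed mount point
    conn = sqlite3.connect(db)
    cur = conn.cursor()
    cid = ('some-content-id',)   # a ContentID taken from the content table
    cur.execute('select ImageID from content where ContentID = ?', cid)
    print cur.fetchall()         # image ids whose thumbnail files would go
    cur.execute('select count(*) from volume_shortcovers where volumeid = ?', cid)
    print cur.fetchone()[0]      # dependent rows that must be deleted first
    conn.close()
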
    def delete_images(self, ImageID):
        if ImageID is not None:
            path_prefix = '.kobo/images/'
            path = self._main_prefix + path_prefix + ImageID

            file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed',)

            for ending in file_endings:
                fpath = path + ending
                fpath = self.normalize_path(fpath)

                if os.path.exists(fpath):
                    # print 'Image File Exists: ' + fpath
                    os.unlink(fpath)

    def delete_books(self, paths, end_session=True):
        for i, path in enumerate(paths):
            self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
            path = self.normalize_path(path)
            # print "Delete file normalized path: " + path
            extension = os.path.splitext(path)[1]

            if extension == '.kobo':
                # Kobo books do not have book files. They do have some images though
                #print "kobo book"
                ContentType = 6
                ContentID = self.contentid_from_path(path, ContentType)
            elif extension == '.pdf' or extension == '.epub':
                # print "ePub or pdf"
                ContentType = 16
                #print "Path: " + path
                ContentID = self.contentid_from_path(path, ContentType)
                # print "ContentID: " + ContentID
            else: # if extension == '.html' or extension == '.txt':
                ContentType = 999 # Yet another hack: to get around Kobo changing how ContentID is stored
                ContentID = self.contentid_from_path(path, ContentType)

            ImageID = self.delete_via_sql(ContentID, ContentType)
            #print "We would now delete the images for " + ImageID
            self.delete_images(ImageID)

            if os.path.exists(path):
                # Delete the ebook
                # print "Delete the ebook: " + path
                os.unlink(path)

                filepath = os.path.splitext(path)[0]
                for ext in self.DELETE_EXTS:
                    if os.path.exists(filepath + ext):
                        # print "Filename: " + filename
                        os.unlink(filepath + ext)
                    if os.path.exists(path + ext):
                        # print "Filename: " + filename
                        os.unlink(path + ext)

                if self.SUPPORTS_SUB_DIRS:
                    try:
                        # print "removed"
                        os.removedirs(os.path.dirname(path))
                    except:
                        pass
        self.report_progress(1.0, _('Removing books from device...'))

    def remove_books_from_metadata(self, paths, booklists):
        for i, path in enumerate(paths):
            self.report_progress((i+1) / float(len(paths)), _('Removing books from device metadata listing...'))
            for bl in booklists:
                for book in bl:
                    #print "Book Path: " + book.path
                    if path.endswith(book.path):
                        #print "    Remove: " + book.path
                        bl.remove_book(book)
        self.report_progress(1.0, _('Removing books from device metadata listing...'))

    def add_books_to_metadata(self, locations, metadata, booklists):
        metadata = iter(metadata)
        for i, location in enumerate(locations):
            self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
            info = metadata.next()
            blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0

            # Extract the correct prefix from the pathname. To do this correctly,
            # we must ensure that both the prefix and the path are normalized
            # so that the comparison will work. Book's __init__ will fix up
            # lpath, so we don't need to worry about that here.
            path = self.normalize_path(location[0])
            prefix = None   # stays None if the path matches no mounted prefix
            if self._main_prefix:
                prefix = self._main_prefix if \
                    path.startswith(self.normalize_path(self._main_prefix)) else None
            if not prefix and self._card_a_prefix:
                prefix = self._card_a_prefix if \
                    path.startswith(self.normalize_path(self._card_a_prefix)) else None
            if not prefix and self._card_b_prefix:
                prefix = self._card_b_prefix if \
                    path.startswith(self.normalize_path(self._card_b_prefix)) else None
            if prefix is None:
                prints('in add_books_to_metadata. Prefix is None!', path,
                        self._main_prefix)
                continue
            #print "Add book to metadata: "
            #print "prefix: " + prefix
            lpath = path.partition(prefix)[2]
            if lpath.startswith('/') or lpath.startswith('\\'):
                lpath = lpath[1:]
            #print "path: " + lpath
            #book = self.book_class(prefix, lpath, other=info)
            lpath = self.normalize_path(prefix + lpath)
            book = Book(prefix, lpath, '', '', '', '', '', '', other=info)
            if book.size is None:
                book.size = os.stat(self.normalize_path(path)).st_size
            booklists[blist].add_book(book, replace_metadata=True)
        self.report_progress(1.0, _('Adding books to device metadata listing...'))

    def contentid_from_path(self, path, ContentType):
        if ContentType == 6:
            ContentID = os.path.splitext(path)[0]
            # Remove the prefix on the file: it could be either the main
            # memory prefix or the card prefix.
            ContentID = ContentID.replace(self._main_prefix, '')
            if self._card_a_prefix is not None:
                ContentID = ContentID.replace(self._card_a_prefix, '')
        elif ContentType == 999: # HTML files
            ContentID = path
            ContentID = ContentID.replace(self._main_prefix, "/mnt/onboard/")
            if self._card_a_prefix is not None:
                ContentID = ContentID.replace(self._card_a_prefix, "/mnt/sd/")
        else: # ContentType == 16
            ContentID = path
            ContentID = ContentID.replace(self._main_prefix, "file:///mnt/onboard/")
            if self._card_a_prefix is not None:
                ContentID = ContentID.replace(self._card_a_prefix, "file:///mnt/sd/")
        ContentID = ContentID.replace("\\", '/')
        return ContentID

    def path_from_contentid(self, ContentID, ContentType, oncard):
        path = ContentID

        if oncard == 'cardb':
            print 'path from_contentid cardb'
        elif oncard == 'carda':
            path = path.replace("file:///mnt/sd/", self._card_a_prefix)
            # print "SD Card: " + filename
        else:
            if ContentType == "6":
                # This is a hack as the kobo files do not exist
                # but the path is required to make a unique id
                # for calibre's reference
                path = self._main_prefix + path + '.kobo'
                # print "Path: " + path
            else:
                # if path.startswith("file:///mnt/onboard/"):
                path = path.replace("file:///mnt/onboard/", self._main_prefix)
                path = path.replace("/mnt/onboard/", self._main_prefix)
                # print "Internal: " + filename

        return path

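The two methods above are inverses of each other for books in main memory: the filesystem prefix maps to file:///mnt/onboard/ in the ContentID and back again. A standalone illustration of the round trip (the prefix is invented for the example):

    prefix = '/media/KOBOeReader/'   # hypothetical mount point
    path = prefix + 'books/example.epub'
    ContentID = path.replace(prefix, 'file:///mnt/onboard/').replace('\\', '/')
    print ContentID   # file:///mnt/onboard/books/example.epub
    print ContentID.replace('file:///mnt/onboard/', prefix)   # round-trips to path
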
@@ -49,3 +49,41 @@ class AVANT(USBMS):
     EBOOK_DIR_MAIN = ''
     SUPPORTS_SUB_DIRS = True
+
+class SWEEX(USBMS):
+    name = 'Sweex Device Interface'
+    gui_name = 'Sweex'
+    description = _('Communicate with the Sweex MM300')
+    author = 'Kovid Goyal'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
+
+    VENDOR_ID = [0x0525]
+    PRODUCT_ID = [0xa4a5]
+    BCD = [0x0319]
+
+    VENDOR_NAME = 'SWEEX'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'
+
+    EBOOK_DIR_MAIN = ''
+    SUPPORTS_SUB_DIRS = True
+
+class PDNOVEL(USBMS):
+    name = 'Pandigital Novel device interface'
+    gui_name = 'PD Novel'
+    description = _('Communicate with the Pandigital Novel')
+    author = 'Kovid Goyal'
+    supported_platforms = ['windows', 'linux', 'osx']
+    FORMATS = ['epub', 'pdf']
+
+    VENDOR_ID = [0x18d1]
+    PRODUCT_ID = [0xb004]
+    BCD = [0x224]
+
+    VENDOR_NAME = 'ANDROID'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '__UMS_COMPOSITE'
+
+    EBOOK_DIR_MAIN = 'eBooks'
+    SUPPORTS_SUB_DIRS = False

@@ -67,3 +67,24 @@ class E71X(USBMS):
     VENDOR_NAME = 'NOKIA'
     WINDOWS_MAIN_MEM = 'S60'
+
+class E52(USBMS):
+
+    name = 'Nokia E52 device interface'
+    gui_name = 'Nokia E52'
+    description = _('Communicate with the Nokia E52')
+    author = 'David Ignjic'
+    supported_platforms = ['windows', 'linux', 'osx']
+
+    VENDOR_ID = [0x421]
+    PRODUCT_ID = [0x1CD]
+    BCD = [0x100]
+
+    FORMATS = ['mobi', 'prc']
+
+    EBOOK_DIR_MAIN = 'eBooks'
+    SUPPORTS_SUB_DIRS = True
+
+    VENDOR_NAME = 'NOKIA'
+    WINDOWS_MAIN_MEM = 'S60'

@@ -99,7 +99,7 @@ class PRS505(USBMS):
         if self._card_b_prefix is not None:
             if not write_cache(self._card_b_prefix):
                 self._card_b_prefix = None
+        self.booklist_class.rebuild_collections = self.rebuild_collections
 
     def get_device_information(self, end_session=True):
         return (self.gui_name, '', '', '')
@@ -145,7 +145,7 @@ class PRS505(USBMS):
             blists[i] = booklists[i]
         opts = self.settings()
         if opts.extra_customization:
-            collections = [x.strip() for x in
+            collections = [x.lower().strip() for x in
                 opts.extra_customization.split(',')]
         else:
             collections = []
@@ -156,4 +156,10 @@ class PRS505(USBMS):
         USBMS.sync_booklists(self, booklists, end_session=end_session)
         debug_print('PRS505: finished sync_booklists')
+
+    def rebuild_collections(self, booklist, oncard):
+        debug_print('PRS505: started rebuild_collections on card', oncard)
+        c = self.initialize_XML_cache()
+        c.rebuild_collections(booklist, {'carda':1, 'cardb':2}.get(oncard, 0))
+        c.write()
+        debug_print('PRS505: finished rebuild_collections')

@@ -6,10 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 import os, time
-from pprint import pprint
 from base64 import b64decode
 from uuid import uuid4
 
 from lxml import etree
 
 from calibre import prints, guess_type
@@ -62,8 +60,7 @@ class XMLCache(object):
 
     def __init__(self, paths, prefixes, use_author_sort):
         if DEBUG:
-            debug_print('Building XMLCache...')
-            pprint(paths)
+            debug_print('Building XMLCache...', paths)
         self.paths = paths
         self.prefixes = prefixes
         self.use_author_sort = use_author_sort
@@ -147,39 +144,73 @@ class XMLCache(object):
                 if title+str(i) not in seen:
                     title = title+str(i)
                     playlist.set('title', title)
+                    seen.add(title)
                     break
             else:
                 seen.add(title)
 
-    def get_playlist_map(self):
-        debug_print('Start get_playlist_map')
-        ans = {}
+    def build_id_playlist_map(self, bl_index):
+        '''
+        Return a map of the collections in books: {lpaths: [collection names]}
+        '''
+        debug_print('Start build_id_playlist_map')
         self.ensure_unique_playlist_titles()
-        debug_print('after ensure_unique_playlist_titles')
         self.prune_empty_playlists()
-        debug_print('get_playlist_map loop')
-        for i, root in self.record_roots.items():
-            debug_print('get_playlist_map loop', i)
-            id_map = self.build_id_map(root)
-            ans[i] = []
-            for playlist in root.xpath('//*[local-name()="playlist"]'):
-                items = []
-                for item in playlist:
-                    id_ = item.get('id', None)
-                    record = id_map.get(id_, None)
-                    if record is not None:
-                        items.append(record)
-                ans[i].append((playlist.get('title'), items))
-        debug_print('end get_playlist_map')
-        return ans
+        debug_print('after cleaning playlists')
+        root = self.record_roots[bl_index]
+        if root is None:
+            return
+        id_map = self.build_id_map(root)
+        playlist_map = {}
+        # For each playlist, get the lpaths for the ids in it, then add to the map
+        for playlist in root.xpath('//*[local-name()="playlist"]'):
+            name = playlist.get('title')
+            if name is None:
+                debug_print('build_id_playlist_map: unnamed playlist!')
+                continue
+            for item in playlist:
+                # translate each id into its lpath
+                id_ = item.get('id', None)
+                if id_ is None:
+                    debug_print('build_id_playlist_map: id_ is None!')
+                    continue
+                bk = id_map.get(id_, None)
+                if bk is None:
+                    debug_print('build_id_playlist_map: book is None!', id_)
+                    continue
+                lpath = bk.get('path', None)
+                if lpath is None:
+                    debug_print('build_id_playlist_map: lpath is None!', id_)
+                    continue
+                if lpath not in playlist_map:
+                    playlist_map[lpath] = []
+                playlist_map[lpath].append(name)
+        debug_print('Finish build_id_playlist_map. Found', len(playlist_map))
+        return playlist_map
+
+    def reset_existing_playlists_map(self):
+        '''
+        Call this method before calling get_or_create_playlist in the context of
+        a given job. Call it again after deleting any playlists. The current
+        implementation adds all new playlists before deleting any, so that
+        constraint is respected.
+        '''
+        self._playlist_to_playlist_id_map = {}
 
     def get_or_create_playlist(self, bl_idx, title):
+        # Maintain a private map of playlists to their ids. Don't check if it
+        # exists, because reset_existing_playlists_map must be called before it
+        # is used, to ensure that deleted playlists are taken into account.
         root = self.record_roots[bl_idx]
-        for playlist in root.xpath('//*[local-name()="playlist"]'):
-            if playlist.get('title', None) == title:
-                return playlist
-        if DEBUG:
-            debug_print('Creating playlist:', title)
+        if bl_idx not in self._playlist_to_playlist_id_map:
+            self._playlist_to_playlist_id_map[bl_idx] = {}
+            for playlist in root.xpath('//*[local-name()="playlist"]'):
+                pl_title = playlist.get('title', None)
+                if pl_title is not None:
+                    self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist
+        if title in self._playlist_to_playlist_id_map[bl_idx]:
+            return self._playlist_to_playlist_id_map[bl_idx][title]
+        debug_print('Creating playlist:', title)
         ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
                 nsmap=root.nsmap, attrib={
                     'uuid' : uuid(),
@@ -188,12 +219,12 @@ class XMLCache(object):
                     'sourceid': '1'
                     })
         root.append(ans)
+        self._playlist_to_playlist_id_map[bl_idx][title] = ans
         return ans
     # }}}
 
     def fix_ids(self): # {{{
-        if DEBUG:
-            debug_print('Running fix_ids()')
+        debug_print('Running fix_ids()')
 
         def ensure_numeric_ids(root):
            idmap = {}
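As the docstring above says, build_id_playlist_map returns a dictionary keyed on lpath with a list of collection names per book. A sketch of the shape, with invented paths and names:

    playlist_map = {
        'books/Austen - Emma.epub':     ['Classics', 'To Read'],
        'books/Stross - Overtime.epub': ['SF'],
    }
    # book.device_collections is later refreshed from this map:
    print playlist_map.get('books/Austen - Emma.epub', [])   # ['Classics', 'To Read']
    print playlist_map.get('books/missing.epub', [])         # []
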
@@ -251,7 +282,9 @@ class XMLCache(object):
             ensure_media_xml_base_ids(root)
 
             idmap = ensure_numeric_ids(root)
-            remap_playlist_references(root, idmap)
+            if len(idmap) > 0:
+                debug_print('fix_ids: found some non-numeric ids')
+                remap_playlist_references(root, idmap)
             if i == 0:
                 sourceid, playlist_sid = 1, 0
                 base = 0
@@ -276,38 +309,19 @@ class XMLCache(object):
     def update_booklist(self, bl, bl_index):
         if bl_index not in self.record_roots:
             return
-        if DEBUG:
-            debug_print('Updating JSON cache:', bl_index)
+        debug_print('Updating JSON cache:', bl_index)
+        playlist_map = self.build_id_playlist_map(bl_index)
         root = self.record_roots[bl_index]
-        pmap = self.get_playlist_map()[bl_index]
-        playlist_map = {}
-        for title, records in pmap:
-            for record in records:
-                path = record.get('path', None)
-                if path:
-                    if path not in playlist_map:
-                        playlist_map[path] = []
-                    playlist_map[path].append(title)
-
         lpath_map = self.build_lpath_map(root)
         for book in bl:
             record = lpath_map.get(book.lpath, None)
             if record is not None:
                 title = record.get('title', None)
                 if title is not None and title != book.title:
-                    if DEBUG:
-                        debug_print('Renaming title', book.title, 'to', title)
+                    debug_print('Renaming title', book.title, 'to', title)
                     book.title = title
-                # We shouldn't do this for Sonys, because the reader strips
-                # all but the first author.
-                # authors = record.get('author', None)
-                # if authors is not None:
-                #     authors = string_to_authors(authors)
-                #     if authors != book.authors:
-                #         if DEBUG:
-                #             prints('Renaming authors', book.authors, 'to',
-                #                     authors)
-                #         book.authors = authors
+                # Don't set the author, because the reader strips all but
+                # the first author.
                 for thumbnail in record.xpath(
                         'descendant::*[local-name()="thumbnail"]'):
                     for img in thumbnail.xpath(
@@ -318,47 +332,57 @@ class XMLCache(object):
                             book.thumbnail = raw
                             break
                     break
-            if book.lpath in playlist_map:
-                tags = playlist_map[book.lpath]
-                book.device_collections = tags
+            book.device_collections = playlist_map.get(book.lpath, [])
         debug_print('Finished updating JSON cache:', bl_index)
 
     # }}}
 
     # Update XML from JSON {{{
     def update(self, booklists, collections_attributes):
-        debug_print('Starting update XML from JSON')
-        playlist_map = self.get_playlist_map()
-
+        debug_print('Starting update', collections_attributes)
         for i, booklist in booklists.items():
-            if DEBUG:
-                debug_print('Updating XML Cache:', i)
+            playlist_map = self.build_id_playlist_map(i)
+            debug_print('Updating XML Cache:', i)
             root = self.record_roots[i]
             lpath_map = self.build_lpath_map(root)
+            gtz_count = ltz_count = 0
             for book in booklist:
                 path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
-                # record = self.book_by_lpath(book.lpath, root)
                 record = lpath_map.get(book.lpath, None)
                 if record is None:
                     record = self.create_text_record(root, i, book.lpath)
-                self.update_text_record(record, book, path, i)
-            bl_pmap = playlist_map[i]
-            self.update_playlists(i, root, booklist, bl_pmap,
-                    collections_attributes)
+                (gtz_count, ltz_count) = self.update_text_record(record, book,
+                                                path, i, gtz_count, ltz_count)
+                # Ensure the collections in the XML database are recorded for
+                # this book
+                if book.device_collections is None:
+                    book.device_collections = []
+                book.device_collections = playlist_map.get(book.lpath, [])
+            debug_print('Timezone votes: %d GMT, %d LTZ'%(gtz_count, ltz_count))
+            self.update_playlists(i, root, booklist, collections_attributes)
+        # Update the device collections because update_playlists could have
+        # added some new ones.
+        debug_print('In update/ Starting refresh of device_collections')
+        for i, booklist in booklists.items():
+            playlist_map = self.build_id_playlist_map(i)
+            for book in booklist:
+                book.device_collections = playlist_map.get(book.lpath, [])
+        self.fix_ids()
+        debug_print('Finished update')
+
+    def rebuild_collections(self, booklist, bl_index):
+        if bl_index not in self.record_roots:
+            return
+        root = self.record_roots[bl_index]
+        self.update_playlists(bl_index, root, booklist, [])
         self.fix_ids()
 
-    # This is needed to update device_collections
-        for i, booklist in booklists.items():
-            self.update_booklist(booklist, i)
-        debug_print('Finished update XML from JSON')
-
-    def update_playlists(self, bl_index, root, booklist, playlist_map,
-            collections_attributes):
-        debug_print('Starting update_playlists')
+    def update_playlists(self, bl_index, root, booklist, collections_attributes):
+        debug_print('Starting update_playlists', collections_attributes, bl_index)
+        self.reset_existing_playlists_map()
         collections = booklist.get_collections(collections_attributes)
         lpath_map = self.build_lpath_map(root)
+        debug_print('update_playlists: finished building maps')
         for category, books in collections.items():
             records = [lpath_map.get(b.lpath, None) for b in books]
             # Remove any books that were not found, although this
@@ -367,25 +391,34 @@ class XMLCache(object):
                 debug_print('WARNING: Some elements in the JSON cache were not'
                         ' found in the XML cache')
             records = [x for x in records if x is not None]
+            # Ensure each book has an ID.
             for rec in records:
                 if rec.get('id', None) is None:
                     rec.set('id', str(self.max_id(root)+1))
             ids = [x.get('id', None) for x in records]
+            # Given that we set the ids, there shouldn't be any None's. But
+            # better to be safe...
             if None in ids:
-                if DEBUG:
-                    debug_print('WARNING: Some <text> elements do not have ids')
-                ids = [x for x in ids if x is not None]
+                debug_print('WARNING: Some <text> elements do not have ids')
+                ids = [x for x in ids if x is not None]
 
             playlist = self.get_or_create_playlist(bl_index, category)
+            # Get the books currently in the playlist. We will need them to be
+            # sure to put back any books that were manually added.
             playlist_ids = []
             for item in playlist:
                 id_ = item.get('id', None)
                 if id_ is not None:
                     playlist_ids.append(id_)
+            # Empty the playlist. We do this so that the playlist will have the
+            # order specified by get_collections
             for item in list(playlist):
                 playlist.remove(item)
 
+            # Get a list of ids not known by get_collections
             extra_ids = [x for x in playlist_ids if x not in ids]
+            # Rebuild the collection in the order specified by get_collections.
+            # Then add the ids that get_collections didn't know about.
             for id_ in ids + extra_ids:
                 item = playlist.makeelement(
                     '{%s}item'%self.namespaces[bl_index],
@@ -423,11 +456,38 @@ class XMLCache(object):
         root.append(ans)
         return ans
 
-    def update_text_record(self, record, book, path, bl_index):
+    def update_text_record(self, record, book, path, bl_index, gtz_count, ltz_count):
+        '''
+        Update the Sony database from the book. This is done if the timestamp in
+        the db differs from the timestamp on the file.
+        '''
+
+        # It seems that a Sony device can sometimes know what timezone it is in,
+        # and apparently converts the dates to GMT when it writes them to the
+        # db. Unfortunately, we can't tell when it does this, so we use a
+        # horrible heuristic. First, set dates only for new books, trying to
+        # avoid upsetting the sony. Use the timezone determined through the
+        # voting described next. Second, voting: if a book is not new, compare
+        # its Sony DB date against localtime and gmtime. Count the matches. When
+        # we must set a date, use the one with the most matches. Use localtime
+        # in the case of a tie, and hope it is right.
         timestamp = os.path.getmtime(path)
-        date = strftime(timestamp)
-        if date != record.get('date', None):
+        rec_date = record.get('date', None)
+        if not getattr(book, '_new_book', False): # book is not new
+            if strftime(timestamp, zone=time.gmtime) == rec_date:
+                gtz_count += 1
+            elif strftime(timestamp, zone=time.localtime) == rec_date:
+                ltz_count += 1
+        else: # book is new. Set the time using the current votes
+            if ltz_count >= gtz_count:
+                tz = time.localtime
+                debug_print("Using localtime TZ for new book", book.lpath)
+            else:
+                tz = time.gmtime
+                debug_print("Using GMT TZ for new book", book.lpath)
+            date = strftime(timestamp, zone=tz)
             record.set('date', date)
 
         record.set('size', str(os.stat(path).st_size))
         title = book.title if book.title else _('Unknown')
         record.set('title', title)
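The voting heuristic described in the comment above can be hard to picture. A standalone sketch of how a single vote is cast (plain time.strftime stands in for calibre's strftime wrapper, and the format string is an assumption for illustration):

    import os, time

    FMT = '%Y-%m-%dT%H:%M:%S'   # assumed date format for the sketch

    def vote(db_date, path):
        # Render the file mtime in both GMT and local time; whichever
        # rendering matches the date stored in the Sony DB casts the vote.
        ts = os.path.getmtime(path)
        if time.strftime(FMT, time.gmtime(ts)) == db_date:
            return 'gmt'
        if time.strftime(FMT, time.localtime(ts)) == db_date:
            return 'local'
        return None   # no vote; the stored date matches neither rendering
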
@@ -452,6 +512,7 @@ class XMLCache(object):
         if 'id' not in record.attrib:
             num = self.max_id(record.getroottree().getroot())
             record.set('id', str(num+1))
+        return (gtz_count, ltz_count)
     # }}}
 
     # Writing the XML files {{{
@@ -544,10 +605,5 @@ class XMLCache(object):
                     break
             self.namespaces[i] = ns
 
-        # if DEBUG:
-        #     debug_print('Found nsmaps:')
-        #     pprint(self.nsmaps)
-        #     debug_print('Found namespaces:')
-        #     pprint(self.namespaces)
     # }}}
 
@@ -98,6 +98,9 @@ class LinuxScanner(object):
 
     def __call__(self):
         ans = set([])
+        if not self.ok:
+            raise RuntimeError('DeviceScanner requires the /sys filesystem to work.')
+
         for x in os.listdir(self.base):
             base = os.path.join(self.base, x)
             ven = os.path.join(base, 'idVendor')
@@ -145,8 +148,6 @@ class DeviceScanner(object):
     def __init__(self, *args):
         if isosx and osx_scanner is None:
             raise RuntimeError('The Python extension usbobserver must be available on OS X.')
-        if islinux and not linux_scanner.ok:
-            raise RuntimeError('DeviceScanner requires the /sys filesystem to work.')
         self.scanner = win_scanner if iswindows else osx_scanner if isosx else linux_scanner
         self.devices = []
 
@@ -11,10 +11,11 @@ from calibre.devices.mime import mime_type_ext
 from calibre.devices.interface import BookList as _BookList
 from calibre.constants import filesystem_encoding, preferred_encoding
 from calibre import isbytestring
+from calibre.utils.config import prefs
 
 class Book(MetaInformation):
 
-    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections']
+    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']
 
     JSON_ATTRS = [
         'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
@@ -29,6 +30,7 @@ class Book(MetaInformation):
 
         MetaInformation.__init__(self, '')
 
+        self._new_book = False
         self.device_collections = []
         self.path = os.path.join(prefix, lpath)
         if os.sep == '\\':
@@ -76,7 +78,7 @@ class Book(MetaInformation):
         in C{other} takes precedence, unless the information in C{other} is NULL.
         '''
 
-        MetaInformation.smart_update(self, other)
+        MetaInformation.smart_update(self, other, replace_tags=True)
 
         for attr in self.BOOK_ATTRS:
             if hasattr(other, attr):
@@ -130,12 +132,37 @@ class CollectionsBookList(BookList):
         return True
 
     def get_collections(self, collection_attributes):
+        from calibre.devices.usbms.driver import debug_print
+        debug_print('Starting get_collections:', prefs['manage_device_metadata'])
         collections = {}
         series_categories = set([])
-        collection_attributes = list(collection_attributes)+['device_collections']
-        for attr in collection_attributes:
-            attr = attr.strip()
-            for book in self:
+        # This map of sets is used to avoid linear searches when testing for
+        # book equality
+        collections_lpaths = {}
+        for book in self:
+            # Make sure we can identify this book via the lpath
+            lpath = getattr(book, 'lpath', None)
+            if lpath is None:
+                continue
+            # Decide how we will build the collections. The default: leave the
+            # book in all existing collections. Do not add any new ones.
+            attrs = ['device_collections']
+            if getattr(book, '_new_book', False):
+                if prefs['manage_device_metadata'] == 'manual':
+                    # Ensure that the book is in all the book's existing
+                    # collections plus all metadata collections
+                    attrs += collection_attributes
+                else:
+                    # For new books, both 'on_send' and 'on_connect' do the same
+                    # thing. The book's existing collections are ignored. Put
+                    # the book in collections defined by its metadata.
+                    attrs = collection_attributes
+            elif prefs['manage_device_metadata'] == 'on_connect':
+                # For existing books, modify the collections only if the user
+                # specified 'on_connect'
+                attrs = collection_attributes
+            for attr in attrs:
+                attr = attr.strip()
                 val = getattr(book, attr, None)
                 if not val: continue
                 if isbytestring(val):
@@ -150,11 +177,12 @@ class CollectionsBookList(BookList):
                     continue
                 if category not in collections:
                     collections[category] = []
-                if book not in collections[category]:
+                    collections_lpaths[category] = set()
+                if lpath not in collections_lpaths[category]:
+                    collections_lpaths[category].add(lpath)
                     collections[category].append(book)
                 if attr == 'series':
                     series_categories.add(category)
 
         # Sort collections
         for category, books in collections.items():
             def tgetter(x):
@@ -167,3 +195,15 @@ class CollectionsBookList(BookList):
             books.sort(cmp=lambda x,y:cmp(getter(x), getter(y)))
         return collections
+
+    def rebuild_collections(self, booklist, oncard):
+        '''
+        For each book in the booklist for the card oncard, remove it from all
+        its current collections, then add it to the collections specified in
+        device_collections.
+
+        oncard is None for the main memory, carda for card A, cardb for card B,
+        etc.
+
+        booklist is the object created by the :method:`books` call above.
+        '''
+        pass

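The branching in get_collections above is the whole policy for the new manage_device_metadata preference. A standalone function mirroring that decision, so the three cases can be exercised directly:

    def collection_attrs(book_is_new, manage_device_metadata, collection_attributes):
        # Mirrors the decision in get_collections above.
        attrs = ['device_collections']
        if book_is_new:
            if manage_device_metadata == 'manual':
                attrs += list(collection_attributes)
            else:
                # 'on_send' and 'on_connect' behave identically for new books
                attrs = list(collection_attributes)
        elif manage_device_metadata == 'on_connect':
            attrs = list(collection_attributes)
        return attrs

    print collection_attrs(True, 'manual', ['tags'])      # ['device_collections', 'tags']
    print collection_attrs(False, 'on_connect', ['tags']) # ['tags']
    print collection_attrs(False, 'manual', ['tags'])     # ['device_collections']
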
@@ -78,9 +78,6 @@ class Device(DeviceConfig, DevicePlugin):
     STORAGE_CARD_VOLUME_LABEL = ''
     STORAGE_CARD2_VOLUME_LABEL = None
 
-    SUPPORTS_SUB_DIRS = False
-    MUST_READ_METADATA = False
-    SUPPORTS_USE_AUTHOR_SORT = False
-
     EBOOK_DIR_MAIN = ''
     EBOOK_DIR_CARD_A = ''
@@ -735,7 +732,7 @@ class Device(DeviceConfig, DevicePlugin):
             traceback.print_exc()
             self._main_prefix = self._card_a_prefix = self._card_b_prefix = None
 
-    def get_main_ebook_dir(self):
+    def get_main_ebook_dir(self, for_upload=False):
         return self.EBOOK_DIR_MAIN
 
     def _sanity_check(self, on_card, files):
@@ -753,7 +750,7 @@ class Device(DeviceConfig, DevicePlugin):
             path = os.path.join(self._card_b_prefix,
                     *(self.EBOOK_DIR_CARD_B.split('/')))
         else:
-            candidates = self.get_main_ebook_dir()
+            candidates = self.get_main_ebook_dir(for_upload=True)
             if isinstance(candidates, basestring):
                 candidates = [candidates]
             candidates = [
@@ -13,6 +13,10 @@ class DeviceConfig(object):
     EXTRA_CUSTOMIZATION_MESSAGE = None
     EXTRA_CUSTOMIZATION_DEFAULT = None
 
+    SUPPORTS_SUB_DIRS = False
+    MUST_READ_METADATA = False
+    SUPPORTS_USE_AUTHOR_SORT = False
+
     #: If None the default is used
     SAVE_TEMPLATE = None
 
@@ -23,9 +27,14 @@ class DeviceConfig(object):
             config().parse().send_template
 
     @classmethod
-    def _config(cls):
+    def _config_base_name(cls):
         klass = cls if isinstance(cls, type) else cls.__class__
-        c = Config('device_drivers_%s' % klass.__name__, _('settings for device drivers'))
+        return klass.__name__
+
+    @classmethod
+    def _config(cls):
+        name = cls._config_base_name()
+        c = Config('device_drivers_%s' % name, _('settings for device drivers'))
         c.add_opt('format_map', default=cls.FORMATS,
             help=_('Ordered list of formats the device will accept'))
         c.add_opt('use_subdirs', default=True,
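Splitting out _config_base_name means the settings namespace a driver uses is derived from its class name by default, and a subclass can now override just that derivation. A tiny sketch of the default behaviour (the stand-in class is illustrative):

    class SWEEX(object):            # stands in for a driver class
        @classmethod
        def _config_base_name(cls):
            return cls.__name__

    print 'device_drivers_%s' % SWEEX._config_base_name()   # device_drivers_SWEEX
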
@@ -58,7 +58,7 @@ class USBMS(CLI, Device):
 
         debug_print('USBMS: Fetching list of books from device. oncard=', oncard)
 
-        dummy_bl = BookList(None, None, None)
+        dummy_bl = self.booklist_class(None, None, None)
 
         if oncard == 'carda' and not self._card_a_prefix:
             self.report_progress(1.0, _('Getting list of books on device...'))
@@ -78,6 +78,8 @@ class USBMS(CLI, Device):
                 self.EBOOK_DIR_CARD_B if oncard == 'cardb' else \
                 self.get_main_ebook_dir()
 
+        debug_print('USBMS: dirs are:', prefix, ebook_dirs)
+
         # get the metadata cache
         bl = self.booklist_class(oncard, prefix, self.settings)
         need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)
@@ -233,6 +235,7 @@ class USBMS(CLI, Device):
             book = self.book_class(prefix, lpath, other=info)
             if book.size is None:
                 book.size = os.stat(self.normalize_path(path)).st_size
+            book._new_book = True # Must be before add_book
             booklists[blist].add_book(book, replace_metadata=True)
         self.report_progress(1.0, _('Adding books to device metadata listing...'))
         debug_print('USBMS: finished adding metadata')
@@ -273,6 +276,9 @@ class USBMS(CLI, Device):
         self.report_progress(1.0, _('Removing books from device metadata listing...'))
         debug_print('USBMS: finished removing metadata for %d books'%(len(paths)))
 
+    # If you override this method and you use book._new_book, then you must
+    # complete the processing before you call this method. The flag is cleared
+    # at the end, just before the return.
     def sync_booklists(self, booklists, end_session=True):
         debug_print('USBMS: starting sync_booklists')
 
@@ -286,11 +292,18 @@ class USBMS(CLI, Device):
             js = [item.to_json() for item in booklists[listid] if
                   hasattr(item, 'to_json')]
             with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f:
-                json.dump(js, f, indent=2, encoding='utf-8')
+                f.write(json.dumps(js, indent=2, encoding='utf-8'))
         write_prefix(self._main_prefix, 0)
         write_prefix(self._card_a_prefix, 1)
         write_prefix(self._card_b_prefix, 2)
 
+        # Clear the _new_book indication, as we are supposed to be done with
+        # adding books at this point
+        for blist in booklists:
+            if blist is not None:
+                for book in blist:
+                    book._new_book = False
+
         self.report_progress(1.0, _('Sending metadata to device...'))
         debug_print('USBMS: finished sync_booklists')
 
@@ -49,7 +49,6 @@ class CHMInput(InputFormatPlugin):
         log.debug('stream.name=%s' % stream.name)
         mainname = self._chmtohtml(tdir, chm_name, no_images, log)
         mainpath = os.path.join(tdir, mainname)
-        #raw_input()
 
         metadata = get_metadata_from_reader(self._chm_reader)
 
@@ -92,7 +91,7 @@ class CHMInput(InputFormatPlugin):
             metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
         if not metadata.language:
             oeb.logger.warn(u'Language not specified')
-            metadata.add('language', get_lang())
+            metadata.add('language', get_lang().replace('_', '-'))
         if not metadata.creator:
             oeb.logger.warn('Creator not specified')
             metadata.add('creator', _('Unknown'))
@@ -141,10 +140,9 @@ class CHMInput(InputFormatPlugin):
         log.debug('Found %d section nodes' % len(chapters))
         htmlpath = os.path.splitext(hhcpath)[0] + ".html"
         f = open(htmlpath, 'wb')
-        f.write('<html><head><meta http-equiv="Content-type"'
-                ' content="text/html;charset=UTF-8" /></head><body>\n')
 
         if chapters:
+            f.write('<html><head><meta http-equiv="Content-type"'
+                    ' content="text/html;charset=UTF-8" /></head><body>\n')
             path0 = chapters[0][1]
             subpath = os.path.dirname(path0)
 
@@ -158,7 +156,9 @@ class CHMInput(InputFormatPlugin):
                 url = url.encode('utf-8')
                 f.write(url)
 
             f.write("</body></html>")
+        else:
+            f.write(hhcdata)
         f.close()
         return htmlpath
 
@@ -8,7 +8,7 @@ import os, re
 from mimetypes import guess_type as guess_mimetype
 
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
-from calibre.constants import iswindows
+from calibre.constants import iswindows, filesystem_encoding
 from calibre.utils.chm.chm import CHMFile
 from calibre.utils.chm.chmlib import (
     CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
@@ -78,6 +78,8 @@ class CHMError(Exception):
 class CHMReader(CHMFile):
     def __init__(self, input, log):
         CHMFile.__init__(self)
+        if isinstance(input, unicode):
+            input = input.encode(filesystem_encoding)
         if not self.LoadCHM(input):
             raise CHMError("Unable to open CHM file '%s'"%(input,))
         self.log = log
@@ -91,7 +93,6 @@ class CHMReader(CHMFile):
         self.root, ext = os.path.splitext(self.topics.lstrip('/'))
         self.hhc_path = self.root + ".hhc"
 
-
     def _parse_toc(self, ul, basedir=os.getcwdu()):
         toc = TOC(play_order=self._playorder, base_path=basedir, text='')
         self._playorder += 1
@@ -152,6 +153,8 @@ class CHMReader(CHMFile):
                 if f.lower() == self.hhc_path.lower():
                     self.hhc_path = f
                     break
+        if self.hhc_path not in files and files:
+            self.hhc_path = files[0]
 
     def _reformat(self, data):
         try:
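The fallback just added is what makes buggy CHM files with no .hhc (ticket 6087) convertible: when the declared table-of-contents file is absent from the archive, the reader simply falls back to the first extracted file. A standalone rerun of that logic (file names invented):

    files = ['intro.html', 'chapter1.html']   # files found inside the CHM
    hhc_path = 'toc.hhc'                       # declared TOC that does not exist
    if hhc_path not in files and files:
        hhc_path = files[0]
    print hhc_path   # intro.html
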
@@ -159,7 +162,7 @@ class CHMReader(CHMFile):
             soup = BeautifulSoup(data)
         except ValueError:
             # hit some strange encoding problems...
-            print "Unable to parse html for cleaning, leaving it :("
+            self.log.exception("Unable to parse html for cleaning, leaving it")
             return data
         # nuke javascript...
         [s.extract() for s in soup('script')]
@@ -151,6 +151,7 @@ cpalmdoc_do_compress(buffer *b, char *output) {
             for (j=0; j < temp.len; j++) *(output++) = (char)temp.data[j];
         }
     }
+    PyMem_Free(temp.data);
    return output - head;
}
 
@@ -168,7 +169,9 @@ cpalmdoc_compress(PyObject *self, PyObject *args) {
    for (j = 0; j < input_len; j++)
        b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
    b.len = input_len;
-    output = (char *)PyMem_Malloc(sizeof(char) * b.len);
+    // Make the output buffer larger than the input as sometimes
+    // compression results in a larger block
+    output = (char *)PyMem_Malloc(sizeof(char) * (int)(1.25*b.len));
    if (output == NULL) return PyErr_NoMemory();
    j = cpalmdoc_do_compress(&b, output);
    if ( j == 0) return PyErr_NoMemory();
@@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode,
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
 
 LIGATURES = {
-        u'\u00c6': u'AE',
-        u'\u00e6': u'ae',
-        u'\u0152': u'OE',
-        u'\u0153': u'oe',
-        u'\u0132': u'IJ',
-        u'\u0133': u'ij',
-        u'\u1D6B': u'ue',
+#        u'\u00c6': u'AE',
+#        u'\u00e6': u'ae',
+#        u'\u0152': u'OE',
+#        u'\u0153': u'oe',
+#        u'\u0132': u'IJ',
+#        u'\u0133': u'ij',
+#        u'\u1D6B': u'ue',
         u'\uFB00': u'ff',
         u'\uFB01': u'fi',
         u'\uFB02': u'fl',
@@ -107,9 +107,21 @@ class CSSPreProcessor(object):
 
     PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
 
-    def __call__(self, data):
+    def __call__(self, data, add_namespace=False):
+        from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
         data = self.PAGE_PAT.sub('', data)
-        return data
+        if not add_namespace:
+            return data
+        ans, namespaced = [], False
+        for line in data.splitlines():
+            ll = line.lstrip()
+            if not (namespaced or ll.startswith('@import') or
+                        ll.startswith('@charset')):
+                ans.append(XHTML_CSS_NAMESPACE.strip())
+                namespaced = True
+            ans.append(line)
+
+        return u'\n'.join(ans)
 
 class HTMLPreProcessor(object):
 
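CSS requires @charset to come first in a stylesheet and @import rules to precede all other statements, which is why the loop above skips past those lines before inserting the namespace rule. A standalone rerun of that logic (the real namespace rule string lives in calibre.ebooks.oeb.base; the one here is assumed for illustration):

    XHTML_CSS_NAMESPACE = '@namespace "http://www.w3.org/1999/xhtml";'

    css = '@charset "UTF-8";\n@import url(extra.css);\np { color: red }'
    ans, namespaced = [], False
    for line in css.splitlines():
        ll = line.lstrip()
        if not (namespaced or ll.startswith('@import') or ll.startswith('@charset')):
            ans.append(XHTML_CSS_NAMESPACE)
            namespaced = True
        ans.append(line)
    print '\n'.join(ans)   # the namespace rule lands after @charset and @import
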
@@ -268,7 +280,7 @@ class HTMLPreProcessor(object):
 
         if getattr(self.extra_opts, 'remove_footer', None):
             try:
-                rules.insert(0
+                rules.insert(0,
                     (re.compile(self.extra_opts.footer_regex), lambda match : '')
                 )
             except:
src/calibre/ebooks/epub/fix/__init__.py (new file, 58 lines)
@@ -0,0 +1,58 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'


from calibre.customize import Plugin

class InvalidEpub(ValueError):
    pass

class ePubFixer(Plugin):

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('ePub Fixer')
    can_be_disabled = True

    # API that subclasses must implement {{{
    @property
    def short_description(self):
        raise NotImplementedError

    @property
    def long_description(self):
        raise NotImplementedError

    @property
    def fix_name(self):
        raise NotImplementedError

    @property
    def options(self):
        '''
        Return a list of 4-tuples
        (option_name, type, default, help_text)
        type is one of 'bool', 'int', 'string'
        '''
        return []

    def run(self, container, opts, log, fix=False):
        raise NotImplementedError
    # }}}

    def add_options_to_parser(self, parser):
        parser.add_option('--' + self.fix_name.replace('_', '-'),
                help=self.long_description, action='store_true', default=False)
        for option in self.options:
            action = 'store'
            if option[1] == 'bool':
                action = 'store_true'
            kwargs = {'action': action, 'default':option[2], 'help':option[3]}
            if option[1] != 'bool':
                kwargs['type'] = option[1]
            parser.add_option('--'+option[0].replace('_', '-'), **kwargs)

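To see how options() and add_options_to_parser fit together, here is a hypothetical fixer's options list run through the same parser-building logic (the option names are invented for illustration):

    from optparse import OptionParser

    # 4-tuples in the (option_name, type, default, help_text) form documented above
    options = [
        ('max_line_length', 'int', 0, 'Wrap lines longer than this (0 disables)'),
        ('force', 'bool', False, 'Apply fixes without prompting'),
    ]

    parser = OptionParser()
    for name, typ, default, help_text in options:
        kwargs = {'action': 'store_true' if typ == 'bool' else 'store',
                  'default': default, 'help': help_text}
        if typ != 'bool':
            kwargs['type'] = typ
        parser.add_option('--' + name.replace('_', '-'), **kwargs)

    opts, args = parser.parse_args(['--max-line-length', '80', '--force'])
    print opts.max_line_length, opts.force   # 80 True
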
src/calibre/ebooks/epub/fix/container.py (new file, 200 lines)
@@ -0,0 +1,200 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, posixpath, urllib, sys, re

from lxml import etree

from calibre.ebooks.epub.fix import InvalidEpub
from calibre import guess_type, prepare_string_for_xml
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import iswindows
from calibre.utils.zipfile import ZipFile, ZIP_STORED

exists, join = os.path.exists, os.path.join

OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
OPF_NS = 'http://www.idpf.org/2007/opf'

class Container(object):

    META_INF = {
            'container.xml' : True,
            'manifest.xml' : False,
            'encryption.xml' : False,
            'metadata.xml' : False,
            'signatures.xml' : False,
            'rights.xml' : False,
    }

    def __init__(self, path, log):
        self.root = os.path.abspath(path)
        self.log = log
        self.dirtied = set([])
        self.cache = {}
        self.mime_map = {}

        if exists(join(self.root, 'mimetype')):
            os.remove(join(self.root, 'mimetype'))

        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        self.container = etree.fromstring(open(container_path, 'rb').read())
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
        opf_path = os.path.join(self.root,
                *opf_files[0].get('full-path').split('/'))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')

        # Map of relative paths with / separators to absolute
        # paths on filesystem with os separators
        self.name_map = {}
        for dirpath, dirnames, filenames in os.walk(self.root):
            for f in filenames:
                path = join(dirpath, f)
                name = os.path.relpath(path, self.root).replace(os.sep, '/')
                self.name_map[name] = path
                if path == opf_path:
                    self.opf_name = name
                    self.mime_map[name] = guess_type('a.opf')[0]

        for item in self.opf.xpath(
                '//opf:manifest/opf:item[@href and @media-type]',
                namespaces={'opf':OPF_NS}):
            href = item.get('href')
            self.mime_map[self.href_to_name(href,
                posixpath.dirname(self.opf_name))] = item.get('media-type')

def manifest_worthy_names(self):
|
||||||
|
for name in self.name_map:
|
||||||
|
if name.endswith('.opf'): continue
|
||||||
|
if name.startswith('META-INF') and \
|
||||||
|
posixpath.basename(name) in self.META_INF: continue
|
||||||
|
yield name
|
||||||
|
|
||||||
|
def delete_name(self, name):
|
||||||
|
self.mime_map.pop(name, None)
|
||||||
|
path = self.name_map[name]
|
||||||
|
os.remove(path)
|
||||||
|
self.name_map.pop(name)
|
||||||
|
|
||||||
|
def manifest_item_for_name(self, name):
|
||||||
|
href = self.name_to_href(name,
|
||||||
|
posixpath.dirname(self.opf_name))
|
||||||
|
q = prepare_string_for_xml(href, attribute=True)
|
||||||
|
existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q,
|
||||||
|
namespaces={'opf':OPF_NS})
|
||||||
|
if not existing:
|
||||||
|
return None
|
||||||
|
return existing[0]
|
||||||
|
|
||||||
|
def add_name_to_manifest(self, name):
|
||||||
|
item = self.manifest_item_for_name(name)
|
||||||
|
if item is not None:
|
||||||
|
return
|
||||||
|
manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
|
||||||
|
item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
|
||||||
|
href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
|
||||||
|
id=self.generate_manifest_id())
|
||||||
|
mt = guess_type(posixpath.basename(name))[0]
|
||||||
|
if not mt:
|
||||||
|
mt = 'application/octest-stream'
|
||||||
|
item.set('media-type', mt)
|
||||||
|
manifest.append(item)
|
||||||
|
|
||||||
|
def generate_manifest_id(self):
|
||||||
|
items = self.opf.xpath('//opf:manifest/opf:item[@id]',
|
||||||
|
namespaces={'opf':OPF_NS})
|
||||||
|
ids = set([x.get('id') for x in items])
|
||||||
|
for x in xrange(sys.maxint):
|
||||||
|
c = 'id%d'%x
|
||||||
|
if c not in ids:
|
||||||
|
return c
|
||||||
|
|
||||||
|
@property
|
||||||
|
def opf(self):
|
||||||
|
return self.get(self.opf_name)
|
||||||
|
|
||||||
|
def href_to_name(self, href, base=''):
|
||||||
|
href = urllib.unquote(href.partition('#')[0])
|
||||||
|
name = href
|
||||||
|
if base:
|
||||||
|
name = posixpath.join(base, href)
|
||||||
|
return name
|
||||||
|
|
||||||
|
def name_to_href(self, name, base):
|
||||||
|
if not base:
|
||||||
|
return name
|
||||||
|
return posixpath.relpath(name, base)
|
||||||
|
|
||||||
|
def get_raw(self, name):
|
||||||
|
path = self.name_map[name]
|
||||||
|
return open(path, 'rb').read()
|
||||||
|
|
||||||
|
def get(self, name):
|
||||||
|
if name in self.cache:
|
||||||
|
return self.cache[name]
|
||||||
|
raw = self.get_raw(name)
|
||||||
|
if name in self.mime_map:
|
||||||
|
raw = self._parse(raw, self.mime_map[name])
|
||||||
|
self.cache[name] = raw
|
||||||
|
return raw
|
||||||
|
|
||||||
|
def set(self, name, val):
|
||||||
|
self.cache[name] = val
|
||||||
|
self.dirtied.add(name)
|
||||||
|
|
||||||
|
def _parse(self, raw, mimetype):
|
||||||
|
mt = mimetype.lower()
|
||||||
|
if mt.endswith('+xml'):
|
||||||
|
parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
|
||||||
|
raw = xml_to_unicode(raw,
|
||||||
|
strip_encoding_pats=True, assume_utf8=True,
|
||||||
|
resolve_entities=True)[0].strip()
|
||||||
|
idx = raw.find('<html')
|
||||||
|
if idx == -1:
|
||||||
|
idx = raw.find('<HTML')
|
||||||
|
if idx > -1:
|
||||||
|
pre = raw[:idx]
|
||||||
|
raw = raw[idx:]
|
||||||
|
if '<!DOCTYPE' in pre:
|
||||||
|
user_entities = {}
|
||||||
|
for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
|
||||||
|
val = match.group(2)
|
||||||
|
if val.startswith('"') and val.endswith('"'):
|
||||||
|
val = val[1:-1]
|
||||||
|
user_entities[match.group(1)] = val
|
||||||
|
if user_entities:
|
||||||
|
pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys())))
|
||||||
|
raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
|
||||||
|
return etree.fromstring(raw, parser=parser)
|
||||||
|
return raw
|
||||||
|
|
||||||
|
def write(self, path):
|
||||||
|
for name in self.dirtied:
|
||||||
|
data = self.cache[name]
|
||||||
|
raw = data
|
||||||
|
if hasattr(data, 'xpath'):
|
||||||
|
raw = etree.tostring(data, encoding='utf-8',
|
||||||
|
xml_declaration=True)
|
||||||
|
with open(self.name_map[name], 'wb') as f:
|
||||||
|
f.write(raw)
|
||||||
|
self.dirtied.clear()
|
||||||
|
zf = ZipFile(path, 'w')
|
||||||
|
zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
|
||||||
|
compression=ZIP_STORED)
|
||||||
|
zf.add_dir(self.root)
|
||||||
|
zf.close()
|
||||||
|
|
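Container is usable on its own; here is a sketch of driving it by hand on an EPUB that has already been extracted to a directory (both paths below are hypothetical):

from calibre.utils.logging import default_log
from calibre.ebooks.epub.fix.container import Container

c = Container('/tmp/exploded_epub', default_log)
for name in list(c.manifest_worthy_names()):
    if c.manifest_item_for_name(name) is None:
        c.add_name_to_manifest(name)   # register the stray file in the OPF
c.set(c.opf_name, c.opf)               # mark the OPF dirty so write() flushes it
c.write('/tmp/fixed.epub')             # re-zips with the mimetype entry stored first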
src/calibre/ebooks/epub/fix/epubcheck.py (new file, 82 lines)
@ -0,0 +1,82 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
from calibre.utils.date import parse_date, strptime

class Epubcheck(ePubFixer):

    name = 'Workaround epubcheck bugs'

    @property
    def short_description(self):
        return _('Workaround epubcheck bugs')

    @property
    def long_description(self):
        return _('Workarounds for bugs in the latest release of epubcheck. '
                'epubcheck reports many things as errors that are not '
                'actually errors. epub-fix will try to detect these and replace '
                'them with constructs that epubcheck likes. This may cause '
                'significant changes to your epub; complain to the epubcheck '
                'project.')

    @property
    def fix_name(self):
        return 'epubcheck'

    def fix_pubdates(self):
        dirtied = False
        opf = self.container.opf
        for dcdate in opf.xpath('//dc:date',
                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
            raw = dcdate.text
            if not raw: raw = ''
            default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
            try:
                ts = parse_date(raw, assume_utc=False, as_utc=True,
                        default=default)
            except:
                raise InvalidEpub('Invalid date set in OPF', raw)
            sval = ts.strftime('%Y-%m-%d')
            if sval != raw:
                self.log.error(
                    'OPF contains date', raw, 'that epubcheck does not like')
                if self.fix:
                    dcdate.text = sval
                    self.log('\tReplaced', raw, 'with', sval)
                    dirtied = True
        if dirtied:
            self.container.set(self.container.opf_name, opf)

    def fix_preserve_aspect_ratio(self):
        for name in self.container.name_map:
            mt = self.container.mime_map.get(name, '')
            if mt.lower() == 'application/xhtml+xml':
                root = self.container.get(name)
                dirtied = False
                for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
                        namespaces={'svg':'http://www.w3.org/2000/svg'}):
                    self.log.error('Found <svg> element with'
                            ' preserveAspectRatio="none" which epubcheck '
                            'cannot handle')
                    if self.fix:
                        svg.set('preserveAspectRatio', 'xMidYMid meet')
                        dirtied = True
                        self.log('\tReplaced none with xMidYMid meet')
                if dirtied:
                    self.container.set(name, root)

    def run(self, container, opts, log, fix=False):
        self.container = container
        self.opts = opts
        self.log = log
        self.fix = fix
        self.fix_pubdates()
        self.fix_preserve_aspect_ratio()
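The date workaround relies on parse_date filling fields missing from the raw string out of the supplied default; a sketch of the normalization, assuming a bare year (a common offender in OPF files) as input:

from calibre.utils.date import parse_date, strptime

default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
ts = parse_date('2010', assume_utc=False, as_utc=True, default=default)
print ts.strftime('%Y-%m-%d')   # '2010-01-01', the YYYY-MM-DD form epubcheck accepts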
src/calibre/ebooks/epub/fix/main.py (new file, 56 lines)
@ -0,0 +1,56 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, os

from calibre.utils.config import OptionParser
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.zipfile import ZipFile
from calibre.utils.logging import default_log
from calibre.customize.ui import epub_fixers
from calibre.ebooks.epub.fix.container import Container

def option_parser():
    parser = OptionParser(usage=_(
        '%prog [options] file.epub\n\n'
        'Fix common problems in EPUB files that can cause them '
        'to be rejected by poorly designed publishing services.\n\n'
        'By default, no fixing is done and messages are printed out '
        'for each error detected. Use the options to control which errors '
        'are automatically fixed.'))
    for fixer in epub_fixers():
        fixer.add_options_to_parser(parser)

    return parser

def run(epub, opts, log):
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            zf = ZipFile(epub)
            zf.extractall()
            zf.close()
            container = Container(tdir, log)
            for fixer in epub_fixers():
                fix = getattr(opts, fixer.fix_name, False)
                fixer.run(container, opts, log, fix=fix)
            container.write(epub)

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        print
        default_log.error(_('You must specify an epub file'))
        return
    epub = os.path.abspath(args[1])
    run(epub, opts, default_log)

if __name__ == '__main__':
    main()
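Since flags are generated from each fixer's fix_name and options, the two fixers in this commit give the tool a command line of the form epub-fix --epubcheck --unmanifested --delete-unmanifested book.epub. The same invocation from Python, sketched with a placeholder file name:

from calibre.ebooks.epub.fix.main import main

# args[0] is the program name and args[1] the EPUB, exactly as main() expects
main(['epub-fix', '--epubcheck', '--unmanifested',
      '--delete-unmanifested', 'book.epub'])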
src/calibre/ebooks/epub/fix/unmanifested.py (new file, 49 lines)
@ -0,0 +1,49 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.epub.fix import ePubFixer

class Unmanifested(ePubFixer):

    name = 'Fix unmanifested files'

    @property
    def short_description(self):
        return _('Fix unmanifested files')

    @property
    def long_description(self):
        return _('Fix unmanifested files. epub-fix can either add them to '
                'the manifest or delete them, as specified by the '
                'delete unmanifested option.')

    @property
    def fix_name(self):
        return 'unmanifested'

    @property
    def options(self):
        return [('delete_unmanifested', 'bool', False,
            _('Delete unmanifested files instead of adding them to the manifest'))]

    def run(self, container, opts, log, fix=False):
        dirtied = False
        for name in list(container.manifest_worthy_names()):
            item = container.manifest_item_for_name(name)
            if item is None:
                log.error(name, 'not in manifest')
                if fix:
                    if opts.delete_unmanifested:
                        container.delete_name(name)
                        log('\tDeleted')
                    else:
                        container.add_name_to_manifest(name)
                        log('\tAdded to manifest')
                    dirtied = True
        if dirtied:
            container.set(container.opf_name, container.opf)
@ -380,10 +380,9 @@ class EPUBOutput(OutputFormatPlugin):
                sel = '.'+lb.get('class')
                for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                    if sel == rule.selectorList.selectorText:
-                       val = rule.style.removeProperty('margin-left')
-                       pval = rule.style.getProperty('padding-left')
-                       if val and not pval:
-                           rule.style.setProperty('padding-left', val)
+                       rule.style.removeProperty('margin-left')
+                       # padding-left breaks rendering in webkit and gecko
+                       rule.style.removeProperty('padding-left')

    # }}}
@ -20,7 +20,7 @@ from itertools import izip
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux, isfreebsd
+from calibre.constants import islinux, isfreebsd, iswindows
from calibre import unicode_path
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
@ -32,9 +32,14 @@ class Link(object):

    @classmethod
    def url_to_local_path(cls, url, base):
-       path = urlunparse(('', '', url.path, url.params, url.query, ''))
+       path = url.path
+       isabs = False
+       if iswindows and path.startswith('/'):
+           path = path[1:]
+           isabs = True
+       path = urlunparse(('', '', path, url.params, url.query, ''))
        path = unquote(path)
-       if os.path.isabs(path):
+       if isabs or os.path.isabs(path):
            return path
        return os.path.abspath(os.path.join(base, path))
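The iswindows branch exists because the path component of a Windows file:// URL carries a leading slash that is URL syntax, not part of the filesystem path. A quick sketch of what the parser hands this method (standard urlparse behaviour):

from urlparse import urlparse  # Python 2, as used throughout this code base

url = urlparse('file:///C:/Books/intro.html')
print url.path        # '/C:/Books/intro.html' -- not a usable Windows path
path = url.path[1:]   # 'C:/Books/intro.html' -- absolute once the slash is stripped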
@ -307,6 +312,7 @@ class HTMLInput(InputFormatPlugin):
                xpath
        from calibre import guess_type
        import cssutils
+       self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb
@ -323,7 +329,7 @@ class HTMLInput(InputFormatPlugin):
            metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
-           metadata.add('language', get_lang())
+           metadata.add('language', get_lang().replace('_', '-'))
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', self.oeb.translate(__('Unknown')))
@ -371,7 +377,7 @@ class HTMLInput(InputFormatPlugin):
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        for item in oeb.manifest.values():
-           if item.media_type in OEB_STYLES:
+           if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
@ -409,12 +415,30 @@ class HTMLInput(InputFormatPlugin):
            oeb.container = DirContainer(os.getcwdu(), oeb.log)
        return oeb

+   def link_to_local_path(self, link_, base=None):
+       if not isinstance(link_, unicode):
+           try:
+               link_ = link_.decode('utf-8', 'error')
+           except:
+               self.log.warn('Failed to decode link %r. Ignoring'%link_)
+               return None, None
+       try:
+           l = Link(link_, base if base else os.getcwdu())
+       except:
+           self.log.exception('Failed to process link: %r'%link_)
+           return None, None
+       if l.path is None:
+           # Not a local resource
+           return None, None
+       link = l.path.replace('/', os.sep).strip()
+       frag = l.fragment
+       if not link:
+           return None, None
+       return link, frag

    def resource_adder(self, link_, base=None):
-       link = self.urlnormalize(link_)
-       link, frag = self.urldefrag(link)
-       link = unquote(link).replace('/', os.sep)
-       if not link.strip():
+       link, frag = self.link_to_local_path(link_, base=base)
+       if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
@ -442,6 +466,9 @@ class HTMLInput(InputFormatPlugin):

            item = self.oeb.manifest.add(id, href, media_type)
            item.html_input_href = bhref
+           if guessed in self.OEB_STYLES:
+               item.override_css_fetch = partial(
+                       self.css_import_handler, os.path.dirname(link))
            item.data
            self.added_resources[link] = href
@ -450,7 +477,17 @@ class HTMLInput(InputFormatPlugin):
            nlink = '#'.join((nlink, frag))
        return nlink

+   def css_import_handler(self, base, href):
+       link, frag = self.link_to_local_path(href, base=base)
+       if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
+           return (None, None)
+       try:
+           raw = open(link, 'rb').read().decode('utf-8', 'replace')
+           raw = self.oeb.css_preprocessor(raw, add_namespace=True)
+       except:
+           self.log.exception('Failed to read CSS file: %r'%link)
+           return (None, None)
+       return (None, raw)
@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin):
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            open('lrs.xml', 'wb').write(xml.encode('utf-8'))
-       parser = etree.XMLParser(recover=True, no_network=True)
+       parser = etree.XMLParser(no_network=True, huge_tree=True)
        doc = etree.fromstring(xml, parser=parser)
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
@ -870,7 +870,7 @@ class Text(LRFStream):
        open_containers = collections.deque()
        for c in self.content:
            if isinstance(c, basestring):
-               s += prepare_string_for_xml(c)
+               s += prepare_string_for_xml(c).replace('\0', '')
            elif c is None:
                if open_containers:
                    p = open_containers.pop()
@ -268,7 +268,7 @@ class MetaInformation(object):
                ):
            prints(x, getattr(self, x, 'None'))

-   def smart_update(self, mi):
+   def smart_update(self, mi, replace_tags=False):
        '''
        Merge the information in C{mi} into self. In case of conflicts, the information
        in C{mi} takes precedence, unless the information in mi is NULL.
@ -282,7 +282,7 @@ class MetaInformation(object):
        for attr in ('author_sort', 'title_sort', 'category',
                'publisher', 'series', 'series_index', 'rating',
                'isbn', 'application_id', 'manifest', 'spine', 'toc',
-               'cover', 'language', 'guide', 'book_producer',
+               'cover', 'guide', 'book_producer',
                'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
                'publication_type', 'uuid'):
            if hasattr(mi, attr):
|
|||||||
setattr(self, attr, val)
|
setattr(self, attr, val)
|
||||||
|
|
||||||
if mi.tags:
|
if mi.tags:
|
||||||
self.tags += mi.tags
|
if replace_tags:
|
||||||
|
self.tags = mi.tags
|
||||||
|
else:
|
||||||
|
self.tags += mi.tags
|
||||||
self.tags = list(set(self.tags))
|
self.tags = list(set(self.tags))
|
||||||
|
|
||||||
if mi.author_sort_map:
|
if mi.author_sort_map:
|
||||||
@ -314,6 +317,11 @@ class MetaInformation(object):
|
|||||||
if len(other_comments.strip()) > len(my_comments.strip()):
|
if len(other_comments.strip()) > len(my_comments.strip()):
|
||||||
self.comments = other_comments
|
self.comments = other_comments
|
||||||
|
|
||||||
|
other_lang = getattr(mi, 'language', None)
|
||||||
|
if other_lang and other_lang.lower() != 'und':
|
||||||
|
self.language = other_lang
|
||||||
|
|
||||||
|
|
||||||
def format_series_index(self):
|
def format_series_index(self):
|
||||||
try:
|
try:
|
||||||
x = float(self.series_index)
|
x = float(self.series_index)
|
||||||
|
src/calibre/ebooks/metadata/covers.py (new file, 15 lines)
@ -0,0 +1,15 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.customize import Plugin

class CoverDownload(Plugin):

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('Cover download')
@ -15,7 +15,6 @@ from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.date import parse_date, utcnow

-DOUBAN_API_KEY = None
NAMESPACES = {
    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom' : 'http://www.w3.org/2005/Atom',
@ -35,13 +34,15 @@ date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
tag = XPath("descendant::db:tag")

+CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
+
class DoubanBooks(MetadataSource):

    name = 'Douban Books'
    description = _('Downloads metadata from Douban.com')
    supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
    author = 'Li Fanxi <lifanxi@freemindworld.com>' # The author of this plugin
-   version = (1, 0, 0) # The version number of this plugin
+   version = (1, 0, 1) # The version number of this plugin

    def fetch(self):
        try:
@ -65,7 +66,7 @@ class Query(object):
    type = "search"

    def __init__(self, title=None, author=None, publisher=None, isbn=None,
-                max_results=20, start_index=1):
+                max_results=20, start_index=1, api_key=''):
        assert not(title is None and author is None and publisher is None and \
                   isbn is None)
        assert (int(max_results) < 21)
@ -89,16 +90,16 @@ class Query(object):

        if self.type == "isbn":
            self.url = self.ISBN_URL + q
-           if DOUBAN_API_KEY is not None:
-               self.url = self.url + "?apikey=" + DOUBAN_API_KEY
+           if api_key != '':
+               self.url = self.url + "?apikey=" + api_key
        else:
            self.url = self.SEARCH_URL+urlencode({
                'q':q,
                'max-results':max_results,
                'start-index':start_index,
            })
-           if DOUBAN_API_KEY is not None:
-               self.url = self.url + "&apikey=" + DOUBAN_API_KEY
+           if api_key != '':
+               self.url = self.url + "&apikey=" + api_key

    def __call__(self, browser, verbose):
        if verbose:
@ -177,7 +178,7 @@ class ResultList(list):
            d = None
        return d

-   def populate(self, entries, browser, verbose=False):
+   def populate(self, entries, browser, verbose=False, api_key=''):
        for x in entries:
            try:
                id_url = entry_id(x)[0].text
@ -186,8 +187,8 @@ class ResultList(list):
                report(verbose)
            mi = MetaInformation(title, self.get_authors(x))
            try:
-               if DOUBAN_API_KEY is not None:
-                   id_url = id_url + "?apikey=" + DOUBAN_API_KEY
+               if api_key != '':
+                   id_url = id_url + "?apikey=" + api_key
                raw = browser.open(id_url).read()
                feed = etree.fromstring(raw)
                x = entry(feed)[0]
@ -203,12 +204,16 @@ class ResultList(list):
            self.append(mi)

def search(title=None, author=None, publisher=None, isbn=None,
-          verbose=False, max_results=40):
+          verbose=False, max_results=40, api_key=None):
    br = browser()
    start, entries = 1, []

+   if api_key is None:
+       api_key = CALIBRE_DOUBAN_API_KEY
+
    while start > 0 and len(entries) <= max_results:
        new, start = Query(title=title, author=author, publisher=publisher,
-           isbn=isbn, max_results=max_results, start_index=start)(br, verbose)
+           isbn=isbn, max_results=max_results, start_index=start, api_key=api_key)(br, verbose)
        if not new:
            break
        entries.extend(new)
@ -216,7 +221,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
    entries = entries[:max_results]

    ans = ResultList()
-   ans.populate(entries, br, verbose)
+   ans.populate(entries, br, verbose, api_key)
    return ans

def option_parser():
@ -10,10 +10,11 @@ from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log
from calibre.customize import Plugin
+from calibre.ebooks.metadata.library_thing import check_for_cover

metadata_config = None

-class MetadataSource(Plugin):
+class MetadataSource(Plugin): # {{{

    author = 'Kovid Goyal'
@ -130,7 +131,9 @@ class MetadataSource(Plugin):
    def customization_help(self):
        return 'This plugin can only be customized using the GUI'

-class GoogleBooks(MetadataSource):
+# }}}
+
+class GoogleBooks(MetadataSource): # {{{

    name = 'Google Books'
    description = _('Downloads metadata from Google Books')
@ -145,8 +148,9 @@ class GoogleBooks(MetadataSource):
            self.exception = e
            self.tb = traceback.format_exc()

+# }}}
+
-class ISBNDB(MetadataSource):
+class ISBNDB(MetadataSource): # {{{

    name = 'IsbnDB'
    description = _('Downloads metadata from isbndb.com')
@ -181,7 +185,9 @@ class ISBNDB(MetadataSource):
            'and enter your access key below.')
        return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')

-class Amazon(MetadataSource):
+# }}}
+
+class Amazon(MetadataSource): # {{{

    name = 'Amazon'
    metadata_type = 'social'
@ -198,37 +204,27 @@ class Amazon(MetadataSource):
            self.exception = e
            self.tb = traceback.format_exc()

+# }}}
+
-class LibraryThing(MetadataSource):
+class LibraryThing(MetadataSource): # {{{

    name = 'LibraryThing'
    metadata_type = 'social'
-   description = _('Downloads series information from librarything.com')
+   description = _('Downloads series/tags/rating information from librarything.com')

    def fetch(self):
        if not self.isbn:
            return
-       from calibre import browser
-       from calibre.ebooks.metadata import MetaInformation
-       import json
-       br = browser()
+       from calibre.ebooks.metadata.library_thing import get_social_metadata
        try:
-           raw = br.open(
-               'http://status.calibre-ebook.com/library_thing/metadata/'+self.isbn
-               ).read()
-           data = json.loads(raw)
-           if not data:
-               return
-           if 'error' in data:
-               raise Exception(data['error'])
-           if 'series' in data and 'series_index' in data:
-               mi = MetaInformation(self.title, [])
-               mi.series = data['series']
-               mi.series_index = data['series_index']
-               self.results = mi
+           self.results = get_social_metadata(self.title, self.book_author,
+                   self.publisher, self.isbn)
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

+# }}}
+
def result_index(source, result):
    if not result.isbn:
@ -268,6 +264,26 @@ class MetadataSources(object):
        for s in self.sources:
            s.join()

+def filter_metadata_results(item):
+   keywords = ["audio", "tape", "cassette", "abridged", "playaway"]
+   for keyword in keywords:
+       if item.publisher and keyword in item.publisher.lower():
+           return False
+   return True
+
+def do_cover_check(item):
+   item.has_cover = False
+   if item.isbn:
+       try:
+           item.has_cover = check_for_cover(item.isbn)
+       except:
+           pass # Cover not found
+
+def check_for_covers(items):
+   threads = [Thread(target=do_cover_check, args=(item,)) for item in items]
+   for t in threads: t.start()
+   for t in threads: t.join()
+
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
        verbose=0):
    assert not(title is None and author is None and publisher is None and \
@ -285,10 +301,73 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
    for fetcher in fetchers[1:]:
        merge_results(results, fetcher.results)

-   results = sorted(results, cmp=lambda x, y : cmp(
-       (x.comments.strip() if x.comments else ''),
-       (y.comments.strip() if y.comments else '')
-       ), reverse=True)
+   results = list(filter(filter_metadata_results, results))
+
+   check_for_covers(results)
+
+   words = ("the", "a", "an", "of", "and")
+   prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
+   trailing_paren_pat = re.compile(r'\(.*\)$')
+   whitespace_pat = re.compile(r'\s+')
+
+   def sort_func(x, y):
+
+       def cleanup_title(s):
+           if s is None:
+               s = _('Unknown')
+           s = s.strip().lower()
+           s = prefix_pat.sub(' ', s)
+           s = trailing_paren_pat.sub('', s)
+           s = whitespace_pat.sub(' ', s)
+           return s.strip()
+
+       t = cleanup_title(title)
+       x_title = cleanup_title(x.title)
+       y_title = cleanup_title(y.title)
+
+       # prefer titles that start with the search title
+       tx = cmp(t, x_title)
+       ty = cmp(t, y_title)
+       result = 0 if abs(tx) == abs(ty) else abs(tx) - abs(ty)
+
+       # then prefer titles that have a cover image
+       if result == 0:
+           result = -cmp(x.has_cover, y.has_cover)
+
+       # then prefer titles with the longest comment, within 10%
+       if result == 0:
+           cx = len(x.comments.strip() if x.comments else '')
+           cy = len(y.comments.strip() if y.comments else '')
+           t = (cx + cy) / 20
+           result = cy - cx
+           if abs(result) < t:
+               result = 0
+
+       return result
+
+   results = sorted(results, cmp=sort_func)
+
+   # if for some reason there is no comment in the top selection, go looking for one
+   if len(results) > 1:
+       if not results[0].comments or len(results[0].comments) == 0:
+           for r in results[1:]:
+               try:
+                   if title and title.lower() == r.title[:len(title)].lower() \
+                           and r.comments and len(r.comments):
+                       results[0].comments = r.comments
+                       break
+               except:
+                   pass
+   # Find a pubdate
+   pubdate = None
+   for r in results:
+       if r.pubdate is not None:
+           pubdate = r.pubdate
+           break
+   if pubdate is not None:
+       for r in results:
+           if r.pubdate is None:
+               r.pubdate = pubdate

    return results, [(x.name, x.exception, x.tb) for x in fetchers]
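The comment-length tie-break above treats two results as equal when their comment lengths differ by less than about 10% of their average length (t = (cx + cy) / 20). The rule in isolation:

def comments_cmp(cx, cy):
    # cx, cy: comment lengths of the two results being compared
    t = (cx + cy) / 20
    result = cy - cx        # the longer comment sorts first
    if abs(result) < t:
        result = 0          # close enough: treated as equal
    return result

print comments_cmp(1000, 1050)  # 0 -- within the ~10% band, a tie
print comments_cmp(100, 400)    # 300 -- the 400-character comment wins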
@ -34,7 +34,8 @@ def fetch_metadata(url, max=100, timeout=5.):
        errmsg = soup.find('errormessage').string
        raise ISBNDBError('Error fetching metadata: '+errmsg)
    total_results = int(book_list['total_results'])
-   np = '&page_number=%s&'%(page_number+1)
+   page_number += 1
+   np = '&page_number=%s&'%page_number
    url = re.sub(r'\&page_number=\d+\&', np, url)
    books.extend(book_list.findAll('bookdata'))
    max -= 1