Sync to trunk.

John Schember 2010-07-13 14:05:53 -04:00
commit b1eb8f5d58
211 changed files with 57578 additions and 51984 deletions

View File

@@ -4,6 +4,255 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.7.8
date: 2010-07-09
new features:
- title: "New tool to help prepare EPUBs for publication"
type: major
description: >
"calibre now contains a new command line tool called epub-fix that can automatically fix
common problems in EPUB files that cause them to be rejected by poorly designed publishing services.
The tool is plugin based for extensible functionality in the future. Currently, it can fix unmanifested files
and work around the date and SVG preserveAspectRatio bugs of epubcheck."
- title: "New icons for the toolbar buttons by Kamil Tatara"
- title: "Display rating (when available) in cover browser"
- title: "Clicking on the central cover int the cover browser now opens that book in the viewer"
- title: "Use the status bar instead of the area to the right of the location view to display status information"
- title: "Driver for the Pandigital Novel e-book reader"
bug fixes:
- title: "News download: Don not specify a font family for article descriptions"
- title: "News download: Fix regression introduced in 0.7.0 that broke download of some embedded content feeds"
- title: "MOBI Output: Partial support for nested superscript and subscripts."
tickets: [6132]
- title: "CHM Input: Fix handling of buggy CHM files with no .hhc"
tickets: [6087]
- title: "EPUB Input: Fix bug in unzipping EPUB files that have been zipped in depth first order."
tickets: [6127]
- title: "TXT Input: Convert HTML entities to characters."
tickets: [6114]
- title: "LRF Input: Handle LRF files with random null bytes in the text"
tickets: [6097]
- title: "Kobo driver: Fix detection of txt/html files on the device"
- title: "Fix opening of books when calibre library is on an unmapped network share in windows"
- title: "SONY driver: Only update the timestamp in the XML db for newly added books"
- title: "Cover browser: Fix rendering of center cover when width of cover browser is less than the width of a single cover"
- title: "Cover browser: Correct fix for setPixel out of bounds warning causing UI slowdown in calibre"
new recipes:
- title: "evz.ro"
author: Darko Miletic
- title: "Anchorage Daily News, China Economic Net, BBC Chinese and Singtao Daily"
author: rty
- title: Big Oven
author: Starson17
improved recipes:
- Haaretz
- Editor and Publisher
- Estadao
- version: 0.7.7
date: 2010-07-02
new features:
- title: "Support for the Nokia E52"
- title: "Searching on the size column"
- title: "iTunes driver: Add option to disable cover fetching for speeding up the fetching of large book collections"
bug fixes:
- title: "SONY driver: Only update metadata when books are sent to device."
- title: "TXT Input: Ensure the generated html is splittable"
tickets: [5904]
- title: "Fix infinite loop in default cover generation."
tickets: [6061]
- title: "HTML Input: Fix a parsing bug that was triggered in rare conditions"
tickets: [6064]
- title: "HTML2Zip plugin: Do not replace ligatures"
tickets: [6019]
- title: "iTunes driver: Fix transmission of non integral series numbers"
tickets: [6046]
- title: "Simplify implementation of cover caching and ensure cover browser is updated when covers are changed"
- title: "PDF metadata: Fix last character corrupted when setting metadata in encrypted files."
- title: "PDF metadata: Update the version of PoDoFo used to set metadata to 0.8.1. Hopefully that means more PDF files will work"
- title: "Device drivers: Speedup for dumping metadata cache to devices on Windows XP"
- title: "EPUB Output: Ensure that language setting is conformant to the specs"
- title: "MOBI Output: Fix a memory leak and a crash in the palmdoc compression routine"
- title: "Metadata download: Fix a regression that resulted in a failed download for some books"
new recipes:
- title: "Foreign Policy and Alo!"
author: Darko Miletic
- title: Statesman and ifzm
author: rty
improved recipes:
- Akter
- The Old New Thing
- version: 0.7.6
date: 2010-06-28
new features:
- title: "Add support for the new firmware of the Azbooka"
tickets: [5994]
- title: "A few speedups for calibre startup, should add up to a few seconds of startup time on slower machines"
- title: "Support for the Sweem MM300"
- title: "Add keyboard shorcut for Download metadata and covers"
bug fixes:
- title: "Fix regression in 0.7.5 that broke conversion of malformed HTML files (like those Microsoft Word outputs)"
type: major
tickets: [5991]
- title: "Don't download tags from librarything, as the tagging there is not very good"
- title: "Add mimetype for FB2 so that it can be served by the content server"
tickets: [6011]
- title: "Ensure cover is not resized to less than the available space in the Edit Meta Information dialog"
tickets: [6001]
- title: "SONY driver: Only update collections when sending book to device for the first time"
- title: "calibre should now work on windows when the location for the library contains non-ascii characters"
tickets: [5983]
- title: "Cover browser once again distorts instead of cropping covers that have an incorrect aspect ratio"
- title: "ISBNDb metadata plugin: Fix bug causing only first page of results to be fetched"
- title: "Move iTunes driver to the bottom so that it doesn't interfere with device detection for people that have iphones and an ereader plugged in"
improved recipes:
- Houston Chronicle
- Hindu
- Times of India
- New York Times
new recipes:
- title: Winnipeg Sun
author: rty
- version: 0.7.5
date: 2010-06-25
new features:
- title: "New driver for the Kobo featuring closer integration with the device."
- title: "Support for the Dell Streak, Eken Android tablet and the Astak Mentor EB600"
- title: "New series type custom column"
- title: "Add option in Send to device menu to connect to iTunes without any iDevice (experimental)"
- title: "iPad driver: Make setting iTunes Category from series optional. News download now optimizations for iPad output."
- title: "Add option to disable book cover animation"
tickets: [5909]
- title: "Edit meta information dialog: Remember last used size and splitter position."
tickets: [5908]
- title: "Metadata download: If any results have a published date, ensure they all do"
- title: "SONY driver: Add a preference setting in Preferences->Add/Save->Send to device to control how colelctions are managed on the device by calibre"
- title: "Metadata download: Filter out non book results. Also sort results by availability of covers for the isbn"
tickets: [5946]
- title: "Bulk editing for device collections in the device view via the context menu"
bug fixes:
- title: "When converting books using the calibre GUI, set the language of the output book to be the same as the language of the User Interface, instead of undefined. Fixes use of dictionary in iBooks"
- title: "PDF Output: Fix setting top/bottom margnis has no effect"
- title: "Conversion pipeline: Fix typo causing remove footer regex to always fail"
- title: "Handle device being yanked with queued device jobs gracefully"
- title: "Conversion pipeline: Handle deeply nested XML structures"
tickets: [5931]
- title: "Conversion pipeline: Fix handling of lists with a specified left margin"
tickets: [5877]
- title: "Restore workaround for ADE buggy rendering of anchors as links. However, make it overridable by extra CSS"
- title: "Fix LibraryThing metadata download plugin"
- title: "Fix multiple ratings displayed in Tag Browser for some legacy databases"
- title: "Fix invocation of postprocess file type plugins plugins"
- title: "HTML Input: Handle @import directives in linked css files."
tickets: [5135]
- title: "HTML Input: Handle absolute paths in resource links on windows correctly."
tickets: [3031]
- title: "E-book viewer: Handle font-face rules specify multiple families to be substituted"
- title: "Cover browser: Set aspect ratio of covers to 3:4 instead of 2:3. Crop rather than distort covers whoose aspect ratio is different from this. Antialias the rendering of the central cover"
- title: "Reset Tag browser if the text in the search box is edited"
- title: "Fix detection of SD card in Samsung Galaxy windows driver"
new recipes:
- title: "L'Osservatore Romano"
author: Darko Miletic
- title: China Press, London Free Press, People Daily
author: rty
improved recipes:
- Zaobao
- New Scientist
- National Post
- London review of books
- version: 0.7.4
  date: 2010-06-19

File diff suppressed because it is too large (image: 89 KiB before, 3.8 KiB after)

File diff suppressed because it is too large (image: 75 KiB before, 11 KiB after)

File diff suppressed because one or more lines are too long (image: 117 KiB before, 5.0 KiB after)

File diff suppressed because it is too large (new image: 105 KiB)

Binary file not shown (image: 17 KiB before, 17 KiB after)

Binary file not shown (new image: 25 KiB)

View File

@@ -1752,7 +1752,7 @@
   sodipodi:cy="93.331604"
   sodipodi:cx="-166.53223"
   id="path6082"
-  style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
+  style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
   sodipodi:type="arc" /></clipPath><radialGradient
   inkscape:collect="always"
   xlink:href="#linearGradient5990"
@@ -2513,7 +2513,7 @@
   transform="matrix(-1.7332269,0,0,1.7332269,-228.13814,-101.76485)"
   clip-path="none" /><path
   sodipodi:type="arc"
-  style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
+  style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
   id="path3915"
   sodipodi:cx="-166.53223"
   sodipodi:cy="93.331604"
@@ -2901,22 +2901,8 @@
   id="g133">
   <defs
     id="defs135" />
-  <use
-    id="use138"
-    x="0"
-    y="0"
-    width="121"
-    height="120" />
   <clipPath
     id="XMLID_215_">
-  <use
-    id="use141"
-    x="0"
-    y="0"
-    width="121"
-    height="120" />
   </clipPath>
   <g
     clip-path="url(#XMLID_215_)"

(image: 116 KiB before, 116 KiB after)

View File

@@ -0,0 +1,269 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generator: Adobe Illustrator 12.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 51448) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
version="1.0"
id="Livello_1"
width="128"
height="128"
viewBox="0 0 139 139"
overflow="visible"
enable-background="new 0 0 139 139"
xml:space="preserve"
sodipodi:version="0.32"
inkscape:version="0.45+devel"
sodipodi:docname="system-help.svgz"
inkscape:output_extension="org.inkscape.output.svgz.inkscape"
style="overflow:visible"><metadata
id="metadata3164"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" /></cc:Work></rdf:RDF></metadata><defs
id="defs3162"><filter
inkscape:collect="always"
x="-0.132641"
width="1.265282"
y="-0.34752154"
height="1.6950431"
id="filter3547"><feGaussianBlur
inkscape:collect="always"
stdDeviation="2.7512044"
id="feGaussianBlur3549" /></filter><filter
inkscape:collect="always"
id="filter5097"><feGaussianBlur
inkscape:collect="always"
stdDeviation="2.32"
id="feGaussianBlur5099" /></filter><filter
inkscape:collect="always"
x="-0.143268"
width="1.286536"
y="-0.072184406"
height="1.1443688"
id="filter5125"><feGaussianBlur
inkscape:collect="always"
stdDeviation="1.91024"
id="feGaussianBlur5127" /></filter></defs><sodipodi:namedview
inkscape:window-height="697"
inkscape:window-width="1024"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
guidetolerance="10.0"
gridtolerance="10.0"
objecttolerance="10.0"
borderopacity="1.0"
bordercolor="#666666"
pagecolor="#ffffff"
id="base"
inkscape:zoom="2.9352518"
inkscape:cx="99.496726"
inkscape:cy="69.329657"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:current-layer="Livello_1"
height="128px"
width="128px" />
<filter
id="AI_Sfocatura_4">
<feGaussianBlur
stdDeviation="4"
id="feGaussianBlur3096" />
</filter>
<filter
id="AI_Sfocatura_2">
<feGaussianBlur
stdDeviation="2"
id="feGaussianBlur3099" />
</filter>
<radialGradient
id="XMLID_12_"
cx="69.600098"
cy="69.576698"
r="58"
gradientTransform="matrix(1,0,0,-0.1823,0,134.8566)"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
style="stop-color:#000000"
id="stop3102" />
<stop
offset="1"
style="stop-color:#000000;stop-opacity:0;"
id="stop3104" />
</radialGradient>
<circle
sodipodi:ry="58"
sodipodi:rx="58"
sodipodi:cy="69.599998"
sodipodi:cx="69.599998"
style="opacity:0.7;fill:#000000;fill-opacity:1;stroke:none;filter:url(#filter5097)"
id="circle5091"
r="58"
cy="69.599998"
cx="69.599998"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)" /><ellipse
cx="69.599998"
cy="122.173"
rx="58"
ry="10.573"
id="ellipse3106"
style="opacity:0.6;fill:url(#XMLID_12_)"
sodipodi:cx="69.599998"
sodipodi:cy="122.173"
sodipodi:rx="58"
sodipodi:ry="10.573"
transform="translate(-9.9998474e-2,1.9102535)" />
<radialGradient
id="XMLID_13_"
cx="69.600098"
cy="69.600098"
r="58"
gradientUnits="userSpaceOnUse">
<stop
offset="0.6154"
style="stop-color:#EEEEEE"
id="stop3113" />
<stop
offset="0.8225"
style="stop-color:#DDDDDD"
id="stop3115" />
<stop
offset="1"
style="stop-color:#FFFFFF"
id="stop3117" />
</radialGradient>
<circle
cx="69.599998"
cy="69.599998"
r="58"
id="circle3119"
style="fill:url(#XMLID_13_)"
sodipodi:cx="69.599998"
sodipodi:cy="69.599998"
sodipodi:rx="58"
sodipodi:ry="58"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)" />
<linearGradient
id="XMLID_14_"
gradientUnits="userSpaceOnUse"
x1="27.6001"
y1="69.600098"
x2="111.6001"
y2="69.600098"
gradientTransform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<stop
offset="0"
style="stop-color:#2A94EC"
id="stop3122" />
<stop
offset="1"
style="stop-color:#0057AE"
id="stop3124" />
</linearGradient>
<path
d="M 26.062502,67.328127 C 26.062502,92.477355 46.522651,112.9375 71.671877,112.9375 C 96.821104,112.9375 117.28125,92.477355 117.28125,67.328127 C 117.28125,42.178901 96.821104,21.718753 71.671877,21.718753 C 46.522651,21.718753 26.062502,42.178901 26.062502,67.328127 z"
id="path3126"
style="fill:url(#XMLID_14_)" />
<g
id="circle22111"
cy="92"
rx="36"
ry="36"
cx="343.99899"
enable-background="new "
style="opacity:0.3;filter:url(#filter3547)"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<path
d="M 77.041,104.759 C 63.767,106.115 50.122,103.11 46.565,98.042 C 43.007,92.976 50.885,87.768 64.16,86.41 C 77.434,85.054 91.079,88.058 94.637,93.126 C 98.193,98.194 90.315,103.401 77.041,104.759 z"
id="path3129"
style="fill:#a8dde0" />
</g>
<linearGradient
id="circle16776_1_"
gradientUnits="userSpaceOnUse"
x1="135.5601"
y1="417.66461"
x2="161.87621"
y2="417.66461"
gradientTransform="matrix(0,1.7280523,1.7280523,0,-650.07477,-218.71693)">
<stop
offset="0"
style="stop-color:#FFFFFF"
id="stop3132" />
<stop
offset="1"
style="stop-color:#ffffff;stop-opacity:0;"
id="stop3134" />
</linearGradient>
<path
id="circle16776"
enable-background="new "
d="M 71.671877,24.06655 C 50.288682,24.06655 32.41958,38.77123 28.113838,58.349597 C 36.698174,66.142284 52.986151,54.358777 71.671877,54.358777 C 90.357604,54.358777 106.64666,66.142284 115.22991,58.349597 C 110.92417,38.77123 93.056158,24.06655 71.671877,24.06655 z"
style="opacity:0.8;fill:url(#circle16776_1_)" />
<g
id="g3137"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<defs
id="defs3139"><path
id="XMLID_10_"
d="M 27.6,69.6 C 27.6,92.759 46.441,111.6 69.6,111.6 C 92.759,111.6 111.6,92.759 111.6,69.6 C 111.6,46.441 92.759,27.6 69.6,27.6 C 46.441,27.6 27.6,46.441 27.6,69.6 z" /></defs>
<clipPath
id="XMLID_6_">
<use
xlink:href="#XMLID_10_"
id="use3143"
x="0"
y="0"
width="139"
height="139" />
</clipPath>
<g
clip-path="url(#XMLID_6_)"
id="g3145"
style="filter:url(#AI_Sfocatura_2)">
<path
d="M 27.6,69.6 C 27.6,92.759 46.441,111.6 69.6,111.6 C 92.759,111.6 111.6,92.759 111.6,69.6 C 111.6,46.441 92.759,27.6 69.6,27.6 C 46.441,27.6 27.6,46.441 27.6,69.6 z"
id="path3147"
style="fill:none;stroke:#00316e;stroke-width:2" />
</g>
</g>
<g
transform="matrix(1.0859375,0,0,1.1113796,-3.201342,-9.3177223)"
id="g5119"
style="fill:#00316e;filter:url(#filter5125)"><path
style="fill:#00316e"
d="M 63.37,80.089 L 63.192,77.746 C 63.012,73.148 64.44,68.462 68.451,63.684 C 71.304,60.26 73.62,57.286 73.62,54.221 C 73.62,51.157 71.571,48.994 67.202,48.903 C 64.173,48.903 60.696,49.895 58.289,51.517 L 55.348,41.784 C 58.556,39.89 63.815,38.088 70.233,38.088 C 81.91,38.088 87.348,44.668 87.348,52.058 C 87.348,58.997 83.069,63.415 79.681,67.289 C 76.472,70.894 75.046,74.41 75.135,78.466 L 75.135,80.088 L 63.37,80.088 L 63.37,80.089 z"
id="path5121" /><circle
style="fill:#00316e"
sodipodi:ry="8"
sodipodi:rx="8"
sodipodi:cy="93.599998"
sodipodi:cx="69.599998"
cx="69.599998"
cy="93.599998"
r="8"
id="circle5123" /></g><g
id="g5101"
transform="matrix(1.0859375,0,0,1.0859375,-3.201342,-8.2531233)"><path
id="path3157"
d="M 63.37,80.089 L 63.192,77.746 C 63.012,73.148 64.44,68.462 68.451,63.684 C 71.304,60.26 73.62,57.286 73.62,54.221 C 73.62,51.157 71.571,48.994 67.202,48.903 C 64.173,48.903 60.696,49.895 58.289,51.517 L 55.348,41.784 C 58.556,39.89 63.815,38.088 70.233,38.088 C 81.91,38.088 87.348,44.668 87.348,52.058 C 87.348,58.997 83.069,63.415 79.681,67.289 C 76.472,70.894 75.046,74.41 75.135,78.466 L 75.135,80.088 L 63.37,80.088 L 63.37,80.089 z"
style="fill:#ffffff" /><circle
id="circle3159"
r="8"
cy="93.599998"
cx="69.599998"
sodipodi:cx="69.599998"
sodipodi:cy="93.599998"
sodipodi:rx="8"
sodipodi:ry="8"
style="fill:#ffffff" /></g>
</svg>

(new image: 8.4 KiB)

File diff suppressed because it is too large (new image: 133 KiB)

File diff suppressed because it is too large (image: 38 KiB before, 3.1 KiB after)

resources/images/help.svg (new file, 203 lines)
View File

@@ -0,0 +1,203 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
version="1.0"
width="128"
height="128"
viewBox="0 0 139 139"
id="Livello_1"
xml:space="preserve"
style="overflow:visible"><defs
id="defs3162"><filter
x="-0.132641"
y="-0.34752154"
width="1.265282"
height="1.6950431"
color-interpolation-filters="sRGB"
id="filter3547"><feGaussianBlur
id="feGaussianBlur3549"
stdDeviation="2.7512044" /></filter><filter
color-interpolation-filters="sRGB"
id="filter5097"><feGaussianBlur
id="feGaussianBlur5099"
stdDeviation="2.32" /></filter><filter
x="-0.143268"
y="-0.072184406"
width="1.286536"
height="1.1443688"
color-interpolation-filters="sRGB"
id="filter5125"><feGaussianBlur
id="feGaussianBlur5127"
stdDeviation="1.91024" /></filter></defs>
<filter
color-interpolation-filters="sRGB"
id="AI_Sfocatura_4">
<feGaussianBlur
id="feGaussianBlur3096"
stdDeviation="4" />
</filter>
<filter
color-interpolation-filters="sRGB"
id="AI_Sfocatura_2">
<feGaussianBlur
id="feGaussianBlur3099"
stdDeviation="2" />
</filter>
<radialGradient
cx="69.600098"
cy="69.576698"
r="58"
id="XMLID_12_"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(1,0,0,-0.1823,0,134.8566)">
<stop
id="stop3102"
style="stop-color:#000000;stop-opacity:1"
offset="0" />
<stop
id="stop3104"
style="stop-color:#000000;stop-opacity:0"
offset="1" />
</radialGradient>
<circle
cx="69.599998"
cy="69.599998"
r="58"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)"
id="circle5091"
style="opacity:0.7;fill:#000000;fill-opacity:1;stroke:none;filter:url(#filter5097)" />
<radialGradient
cx="69.600098"
cy="69.600098"
r="58"
id="XMLID_13_"
gradientUnits="userSpaceOnUse">
<stop
id="stop3113"
style="stop-color:#eeeeee;stop-opacity:1"
offset="0.61540002" />
<stop
id="stop3115"
style="stop-color:#dddddd;stop-opacity:1"
offset="0.82249999" />
<stop
id="stop3117"
style="stop-color:#ffffff;stop-opacity:1"
offset="1" />
</radialGradient>
<circle
cx="69.599998"
cy="69.599998"
r="58"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)"
id="circle3119"
style="fill:url(#XMLID_13_)" />
<linearGradient
x1="27.6001"
y1="69.600098"
x2="111.6001"
y2="69.600098"
id="XMLID_14_"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<stop
id="stop3122"
style="stop-color:#2a94ec;stop-opacity:1"
offset="0" />
<stop
id="stop3124"
style="stop-color:#0057ae;stop-opacity:1"
offset="1" />
</linearGradient>
<path
d="m 26.062502,67.328127 c 0,25.149228 20.460149,45.609373 45.609375,45.609373 25.149227,0 45.609373,-20.460145 45.609373,-45.609373 0,-25.149226 -20.460146,-45.609374 -45.609373,-45.609374 -25.149226,0 -45.609375,20.460148 -45.609375,45.609374 z"
id="path3126"
style="fill:url(#XMLID_14_)" />
<g
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)"
id="circle22111"
style="opacity:0.3;filter:url(#filter3547)">
<path
d="M 77.041,104.759 C 63.767,106.115 50.122,103.11 46.565,98.042 43.007,92.976 50.885,87.768 64.16,86.41 c 13.274,-1.356 26.919,1.648 30.477,6.716 3.556,5.068 -4.322,10.275 -17.596,11.633 z"
id="path3129"
style="fill:#a8dde0" />
</g>
<linearGradient
x1="135.5601"
y1="417.66461"
x2="161.87621"
y2="417.66461"
id="circle16776_1_"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0,1.7280523,1.7280523,0,-650.07477,-218.71693)">
<stop
id="stop3132"
style="stop-color:#ffffff;stop-opacity:1"
offset="0" />
<stop
id="stop3134"
style="stop-color:#ffffff;stop-opacity:0"
offset="1" />
</linearGradient>
<path
d="m 71.671877,24.06655 c -21.383195,0 -39.252297,14.70468 -43.558039,34.283047 8.584336,7.792687 24.872313,-3.99082 43.558039,-3.99082 18.685727,0 34.974783,11.783507 43.558033,3.99082 C 110.92417,38.77123 93.056158,24.06655 71.671877,24.06655 z"
id="circle16776"
style="opacity:0.8;fill:url(#circle16776_1_)" />
<g
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)"
id="g3137">
<defs
id="defs3139"><path
d="m 27.6,69.6 c 0,23.159 18.841,42 42,42 23.159,0 42,-18.841 42,-42 0,-23.159 -18.841,-42 -42,-42 -23.159,0 -42,18.841 -42,42 z"
id="XMLID_10_" /></defs>
<clipPath
id="XMLID_6_">
<use
id="use3143"
x="0"
y="0"
width="139"
height="139"
xlink:href="#XMLID_10_" />
</clipPath>
<g
clip-path="url(#XMLID_6_)"
id="g3145"
style="filter:url(#AI_Sfocatura_2)">
<path
d="m 27.6,69.6 c 0,23.159 18.841,42 42,42 23.159,0 42,-18.841 42,-42 0,-23.159 -18.841,-42 -42,-42 -23.159,0 -42,18.841 -42,42 z"
id="path3147"
style="fill:none;stroke:#00316e;stroke-width:2" />
</g>
</g>
<g
transform="matrix(1.0859375,0,0,1.1113796,-3.201342,-9.3177223)"
id="g5119"
style="fill:#00316e;filter:url(#filter5125)"><path
d="m 63.37,80.089 -0.178,-2.343 c -0.18,-4.598 1.248,-9.284 5.259,-14.062 2.853,-3.424 5.169,-6.398 5.169,-9.463 0,-3.064 -2.049,-5.227 -6.418,-5.318 -3.029,0 -6.506,0.992 -8.913,2.614 l -2.941,-9.733 c 3.208,-1.894 8.467,-3.696 14.885,-3.696 11.677,0 17.115,6.58 17.115,13.97 0,6.939 -4.279,11.357 -7.667,15.231 -3.209,3.605 -4.635,7.121 -4.546,11.177 l 0,1.622 -11.765,0 0,0.001 z"
id="path5121"
style="fill:#00316e" /><circle
cx="69.599998"
cy="93.599998"
r="8"
id="circle5123"
style="fill:#00316e" /></g><g
transform="matrix(1.0859375,0,0,1.0859375,-3.201342,-8.2531233)"
id="g5101"><path
d="m 63.37,80.089 -0.178,-2.343 c -0.18,-4.598 1.248,-9.284 5.259,-14.062 2.853,-3.424 5.169,-6.398 5.169,-9.463 0,-3.064 -2.049,-5.227 -6.418,-5.318 -3.029,0 -6.506,0.992 -8.913,2.614 l -2.941,-9.733 c 3.208,-1.894 8.467,-3.696 14.885,-3.696 11.677,0 17.115,6.58 17.115,13.97 0,6.939 -4.279,11.357 -7.667,15.231 -3.209,3.605 -4.635,7.121 -4.546,11.177 l 0,1.622 -11.765,0 0,0.001 z"
id="path3157"
style="fill:#ffffff" /><circle
cx="69.599998"
cy="93.599998"
r="8"
id="circle3159"
style="fill:#ffffff" /></g>
</svg>

(new image: 6.3 KiB)

File diff suppressed because one or more lines are too long (image: 109 KiB before, 4.1 KiB after)

Binary file not shown (new image: 753 B)

Binary file not shown (new image: 717 B)

Binary file not shown (new image: 836 B)

Binary file not shown (new image: 1.2 KiB)

Binary file not shown (new image: 315 B)

Binary file not shown (new image: 315 B)

File diff suppressed because it is too large (image: 58 KiB before, 6.3 KiB after)

File diff suppressed because it is too large (image: 46 KiB before, 3.6 KiB after)

File diff suppressed because it is too large (image: 69 KiB before, 7.2 KiB after)

File diff suppressed because it is too large (image: 73 KiB before, 2.2 KiB after)

View File

@@ -15,7 +15,7 @@ class Akter(BasicNewsRecipe):
    category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
    oldest_article = 8
    max_articles_per_feed = 100
-   no_stylesheets = False
+   no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    masthead_url = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png'
@@ -23,9 +23,9 @@ class Akter(BasicNewsRecipe):
    publication_type = 'magazine'
    remove_empty_feeds = True
    PREFIX = 'http://www.akter.co.rs'
-   extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+   extra_css = """
    @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
-   .article_description,body,.lokacija{font-family: Arial,Helvetica,sans1,sans-serif}
+   .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
    .color-2{display:block; margin-bottom: 10px; padding: 5px, 10px;
    border-left: 1px solid #D00000; color: #D00000}
    img{margin-bottom: 0.8em} """

View File

@@ -0,0 +1,65 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.alo.rs
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Alo_Novine(BasicNewsRecipe):
title = 'Alo!'
__author__ = 'Darko Miletic'
description = "News Portal from Serbia"
publisher = 'Alo novine d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
delay = 4
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'sr'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
.lead {font-size: 1.3em}
h1{color: #DB0700}
.article_uvod{font-style: italic; font-size: 1.2em}
img{margin-bottom: 0.8em} """
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name=['object','link','embed'])]
remove_attributes = ['height','width']
feeds = [
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti')
,(u'Politika' , u'http://www.alo.rs/rss/politika')
,(u'Vesti' , u'http://www.alo.rs/rss/vesti')
,(u'Sport' , u'http://www.alo.rs/rss/sport')
,(u'Ljudi' , u'http://www.alo.rs/rss/ljudi')
,(u'Saveti' , u'http://www.alo.rs/rss/saveti')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def print_version(self, url):
artl = url.rpartition('/')[0]
artid = artl.rpartition('/')[2]
return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + artid
def image_url_processor(self, baseurl, url):
return url.replace('alo.rs//','alo.rs/')
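
A note on the print_version() hook above: it takes the article id from the second-to-last URL segment and splices it into the site's print.php endpoint. A quick sketch of that computation on a made-up URL (the path below is hypothetical, not taken from a real feed):

    url = 'http://www.alo.rs/vesti/12345/naslov-teksta'  # hypothetical
    artl = url.rpartition('/')[0]        # 'http://www.alo.rs/vesti/12345'
    artid = artl.rpartition('/')[2]      # '12345'
    print('http://www.alo.rs/resources/templates/tools/print.php?id=' + artid)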

View File

@@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
title = u'Anchorage Daily News'
__author__ = 'rty'
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Alaska News', u'http://www.adn.com/news/alaska/index.xml'),
(u'Business', u'http://www.adn.com/money/index.xml'),
(u'Sports', u'http://www.adn.com/sports/index.xml'),
(u'Politics', u'http://www.adn.com/politics/index.xml'),
(u'Lifestyles', u'http://www.adn.com/life/index.xml'),
(u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
]
description = "Alaska's Newspaper"
publisher = 'http://www.adn.com'
category = 'news, Alaska, Anchorage'
language = 'en'
extra_css = '''
p{font-weight: normal;text-align: justify}
'''
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'latin-1'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
keep_only_tags = [
dict(name='div', attrs={'class':'left_col story_mainbar'}),
]
remove_tags = [
dict(name='div', attrs={'class':'story_tools'}),
dict(name='p', attrs={'class':'ad_label'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'advertisement'}),
]

View File

@@ -12,9 +12,9 @@ class AssociatedPress(BasicNewsRecipe):
    max_articles_per_feed = 15
    html2lrf_options = ['--force-page-break-before-tag="chapter"']
    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
            (r'<body class="apple-rss-no-unread-mode" onLoad="setup(null)">.*?<!-- start Entries -->', lambda match : '<body>'),
@@ -25,10 +25,10 @@ class AssociatedPress(BasicNewsRecipe):
            (r'<p class="ap-story-p">', lambda match : '<p>'),
            (r'Learn more about our <a href="http://apdigitalnews.com/privacy.html">Privacy Policy</a>.*?</body>', lambda match : '</body>'),
        ]
    ]
    feeds = [ ('AP Headlines', 'http://hosted.ap.org/lineups/TOPHEADS-rss_2.0.xml?SITE=ORAST&SECTION=HOME'),
              ('AP US News', 'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml?SITE=CAVIC&SECTION=HOME'),
              ('AP World News', 'http://hosted.ap.org/lineups/WORLDHEADS-rss_2.0.xml?SITE=SCAND&SECTION=HOME'),
@@ -38,4 +38,4 @@ class AssociatedPress(BasicNewsRecipe):
              ('AP Health News', 'http://hosted.ap.org/lineups/HEALTHHEADS-rss_2.0.xml?SITE=FLDAY&SECTION=HOME'),
              ('AP Science News', 'http://hosted.ap.org/lineups/SCIENCEHEADS-rss_2.0.xml?SITE=OHCIN&SECTION=HOME'),
              ('AP Strange News', 'http://hosted.ap.org/lineups/STRANGEHEADS-rss_2.0.xml?SITE=WCNC&SECTION=HOME'),
    ]

View File

@@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
title = u'BBC Chinese'
oldest_article = 7
max_articles_per_feed = 100
feeds = [
(u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
(u'\u56fd\u9645\u65b0\u95fb', u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
(u'\u4e24\u5cb8\u4e09\u5730', u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
(u'\u91d1\u878d\u8d22\u7ecf', u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
(u'\u7f51\u4e0a\u4e92\u52a8', u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
(u'\u97f3\u89c6\u56fe\u7247', u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
(u'\u5206\u6790\u8bc4\u8bba', u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml')
]
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
h1 {font-family: 'DroidFont', serif;}\n
.articledescription {font-family: 'DroidFont', serif;}
'''
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'British Broadcasting Corporation'
description = 'BBC news in Chinese'
category = 'News, Chinese'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'
keep_only_tags = [
dict(name='h1'),
dict(name='p', attrs={'class':['primary-topic','summary']}),
dict(name='div', attrs={'class':['bodytext','datestamp']}),
]

View File

@@ -0,0 +1,64 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BigOven(BasicNewsRecipe):
title = 'BigOven'
__author__ = 'Starson17'
description = 'Recipes for the Foodie in us all. Registration is free. A fake username and password just gives smaller photos.'
language = 'en'
category = 'news, food, recipes, gourmet'
publisher = 'Starson17'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.software.com/images/products/BigOven%20Logo_177_216.JPG'
max_articles_per_feed = 30
needs_subscription = True
conversion_options = {'linearize_tables' : True
, 'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.bigoven.com/')
br.select_form(name='form1')
br['TopMenu_bo1$email'] = self.username
br['TopMenu_bo1$password'] = self.password
br.submit()
return br
remove_attributes = ['style', 'font']
keep_only_tags = [dict(name='h1')
,dict(name='div', attrs={'class':'img'})
,dict(name='div', attrs={'id':'intro'})
]
remove_tags = [dict(name='div', attrs={'style':["overflow: visible;"]})
,dict(name='div', attrs={'class':['ctas']})
#,dict(name='a', attrs={'class':['edit']})
,dict(name='p', attrs={'class':['byline']})
]
feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]
def preprocess_html(self, soup):
for tag in soup.findAll(name='a', attrs={'class':['edit']}):
tag.parent.extract()
for tag in soup.findAll(name='a', attrs={'class':['deflink']}):
tag.replaceWith(tag.string)
return soup
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:medium;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
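
A note on preprocess_html() above: replaceWith(tag.string) is the usual unwrap idiom, keeping an anchor's text while discarding the link itself, so 'deflink' definition links do not survive into the e-book. A minimal standalone sketch of the same idea, assuming calibre's bundled BeautifulSoup and made-up markup:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup  # calibre's bundled parser

    soup = BeautifulSoup('<p>Mix the <a class="deflink" href="#">flour</a> well.</p>')
    for tag in soup.findAll(name='a', attrs={'class': ['deflink']}):
        tag.replaceWith(tag.string)  # keep the text, drop the link
    print(soup)  # <p>Mix the flour well.</p>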

View File

@@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278162597(BasicNewsRecipe):
__author__ = 'rty'
title = u'China Economic Net'
oldest_article = 7
max_articles_per_feed = 100
publisher = 'www.ce.cn - China Economic net - Beijing'
description = 'China Economic Net Magazine'
category = 'Economic News Magazine, Chinese, China'
feeds = [
(u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'),
(u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'),
(u'Health 健康', u'http://www.ce.cn/health/index_6294.xml'),
(u'Technology 科技', u'http://sci.ce.cn/mainpage/index_6307.xml'),
(u'Domestic Politics 国内时政', u'http://www.ce.cn/xwzx/gnsz/index_6273.xml')
]
masthead_url = 'http://finance.ce.cn/images/08mdy_logo.gif'
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
h1 {font-family: 'DroidFont', serif;}\n
.articledescription {font-family: 'DroidFont', serif;}
'''
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh-cn'
encoding = 'gb2312'
conversion_options = {'linearize_tables':True}
keep_only_tags = [
dict(name='h1', attrs={'id':'articleTitle'}),
dict(name='div', attrs={'class':'laiyuan'}),
dict(name='div', attrs={'id':'articleText'}),
]

View File

@@ -0,0 +1,71 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277228948(BasicNewsRecipe):
title = u'China Press USA'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'www.chinapressusa.com'
description = 'Overseas Chinese Network Newspaper in the USA'
category = 'News in Chinese, USA'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
#encoding = 'GB2312'
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url ='http://www.chinapressusa.com/common/images/logo.gif'
extra_css = '''
@font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {
margin-right: 8pt;
font-family: 'DroidFont', serif;}
h1 {font-family: 'DroidFont', serif, sans-serif}
.show {font-family: 'DroidFont', serif, sans-serif}
'''
feeds = [
(u'\u65b0\u95fb\u9891\u9053', u'http://news.uschinapress.com/news.xml'),
(u'\u534e\u4eba\u9891\u9053', u'http://chinese.uschinapress.com/chinese.xml'),
(u'\u8bc4\u8bba\u9891\u9053', u'http://review.uschinapress.com/review.xml'),
]
keep_only_tags = [
dict(name='div', attrs={'class':'show'}),
]
remove_tags = [
# dict(name='table', attrs={'class':'xle'}),
dict(name='div', attrs={'class':'time'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bank17'}),
# dict(name='a', attrs={'class':'ab12'}),
]
def append_page(self, soup, appendtag, position):
pager = soup.find('div',attrs={'id':'displaypagenum'})
if pager:
nexturl = self.INDEX + pager.a['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'class':'show'})
for it in texttag.findAll(style=True):
del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="zh-CN"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
self.append_page(soup, soup.body, 3)
pager = soup.find('div',attrs={'id':'displaypagenum'})
if pager:
pager.extract()
return soup
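
A note on append_page() above: it stitches multi-page articles together by recursing on the pager link in #displaypagenum and splicing each page's 'show' div into the first page's soup. It reads self.INDEX, which this recipe never assigns, so the recursion would fail on any article that actually has a pager. An equivalent iterative sketch, assuming the same markup and that INDEX is defined:

    def fetch_all_pages(recipe, soup):
        # Follow the pager link page by page, splicing each body
        # into the first page's article div.
        body = soup.find('div', attrs={'class': 'show'})
        pager = soup.find('div', attrs={'id': 'displaypagenum'})
        while pager is not None:
            next_soup = recipe.index_to_soup(recipe.INDEX + pager.a['href'])
            extra = next_soup.find('div', attrs={'class': 'show'})
            for it in extra.findAll(style=True):
                del it['style']
            body.append(extra)
            pager = next_soup.find('div', attrs={'id': 'displaypagenum'})
        return soup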

View File

@@ -1,14 +1,29 @@
-import re
+#!/usr/bin/env python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010 elsuave'

 from calibre.web.feeds.news import BasicNewsRecipe

 class EandP(BasicNewsRecipe):
     title          = u'Editor and Publisher'
-    __author__     = u'Xanthan Gum'
+    __author__     = u'elsuave (modified from Xanthan Gum)'
     description    = 'News about newspapers and journalism.'
+    publisher      = 'Editor and Publisher'
+    category       = 'news, journalism, industry'
     language       = 'en'
+    max_articles_per_feed = 25
     no_stylesheets = True
+    use_embedded_content = False
+    encoding       = 'utf8'
+    cover_url      = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'
+    remove_javascript = True
-    oldest_article = 7
-    max_articles_per_feed = 100
+    html2lrf_options = [
+        '--comment', description
+        , '--category', category
+        , '--publisher', publisher
+    ]
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

     # Font formatting code borrowed from kwetal
@@ -18,17 +33,21 @@ class EandP(BasicNewsRecipe):
     h2{font-size: large;}
     '''

-    # Delete everything before the article
-    remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
+    # Keep only div:itemmgap
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'itemmgap'})
+    ]

-    # Delete everything after the article
-    preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
-        lambda match: '</body>'),]
+    # Remove commenting/social media links
+    remove_tags_after = [dict(name='div', attrs={'class':'clear'})]

-    feeds = [(u'Breaking News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx'),
-        (u'Business News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=2'),
-        (u'Ad/Circ News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=3'),
-        (u'Newsroom', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=4'),
-        (u'Technology News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=5'),
-        (u'Syndicates News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=7')]
+    feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
+        (u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
+        (u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
+        (u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
+        (u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]

View File

@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.elpais.com/diario/
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class ElPaisImpresa(BasicNewsRecipe):
title = 'El País - edicion impresa'
__author__ = 'Darko Miletic'
description = 'el periodico global en Español'
publisher = 'EDICIONES EL PAIS, S.L.'
category = 'news, politics,Spain,actualidad,noticias,informacion,videos,fotografias,audios,graficos,nacional,internacional,deportes,economia,tecnologia,cultura,gente,television,sociedad,opinion,blogs,foros,chats,encuestas,entrevistas,participacion'
no_stylesheets = True
encoding = 'latin1'
use_embedded_content = False
language = 'es'
publication_type = 'newspaper'
masthead_url = 'http://www.elpais.com/im/tit_logo_global.gif'
index = 'http://www.elpais.com/diario/'
extra_css = ' p{text-align: justify} body{ text-align: left; font-family: Georgia,"Times New Roman",Times,serif } h2{font-family: Arial,Helvetica,sans-serif} img{margin-bottom: 0.4em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
feeds = [
(u'Internacional' , index + u'internacional/' )
,(u'España' , index + u'espana/' )
,(u'Economia' , index + u'economia/' )
,(u'Opinion' , index + u'opinion/' )
,(u'Viñetas' , index + u'vineta/' )
,(u'Sociedad' , index + u'sociedad/' )
,(u'Cultura' , index + u'cultura/' )
,(u'Tendencias' , index + u'tendencias/' )
,(u'Gente' , index + u'gente/' )
,(u'Obituarios' , index + u'obituarios/' )
,(u'Deportes' , index + u'deportes/' )
,(u'Pantallas' , index + u'radioytv/' )
,(u'Ultima' , index + u'ultima/' )
,(u'Educacion' , index + u'educacion/' )
,(u'Salud' , index + u'salud/' )
,(u'Ciberpais' , index + u'ciberpais/' )
,(u'EP3' , index + u'ep3/' )
,(u'Cine' , index + u'cine/' )
,(u'Babelia' , index + u'babelia/' )
,(u'El viajero' , index + u'viajero/' )
,(u'Negocios' , index + u'negocios/' )
,(u'Domingo' , index + u'domingo/' )
,(u'El Pais semanal' , index + u'eps/' )
,(u'Quadern Catalunya' , index + u'quadern-catalunya/' )
]
keep_only_tags=[dict(attrs={'class':['cabecera_noticia','contenido_noticia']})]
remove_attributes=['width','height']
remove_tags=[dict(name='link')]
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('a',attrs={'class':['g19r003','g19i003','g17r003','g17i003']}):
url = 'http://www.elpais.com' + item['href'].rpartition('/')[0]
title = self.tag_to_string(item)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':''
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def print_version(self, url):
return url + '?print=1'
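
For readers new to recipes: parse_index() above bypasses RSS and must hand calibre the same structure the feed parser would produce, a list of (section title, article list) pairs. A minimal sketch of that shape, with all values hypothetical:

    index = [
        ('Internacional', [
            {'title':       'Some headline',     # hypothetical
             'date':        'Tue, 13 Jul 2010',  # hypothetical
             'url':         'http://www.elpais.com/articulo/internacional/some-headline/',
             'description': ''},
        ]),
    ]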

View File

@@ -1,7 +1,7 @@
 #!/usr/bin/env python

 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010, elsuave'
 '''
 estadao.com.br
 '''
@@ -10,12 +10,12 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Estadao(BasicNewsRecipe):
     title = 'O Estado de S. Paulo'
-    __author__ = 'Darko Miletic'
+    __author__ = 'elsuave (modified from Darko Miletic)'
     description = 'News from Brasil in Portuguese'
     publisher = 'O Estado de S. Paulo'
     category = 'news, politics, Brasil'
     oldest_article = 2
-    max_articles_per_feed = 100
+    max_articles_per_feed = 25
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf8'
@@ -30,13 +30,14 @@ class Estadao(BasicNewsRecipe):
     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

-    keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':['bb-md-noticia','c5']})
+    ]

     remove_tags = [
         dict(name=['script','object','form','ul'])
-        ,dict(name='div', attrs={'id':['votacao','estadaohoje']})
-        ,dict(name='p', attrs={'id':'ctrl_texto'})
-        ,dict(name='p', attrs={'class':'texto'})
+        ,dict(name='div', attrs={'class':['fnt2 Color_04 bold','right fnt2 innerTop15 dvTmFont','™_01 right outerLeft15','tituloBox','tags']})
+        ,dict(name='div', attrs={'id':['bb-md-noticia-subcom']})
     ]

     feeds = [
@@ -51,13 +52,12 @@ class Estadao(BasicNewsRecipe):
         ,(u'Vida &', u'http://www.estadao.com.br/rss/vidae.xml')
     ]

-    def preprocess_html(self, soup):
-        ifr = soup.find('iframe')
-        if ifr:
-            ifr.extract()
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup

     language = 'pt'
+
+    def get_article_url(self, article):
+        url = BasicNewsRecipe.get_article_url(self, article)
+        if '/Multimidia/' not in url:
+            return url

View File

@@ -0,0 +1,52 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
evz.ro
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class EVZ_Ro(BasicNewsRecipe):
title = 'evz.ro'
__author__ = 'Darko Miletic'
description = 'News from Romania'
publisher = 'evz.ro'
category = 'news, politics, Romania'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'ro'
masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif'
extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
preprocess_regexps = [
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
]
remove_tags = [
dict(name=['form','embed','iframe','object','base','link','script','noscript'])
,dict(attrs={'class':['section','statsInfo','email il']})
,dict(attrs={'id' :'gallery'})
]
remove_tags_after = dict(attrs={'class':'section'})
keep_only_tags = [dict(attrs={'class':'single'})]
remove_attributes = ['height','width']
feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
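
The two preprocess_regexps above strip everything inside <head> except the <title>. A small demonstration of the same substitutions on a made-up document:

    import re

    html = '<head><meta x="y"><title>EVZ</title><link rel="a"></head><body>text</body>'
    rules = [
        (re.compile(r'<head>.*?<title>', re.DOTALL | re.IGNORECASE), lambda m: '<head><title>'),
        (re.compile(r'</title>.*?</head>', re.DOTALL | re.IGNORECASE), lambda m: '</title></head>'),
    ]
    for pat, repl in rules:
        html = pat.sub(repl, html)
    print(html)  # <head><title>EVZ</title></head><body>text</body>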

View File

@@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.foreignpolicy.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ForeignPolicy(BasicNewsRecipe):
title = 'Foreign Policy'
__author__ = 'Darko Miletic'
description = 'International News'
publisher = 'Washingtonpost.Newsweek Interactive, LLC'
category = 'news, politics, USA'
oldest_article = 31
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif } img{margin-bottom: 0.4em} h1,h2,h3,h4,h5,h6{font-family: Arial,Helvetica,sans-serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(attrs={'id':['art-mast','art-body','auth-bio']})]
remove_tags = [dict(name='iframe'),dict(attrs={'id':['share-box','base-ad']})]
remove_attributes = ['height','width']
feeds = [(u'Articles', u'http://www.foreignpolicy.com/node/feed')]
def print_version(self, url):
return url + '?print=yes&page=full'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,56 +1,95 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
haaretz.com www.haaretz.com
''' '''
import re
from calibre import strftime
from time import gmtime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Haaretz_en(BasicNewsRecipe): class HaaretzPrint_en(BasicNewsRecipe):
title = 'Haaretz in English' title = 'Haaretz - print edition'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
-    description        = 'Haaretz.com, the online edition of Haaretz Newspaper in Israel, and analysis from Israel and the Middle East. Haaretz.com provides extensive and in-depth coverage of Israel, the Jewish World and the Middle East, including defense, diplomacy, the Arab-Israeli conflict, the peace process, Israeli politics, Jerusalem affairs, international relations, Iran, Iraq, Syria, Lebanon, the Palestinian Authority, the West Bank and the Gaza Strip, the Israeli business world and Jewish life in Israel and the Diaspora. '
+    description        = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
-    publisher          = 'haaretz.com'
+    publisher          = 'Haaretz'
-    category           = 'news, politics, Israel'
+    category           = "news, Haaretz, Israel news, Israel newspapers, Israel business news, Israel financial news, Israeli news,Israeli newspaper, Israeli newspapers, news from Israel, news in Israel, news Israel, news on Israel, newspaper Israel, Israel sports news, Israel diplomacy news"
     oldest_article     = 2
     max_articles_per_feed = 200
     no_stylesheets     = True
-    encoding           = 'cp1252'
+    encoding           = 'utf8'
     use_embedded_content = False
     language           = 'en_IL'
     publication_type   = 'newspaper'
-    remove_empty_feeds = True
+    PREFIX             = 'http://www.haaretz.com'
-    masthead_url       = 'http://www.haaretz.com/images/logos/logoGrey.gif'
+    masthead_url       = PREFIX + '/images/logos/logoGrey.gif'
     extra_css          = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } '
-    preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')]

     conversion_options = {
           'comment'   : description
         , 'tags'      : category
         , 'publisher' : publisher
         , 'language'  : language
         }

-    remove_tags = [dict(name='div', attrs={'class':['rightcol']}),dict(name='table')]
-    remove_tags_before = dict(name='h1')
-    remove_tags_after  = dict(attrs={'id':'innerArticle'})
-    keep_only_tags = [dict(attrs={'id':'content'})]
+    keep_only_tags    = [dict(attrs={'id':'threecolumns'})]
+    remove_attributes = ['width','height']
+    remove_tags       = [
+        dict(name=['iframe','link','object','embed'])
+       ,dict(name='div',attrs={'class':'rightcol'})
+    ]

     feeds = [
-             (u'Opinion'              , u'http://www.haaretz.com/cmlink/opinion-rss-1.209234?localLinksEnabled=false')
-            ,(u'Defense and diplomacy', u'http://www.haaretz.com/cmlink/defense-and-diplomacy-rss-1.208894?localLinksEnabled=false')
-            ,(u'National'             , u'http://www.haaretz.com/cmlink/national-rss-1.208896?localLinksEnabled=false')
-            ,(u'International'        , u'http://www.haaretz.com/cmlink/international-rss-1.208898?localLinksEnabled=false')
-            ,(u'Jewish World'         , u'http://www.haaretz.com/cmlink/jewish-world-rss-1.209085?localLinksEnabled=false')
-            ,(u'Business'             , u'http://www.haaretz.com/cmlink/business-print-rss-1.264904?localLinksEnabled=false')
-            ,(u'Real Estate'          , u'http://www.haaretz.com/cmlink/real-estate-print-rss-1.264977?localLinksEnabled=false')
-            ,(u'Features'             , u'http://www.haaretz.com/cmlink/features-print-rss-1.264912?localLinksEnabled=false')
-            ,(u'Arts and leisure'     , u'http://www.haaretz.com/cmlink/arts-and-leisure-rss-1.286090?localLinksEnabled=false')
-            ,(u'Books'                , u'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false')
-            ,(u'Food and Wine'        , u'http://www.haaretz.com/cmlink/food-and-wine-print-rss-1.265034?localLinksEnabled=false')
-            ,(u'Sports'               , u'http://www.haaretz.com/cmlink/sports-rss-1.286092?localLinksEnabled=false')
+             (u'News'          , PREFIX + u'/print-edition/news')
+            ,(u'Opinion'       , PREFIX + u'/print-edition/opinion')
+            ,(u'Business'      , PREFIX + u'/print-edition/business')
+            ,(u'Real estate'   , PREFIX + u'/print-edition/real-estate')
+            ,(u'Sports'        , PREFIX + u'/print-edition/sports')
+            ,(u'Travel'        , PREFIX + u'/print-edition/travel')
+            ,(u'Books'         , PREFIX + u'/print-edition/books')
+            ,(u'Food & Wine'   , PREFIX + u'/print-edition/food-wine')
+            ,(u'Arts & Leisure', PREFIX + u'/print-edition/arts-leisure')
+            ,(u'Features'      , PREFIX + u'/print-edition/features')
             ]

+    def print_version(self, url):
+        article = url.rpartition('/')[2]
+        return 'http://www.haaretz.com/misc/article-print-page/' + article
+
+    def parse_index(self):
+        totalfeeds = []
+        lfeeds = self.get_feeds()
+        for feedobj in lfeeds:
+            feedtitle, feedurl = feedobj
+            self.report_progress(0, _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            for item in soup.findAll(attrs={'class':'text'}):
+                sp = item.find('span', attrs={'class':'h3 font-weight-normal'})
+                desc = item.find('p')
+                description = ''
+                if sp:
+                    if desc:
+                        description = self.tag_to_string(desc)
+                    link  = sp.a
+                    url   = self.PREFIX + link['href']
+                    title = self.tag_to_string(link)
+                    times = strftime('%a, %d %b %Y %H:%M:%S +0000', gmtime())
+                    articles.append({
+                          'title'       : title
+                        , 'date'        : times
+                        , 'url'         : url
+                        , 'description' : description
+                        })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds

     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
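For readers unfamiliar with recipe internals: once a recipe defines parse_index, the feeds list above is only consumed by get_feeds(); what the download pipeline actually needs is parse_index's return value, a list of (feed title, article list) pairs. A minimal sketch of that structure, with hypothetical values:

    feeds = [
        ('News', [{
            'title'      : 'Example headline',   # shown in the book's TOC
            'url'        : 'http://www.haaretz.com/print-edition/news/example-1.100000',  # hypothetical
            'date'       : 'Fri, 09 Jul 2010 00:00:00 +0000',
            'description': 'one-line summary shown on the section index',
        }]),
    ]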

View File

@@ -2,7 +2,7 @@ from __future__ import with_statement
__license__   = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-import re
+import time
from calibre.web.feeds.news import BasicNewsRecipe

class TheHindu(BasicNewsRecipe):
@@ -10,45 +10,41 @@ class TheHindu(BasicNewsRecipe):
    language = 'en_IN'
    oldest_article = 7
-   __author__ = 'Kovid Goyal and Sujata Raman'
+   __author__ = 'Kovid Goyal'
    max_articles_per_feed = 100
    no_stylesheets = True
-   remove_tags_before = {'name':'font', 'class':'storyhead'}
-   preprocess_regexps = [
-       (re.compile(r'<!-- story ends -->.*', re.DOTALL),
-        lambda match: '</body></html>'),
-   ]
-   extra_css = '''
-       .storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
-       body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
-   '''
-   feeds = [
-       (u'Main - Front Page', u'http://www.hindu.com/rss/01hdline.xml'),
-       (u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
-       (u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
-       (u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
-       (u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
-       (u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
-       (u'Main - Weather / Religion / Crossword / Cartoon',
-           u'http://www.hindu.com/rss/10hdline.xml'),
-       (u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
-       (u'Supplement - Literary Review',
-           u'http://www.hindu.com/rss/lrhdline.xml'),
-       (u'Supplement - Sunday Magazine',
-           u'http://www.hindu.com/rss/maghdline.xml'),
-       (u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
-       (u'Supplement - Business Review',
-           u'http://www.hindu.com/rss/bizhdline.xml'),
-       (u'Supplement - Book Review',
-           u'http://www.hindu.com/rss/brhdline.xml'),
-       (u'Supplement - Science & Technology',
-           u'http://www.hindu.com/rss/setahdline.xml')
-   ]
+   keep_only_tags = [dict(id='content')]
+   remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
+           dict(id=['email-section', 'right-column', 'printfooter'])]
+
+   extra_css = '.photo-caption { font-size: smaller }'

    def postprocess_html(self, soup, first_fetch):
        for t in soup.findAll(['table', 'tr', 'td','center']):
            t.name = 'div'
        return soup

+   def parse_index(self):
+       today = time.strftime('%Y-%m-%d')
+       soup = self.index_to_soup(
+               'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
+       div = soup.find(id='left-column')
+       feeds = []
+       current_section = None
+       current_articles = []
+       for x in div.findAll(['h3', 'div']):
+           if current_section and x.get('class', '') == 'tpaper':
+               a = x.find('a', href=True)
+               if a is not None:
+                   current_articles.append({'url':a['href']+'?css=print',
+                       'title':self.tag_to_string(a), 'date': '',
+                       'description':''})
+           if x.name == 'h3':
+               if current_section and current_articles:
+                   feeds.append((current_section, current_articles))
+               current_section = self.tag_to_string(x)
+               current_articles = []
+       return feeds
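Note that the walk above only flushes a section when the next h3 arrives, so the final section survives only if another heading follows it. A standalone sketch of the same pattern with an explicit final flush (hypothetical data; not part of the commit):

    def walk_sections(items):
        # items: ('h3', 'Section name') or ('div', article_dict) pairs
        feeds, current_section, current_articles = [], None, []
        for kind, payload in items:
            if kind == 'h3':
                if current_section and current_articles:
                    feeds.append((current_section, current_articles))
                current_section, current_articles = payload, []
            elif current_section:
                current_articles.append(payload)
        if current_section and current_articles:
            # final flush; without it the last section would be dropped
            feeds.append((current_section, current_articles))
        return feeds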

View File

@@ -1,12 +1,15 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+import string, pprint
from calibre.web.feeds.news import BasicNewsRecipe

class HoustonChronicle(BasicNewsRecipe):

    title       = u'The Houston Chronicle'
    description = 'News from Houston, Texas'
-   __author__  = 'Kovid Goyal and Sujata Raman'
+   __author__  = 'Kovid Goyal'
    language    = 'en'
    timefmt     = ' [%a, %d %b, %Y]'
    no_stylesheets = True
@@ -38,54 +41,23 @@ class HoustonChronicle(BasicNewsRecipe):

    def parse_index(self):
-       soup = self.index_to_soup('http://www.chron.com/news/')
-       container = soup.find('table', attrs={'class':'body-columns'})
+       categories = ['news', 'sports', 'business', 'entertainment', 'life',
+               'travel']
        feeds = []
-       current_section = 'Top Stories'
-       current_articles = []
-
-       self.log('\tFound section:', current_section)
-
-       for div in container.findAll('div'):
-           if div.get('class', None) == 'module-mast':
-               t = self.tag_to_string(div).replace(u'\xbb', '').strip()
-               if t and 'interactives' not in t:
-                   if current_section and current_articles:
-                       feeds.append((current_section, current_articles))
-                   current_section = t
-                   current_articles = []
-                   self.log('\tFound section:', current_section)
-           elif div.get('storyid', False):
-               a = div.find('a', href=True)
-               if a:
-                   title = self.tag_to_string(a)
-                   url = a.get('href')
-                   if title and url:
-                       if url.startswith('/'):
-                           url = 'http://www.chron.com'+url
-                       self.log('\t\tFound article:', title)
-                       self.log('\t\t\t', url)
-                       current_articles.append({'title':title, 'url':url,
-                           'date':'', 'description':''})
-           elif div.get('class', None) == 'columnbox' and \
-                   'special' in current_section.lower():
-               a = div.find('a')
-               if a:
-                   title = self.tag_to_string(a)
-                   url = a.get('href')
-                   if title and url:
-                       if not url.startswith('/'): continue
-                       url = 'http://www.chron.com'+url
-                       self.log('\t\tFound article:', title)
-                       self.log('\t\t\t', url)
-                       a.extract()
-                       desc = self.tag_to_string(div)
-                       current_articles.append({'title':title, 'url':url,
-                           'date':'', 'description':desc})
-       if current_section and current_articles:
-           feeds.append((current_section, current_articles))
+       for cat in categories:
+           articles = []
+           soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
+           for elem in soup.findAll(comptype='story', storyid=True):
+               a = elem.find('a', href=True)
+               if a is None: continue
+               url = a['href']
+               if not url.startswith('http://'):
+                   url = 'http://www.chron.com'+url
+               articles.append({'title':self.tag_to_string(a), 'url':url,
+                   'description':'', 'date':''})
+               pprint.pprint(articles[-1])
+           if articles:
+               feeds.append((string.capwords(cat), articles))
        return feeds
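The findAll call above relies on BeautifulSoup's keyword filters: a string argument must match the attribute's value exactly, while True matches any tag that merely has the attribute. In isolation (BS3, as bundled with calibre):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<div comptype="story" storyid="42">'
                         '<a href="/news/x.html">x</a></div>'
                         '<div comptype="promo">skip me</div>')
    # matches only the first div: comptype must equal 'story' and a
    # storyid attribute must be present (any value)
    print soup.findAll(comptype='story', storyid=True)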

View File

@@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277305250(BasicNewsRecipe):
    title          = u'infzm - China Southern Weekly'
    oldest_article = 14
    max_articles_per_feed = 100

    feeds = [(u'\u5357\u65b9\u5468\u672b-\u70ed\u70b9\u65b0\u95fb', u'http://www.infzm.com/rss/home/rss2.0.xml'),
             (u'\u5357\u65b9\u5468\u672b-\u7ecf\u6d4e\u65b0\u95fb', u'http://www.infzm.com/rss/economic.xml'),
             (u'\u5357\u65b9\u5468\u672b-\u6587\u5316\u65b0\u95fb', u'http://www.infzm.com/rss/culture.xml'),
             (u'\u5357\u65b9\u5468\u672b-\u751f\u6d3b\u65f6\u5c1a', u'http://www.infzm.com/rss/lifestyle.xml'),
             (u'\u5357\u65b9\u5468\u672b-\u89c2\u70b9', u'http://www.infzm.com/rss/opinion.xml')
            ]
    __author__  = 'rty'
    __version__ = '1.0'
    language    = 'zh'
    publisher   = 'http://www.infzm.com'
    description = 'Chinese Weekly Tabloid'
    category    = 'News, China'
    remove_javascript    = True
    use_embedded_content = False
    no_stylesheets = True
    #encoding = 'GB2312'
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://i50.tinypic.com/2qmfb7l.jpg'
    extra_css = '''
        @font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }
        body { margin-right: 8pt; font-family: 'DroidFont', serif; }
        .detailContent { font-family: 'DroidFont', serif, sans-serif }
    '''
    keep_only_tags = [
        dict(name='div', attrs={'id':'detailContent'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id':['detailTools', 'detailSideL', 'pageNum']}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id':'pageNum'}),
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(color=True):
            del item['font']
        for item in soup.findAll(style=True):
            del item['style']
        return soup
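'linearize_tables' is the option doing the heavy lifting here: table-based layouts common on Chinese news sites reflow badly on e-ink, so the recipe asks the conversion pipeline to flatten tables into linear text. A hedged sketch of the option dict; the metadata keys are the ones the recipes in this commit ride along with it:

    conversion_options = {
        'linearize_tables': True,   # flatten layout tables into a linear flow
        'tags'            : category,
        'language'        : language,
    }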

View File

@@ -1,6 +1,6 @@
__license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lrb.co.uk
'''
@@ -8,32 +8,38 @@ lrb.co.uk
from calibre.web.feeds.news import BasicNewsRecipe

class LondonReviewOfBooks(BasicNewsRecipe):
-   title       = u'London Review of Books'
+   title       = 'London Review of Books (free)'
-   __author__  = u'Darko Miletic'
+   __author__  = 'Darko Miletic'
    description = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
-   category    = 'news, literature, England'
+   category    = 'news, literature, UK'
-   publisher   = 'London Review of Books'
+   publisher   = 'LRB ltd.'
-   oldest_article = 7
+   oldest_article = 15
    max_articles_per_feed = 100
    language = 'en_GB'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
+   publication_type = 'magazine'
+   masthead_url = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif'
+   extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '

    conversion_options = {
          'comments'  : description
         ,'tags'      : category
         ,'language'  : language
         ,'publisher' : publisher
    }

-   keep_only_tags = [dict(name='div' , attrs={'id' :'main'})]
+   keep_only_tags    = [dict(attrs={'class':['article-body indent','letters','article-list']})]
-   remove_tags = [
-       dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
-      ,dict(name='div' , attrs={'id'   :['mainmenu','precontent','otherarticles']})
-      ,dict(name='span', attrs={'class':['inlineright','article-icons']})
-      ,dict(name='ul'  , attrs={'class':'article-controls'})
-      ,dict(name='p'   , attrs={'class':'meta-info'})
-   ]
+   remove_attributes = ['width','height']

    feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]

+   def get_cover_url(self):
+       cover_url = None
+       soup = self.index_to_soup('http://www.lrb.co.uk/')
+       cover_item = soup.find('p',attrs={'class':'cover'})
+       if cover_item:
+           cover_url = 'http://www.lrb.co.uk' + cover_item.a.img['src']
+       return cover_url

View File

@@ -0,0 +1,75 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lrb.co.uk
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class LondonReviewOfBooksPayed(BasicNewsRecipe):
    title       = 'London Review of Books'
    __author__  = 'Darko Miletic'
    description = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
    category    = 'news, literature, UK'
    publisher   = 'LRB Ltd.'
    max_articles_per_feed = 100
    language = 'en_GB'
    no_stylesheets = True
    delay    = 1
    use_embedded_content = False
    encoding = 'utf-8'
    INDEX    = 'http://www.lrb.co.uk'
    LOGIN    = INDEX + '/login'
    masthead_url = INDEX + '/assets/images/lrb_logo_big.gif'
    needs_subscription = True
    publication_type   = 'magazine'
    extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(nr=1)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        articles = []
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('p',attrs={'class':'cover'})
        lrbtitle = self.title
        if cover_item:
            self.cover_url = self.INDEX + cover_item.a.img['src']
            content = self.INDEX + cover_item.a['href']
            soup2 = self.index_to_soup(content)
            sitem = soup2.find(attrs={'class':'article-list'})
            lrbtitle = soup2.head.title.string
            for item in sitem.findAll('a',attrs={'class':'title'}):
                description = u''
                title_prefix = u''
                feed_link = item
                if feed_link.has_key('href'):
                    url   = self.INDEX + feed_link['href']
                    title = title_prefix + self.tag_to_string(feed_link)
                    date  = strftime(self.timefmt)
                    articles.append({
                          'title'       : title
                        , 'date'        : date
                        , 'url'         : url
                        , 'description' : description
                        })
        return [(lrbtitle, articles)]

    conversion_options = {
          'comments'  : description
         ,'tags'      : category
         ,'language'  : language
         ,'publisher' : publisher
    }

    keep_only_tags    = [dict(name='div', attrs={'class':['article-body indent','letters']})]
    remove_attributes = ['width','height']
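One fragility worth flagging in get_browser above: select_form(nr=1) picks the login form purely by position (the second form on the page), which breaks silently if the site reorders its markup. A hedged mechanize sketch of the same login, with the sturdier selection commented (the form name is hypothetical):

    import mechanize

    br = mechanize.Browser()
    br.open('http://www.lrb.co.uk/login')
    br.select_form(nr=1)              # positional, as in the recipe above
    # br.select_form(name='login')    # more robust if the form has a stable name (hypothetical)
    br['username'] = 'user@example.com'
    br['password'] = 'secret'
    br.submit()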

View File

@@ -7,18 +7,18 @@ class NYTimes(BasicNewsRecipe):
    __author__  = 'Krittika Goyal'
    description = 'Canadian national newspaper'
    timefmt = ' [%d %b, %Y]'
-   needs_subscription = False
    language = 'en_CA'
+   needs_subscription = False
    no_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
-   #remove_tags_after  = dict(name='td', attrs={'class':'newptool1'})
+   remove_tags_after  = dict(name='div', attrs={'class':'npStoryTools npWidth1-6 npRight npTxtStrong'})
    remove_tags = [
        dict(name='iframe'),
-       dict(name='div', attrs={'class':'story-tools'}),
+       dict(name='div', attrs={'class':['story-tools', 'npStoryTools npWidth1-6 npRight npTxtStrong']}),
        #dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
        #dict(name='form', attrs={'onsubmit':''}),
-       #dict(name='table', attrs={'cellspacing':'0'}),
+       dict(name='ul', attrs={'class':'npTxtAlt npGroup npTxtCentre npStoryShare npTxtStrong npTxtDim'}),
    ]

    # def preprocess_html(self, soup):
@@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):

    def parse_index(self):
        soup = self.nejm_get_index()

-       div = soup.find(id='LegoText4')
+       div = soup.find(id='npContentMain')

        current_section = None
        current_articles = []
@@ -50,7 +50,7 @@ class NYTimes(BasicNewsRecipe):
                current_section = self.tag_to_string(x)
                current_articles = []
                self.log('\tFound section:', current_section)
-           if current_section is not None and x.name == 'h3':
+           if current_section is not None and x.name == 'h5':
                # Article found
                title = self.tag_to_string(x)
                a = x.find('a', href=lambda x: x and 'story' in x)
@@ -59,8 +59,8 @@ class NYTimes(BasicNewsRecipe):
                url = a.get('href', False)
                if not url or not title:
                    continue
-               if url.startswith('story'):
+               #if url.startswith('story'):
                url = 'http://www.nationalpost.com/todays-paper/'+url
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url':url,
@@ -70,28 +70,11 @@ class NYTimes(BasicNewsRecipe):
            feeds.append((current_section, current_articles))
        return feeds

    def preprocess_html(self, soup):
-       story = soup.find(name='div', attrs={'class':'triline'})
+       story = soup.find(name='div', attrs={'id':'npContentMain'})
-       page2_link = soup.find('p','pagenav')
-       if page2_link:
-           atag = page2_link.find('a',href=True)
-           if atag:
-               page2_url = atag['href']
-               if page2_url.startswith('story'):
-                   page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
-               elif page2_url.startswith( '/todays-paper/story.html'):
-                   page2_url = 'http://www.nationalpost.com/'+page2_url
-               page2_soup = self.index_to_soup(page2_url)
-               if page2_soup:
-                   page2_content = page2_soup.find('div','story-content')
-                   if page2_content:
-                       full_story = BeautifulSoup('<div></div>')
-                       full_story.insert(0,story)
-                       full_story.insert(1,page2_content)
-                       story = full_story
+       ##td = heading.findParent(name='td')
+       ##td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
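The simplified preprocess_html keeps the story by transplantation: find the content div, build a blank document, and insert the div into its body, implicitly discarding everything else on the page. The same move in isolation (BS3; toy markup):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    page = BeautifulSoup('<html><body><div id="npContentMain">story</div>'
                         '<div id="sidebar">junk</div></body></html>')
    story = page.find('div', attrs={'id':'npContentMain'})
    clean = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
    clean.find('body').insert(0, story)   # the sidebar never makes it across
    print clean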

View File

@@ -32,15 +32,16 @@ class NewScientist(BasicNewsRecipe):
    }

    preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]
-   keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})]
+   keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]

    remove_tags = [
        dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
-      ,dict(name='div' , attrs={'id'   :['compnl','artIssueInfo','artTools']})
+      ,dict(name='div' , attrs={'id'   :['compnl','artIssueInfo','artTools','comments','blgsocial']})
       ,dict(name='p'   , attrs={'class':['marker','infotext' ]})
       ,dict(name='meta', attrs={'name' :'description' })
+      ,dict(name='a'   , attrs={'rel'  :'tag' })
    ]
-   remove_tags_after = dict(attrs={'class':'nbpcopy'})
+   remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
    remove_attributes = ['height','width']

    feeds = [

View File

@@ -17,7 +17,7 @@ class NYTimes(BasicNewsRecipe):
    title = 'New York Times Top Stories'
    __author__ = 'GRiker'
    language = 'en'
-   requires_version = (0, 7, 3)
+   requires_version = (0, 7, 5)

    description = 'Top Stories from the New York Times'

    # List of sections typically included in Top Stories. Use a keyword from the
@@ -79,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
        'doubleRule',
        'dottedLine',
        'entry-meta',
+       'entry-response module',
        'icon enlargeThis',
        'leftNavTabs',
        'module box nav',
@@ -88,6 +89,7 @@ class NYTimes(BasicNewsRecipe):
        'relatedSearchesModule',
        'side_tool',
        'singleAd',
+       'subNavigation clearfix',
        'subNavigation tabContent active',
        'subNavigation tabContent active clearfix',
        ]}),
@@ -110,6 +112,7 @@ class NYTimes(BasicNewsRecipe):
        'navigation',
        'portfolioInline',
        'relatedArticles',
+       'respond',
        'side_search',
        'side_index',
        'side_tool',
@@ -458,8 +461,10 @@ class NYTimes(BasicNewsRecipe):
                if mp_off >= 0:
                    c = c[:mp_off]
                emTag.insert(0, c)
-               hrTag = Tag(soup, 'hr')
-               #hrTag['style'] = "margin-top:0em;margin-bottom:0em"
+               #hrTag = Tag(soup, 'hr')
+               #hrTag['class'] = 'caption_divider'
+               hrTag = Tag(soup, 'div')
+               hrTag['class'] = 'divider'
                emTag.insert(1, hrTag)
                caption.replaceWith(emTag)

View File

@@ -13,14 +13,14 @@ Story

import re, string, time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag

class NYTimes(BasicNewsRecipe):

    title = 'The New York Times'
    __author__ = 'GRiker'
    language = 'en'
-   requires_version = (0, 7, 3)
+   requires_version = (0, 7, 5)

    description = 'Daily news from the New York Times (subscription version)'
    allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials',
@@ -66,6 +66,7 @@ class NYTimes(BasicNewsRecipe):
        'doubleRule',
        'dottedLine',
        'entry-meta',
+       'entry-response module',
        'icon enlargeThis',
        'leftNavTabs',
        'module box nav',
@@ -75,6 +76,7 @@ class NYTimes(BasicNewsRecipe):
        'relatedSearchesModule',
        'side_tool',
        'singleAd',
+       'subNavigation clearfix',
        'subNavigation tabContent active',
        'subNavigation tabContent active clearfix',
        ]}),
@@ -97,6 +99,7 @@ class NYTimes(BasicNewsRecipe):
        'navigation',
        'portfolioInline',
        'relatedArticles',
+       'respond',
        'side_search',
        'side_index',
        'side_tool',
@@ -333,7 +336,7 @@ class NYTimes(BasicNewsRecipe):
            self.log(">>> No class:'columnGroup first' found <<<")

        # Change class="kicker" to <h3>
        kicker = soup.find(True, {'class':'kicker'})
-       if kicker and kicker.contents[0]:
+       if kicker and kicker.contents and kicker.contents[0]:
            h3Tag = Tag(soup, "h3")
            h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
                use_alt=False)))
@@ -348,8 +351,10 @@ class NYTimes(BasicNewsRecipe):
                if mp_off >= 0:
                    c = c[:mp_off]
                emTag.insert(0, c)
-               hrTag = Tag(soup, 'hr')
-               #hrTag['style'] = "margin-top:0em;margin-bottom:0em"
+               #hrTag = Tag(soup, 'hr')
+               #hrTag['class'] = 'caption_divider'
+               hrTag = Tag(soup, 'div')
+               hrTag['class'] = 'divider'
                emTag.insert(1, hrTag)
                caption.replaceWith(emTag)
@@ -417,12 +422,11 @@ class NYTimes(BasicNewsRecipe):
        return soup

-   def postprocess_book(self, oeb, opts, log) :
-       print "\npostprocess_book()\n"
-
-       def extract_byline(href) :
-           # <meta name="byline" content=
-           soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
+   def populate_article_metadata(self,article,soup,first):
+       '''
+       Extract author and description from article, add to article metadata
+       '''
+       def extract_author(soup):
            byline = soup.find('meta',attrs={'name':['byl','CLMST']})
            if byline :
                author = byline['content']
@@ -432,50 +436,34 @@ class NYTimes(BasicNewsRecipe):
                if byline:
                    author = byline.renderContents()
                else:
-                   print "couldn't find byline in %s" % href
                    print soup.prettify()
                    return None
-           # Kill commas - Kindle switches to '&'
-           return re.sub(',','',author)
+           return author

-       def extract_description(href) :
-           soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
+       def extract_description(soup):
            description = soup.find('meta',attrs={'name':['description','description ']})
            if description :
-               # print repr(description['content'])
-               # print self.massageNCXText(description['content'])
                return self.massageNCXText(description['content'])
            else:
                # Take first paragraph of article
-               articleBody = soup.find('div',attrs={'id':'articleBody'})
-               if not articleBody:
+               articlebody = soup.find('div',attrs={'id':'articlebody'})
+               if not articlebody:
                    # Try again with class instead of id
-                   articleBody = soup.find('div',attrs={'class':'articleBody'})
-               if not articleBody:
-                   print 'postprocess_book.extract_description(): Did not find <div id="articleBody">:'
+                   articlebody = soup.find('div',attrs={'class':'articlebody'})
+               if not articlebody:
+                   print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
                    print soup.prettify()
                    return None
-               paras = articleBody.findAll('p')
+               paras = articlebody.findAll('p')
                for p in paras:
                    if p.renderContents() > '' :
                        return self.massageNCXText(self.tag_to_string(p,use_alt=False))
                return None

-       # Method entry point here
-       # Single section toc looks different than multi-section tocs
-       if oeb.toc.depth() == 2 :
-           for article in oeb.toc :
-               if article.author is None :
-                   article.author = extract_byline(article.href)
-               if article.description is None :
-                   article.description = extract_description(article.href).decode('utf-8')
-       elif oeb.toc.depth() == 3 :
-           for section in oeb.toc :
-               for article in section :
-                   if article.author is None :
-                       article.author = extract_byline(article.href)
-                   if article.description is None :
-                       article.description = extract_description(article.href)
+       if not article.author:
+           article.author = extract_author(soup)
+       if not article.summary:
+           article.summary = article.text_summary = extract_description(soup)

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
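The switch from postprocess_book to populate_article_metadata replaces after-the-fact TOC surgery with a per-article hook: calibre calls it for each downloaded article with the already-parsed soup, so author and summary can be filled in before the book is assembled. A skeletal override (only the signature comes from the diff; the selector is illustrative):

    from calibre.web.feeds.news import BasicNewsRecipe

    class Example(BasicNewsRecipe):
        title = 'Example'

        def populate_article_metadata(self, article, soup, first):
            # 'first' is True for the first page of a multi-page article
            byline = soup.find('meta', attrs={'name':'byl'})
            if byline is not None and not article.author:
                article.author = byline['content']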

View File

@@ -28,7 +28,7 @@ class OldNewThing(BasicNewsRecipe):
    }

    remove_attributes = ['width','height']
-   keep_only_tags = [dict(attrs={'class':['postsub','comment']})]
+   keep_only_tags = [dict(attrs={'class':'full-post'})]
+   remove_tags = [dict(attrs={'class':['post-attributes','post-tags','post-actions']})]

    feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')]

View File

@@ -0,0 +1,79 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1278063072(BasicNewsRecipe):
    title          = u'Singtao Daily - Canada'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__  = 'rty'
    description = 'Toronto Canada Chinese Newspaper'
    publisher   = 'news.singtao.ca'
    category    = 'Chinese, News, Canada'
    remove_javascript    = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'zh'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://news.singtao.ca/i/site_2009/logo.jpg'
    extra_css = '''
        @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }
        body {text-align: justify; margin-right: 8pt; font-family: 'DroidFont', serif;}
        h1 {font-family: 'DroidFont', serif;}
        .articledescription {font-family: 'DroidFont', serif;}
    '''
    keep_only_tags = [
        dict(name='div', attrs={'id':['title','storybody']}),
        dict(name='div', attrs={'class':'content'})
    ]

    def parse_index(self):
        feeds = []
        for title, url in [
            ('Editorial',
                'http://news.singtao.ca/toronto/editorial.html'),
            ('Toronto \xe5\x9f\x8e\xe5\xb8\x82/\xe7\xa4\xbe\xe5\x8d\x80'.decode('utf-8'),
                'http://news.singtao.ca/toronto/city.html'),
            ('Canada \xe5\x8a\xa0\xe5\x9c\x8b'.decode('utf-8'),
                'http://news.singtao.ca/toronto/canada.html'),
            ('Entertainment',
                'http://news.singtao.ca/toronto/entertainment.html'),
            ('World',
                'http://news.singtao.ca/toronto/world.html'),
            ('Finance \xe5\x9c\x8b\xe9\x9a\x9b\xe8\xb2\xa1\xe7\xb6\x93'.decode('utf-8'),
                'http://news.singtao.ca/toronto/finance.html'),
            ('Sports', 'http://news.singtao.ca/toronto/sports.html'),
        ]:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'class': ['newslist paddingL10T10','newslist3 paddingL10T10']})
        #date = div.find(attrs={'class': 'underlineBLK'})
        current_articles = []
        for li in div.findAll('li'):
            a = li.find('a', href = True)
            if a is None:
                continue
            title = self.tag_to_string(a)
            url = a.get('href', False)
            if not url or not title:
                continue
            if url.startswith('/'):
                url = 'http://news.singtao.ca'+url
            # self.log('\ \ Found article:', title)
            # self.log('\ \ \ ', url)
            current_articles.append({'title': title, 'url': url, 'description':''})
        return current_articles

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(width=True):
            del item['width']
        return soup
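The section labels above mix ASCII with UTF-8 escape bytes decoded on the spot, which keeps the recipe file ASCII-only while still producing unicode titles. The trick in isolation (Python 2):

    # UTF-8 bytes for the Chinese label, decoded to a unicode object
    label = 'Canada \xe5\x8a\xa0\xe5\x9c\x8b'.decode('utf-8')
    print repr(label)   # u'Canada \u52a0\u570b'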

View File

@@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1278049615(BasicNewsRecipe):
    title       = u'Statesman'
    publisher   = 'http://www.statesman.com/'
    description = 'Austin Texas Daily Newspaper'
    category    = 'News, Austin, Texas'
    __author__  = 'rty'
    oldest_article = 3
    max_articles_per_feed = 100

    feeds = [(u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
             (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
             (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
             (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
             (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
            ]
    masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
    #temp_files = []
    #articles_are_obfuscated = True
    remove_javascript    = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables':True}
    remove_tags = [
        dict(name='div', attrs={'id':'cxArticleOptions'}),
    ]
    keep_only_tags = [
        dict(name='div', attrs={'class':'cxArticleHeader'}),
        dict(name='div', attrs={'id':'cxArticleBodyText'}),
    ]

View File

@@ -1,21 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup

class TimesOfIndia(BasicNewsRecipe):
    title    = u'Times of India'
    language = 'en_IN'
-   __author__ = 'Krittika Goyal'
+   __author__ = 'Kovid Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25

-   remove_stylesheets = True
+   no_stylesheets = True
+   keep_only_tags = [dict(attrs={'class':'prttabl'})]
    remove_tags = [
-       dict(name='iframe'),
-       dict(name='td', attrs={'class':'newptool1'}),
-       dict(name='div', attrs={'id':'newptool'}),
-       dict(name='ul', attrs={'class':'newtabcontent_tabs_new'}),
-       dict(name='b', text='Topics'),
-       dict(name='span', text=':'),
+       dict(style=lambda x: x and 'float' in x)
    ]

    feeds = [
@@ -42,13 +37,8 @@ class TimesOfIndia(BasicNewsRecipe):
        ('Most Read',
            'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
    ]

+   def print_version(self, url):
+       return url + '?prtpage=1'

    def preprocess_html(self, soup):
-       heading = soup.find(name='h1', attrs={'class':'heading'})
-       td = heading.findParent(name='td')
-       td.extract()
-       soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
-       body = soup.find(name='body')
-       body.insert(0, td)
-       td.name = 'div'
        return soup
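The new remove_tags entry shows that BeautifulSoup attribute filters may also be callables: the function receives the attribute's value, or None when the tag lacks the attribute, hence the 'x and' guard. In isolation (BS3):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<p style="float:left">a</p>'
                         '<p style="color:red">b</p><p>c</p>')
    # only the first paragraph matches; the guard keeps None values safe
    print soup.findAll(style=lambda x: x and 'float' in x)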

View File

@@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277647803(BasicNewsRecipe):
    title       = u'Winnipeg Sun'
    __author__  = 'rty'
    __version__ = '1.0'
    oldest_article = 2
    publisher   = 'www.winnipegsun.com'
    description = 'Winnipeg Newspaper'
    category    = 'News, Winnipeg, Canada'
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'UTF-8'
    remove_javascript    = True
    use_embedded_content = False
    language = 'en_CA'

    feeds = [
        (u'News', u'http://www.winnipegsun.com/news/rss.xml'),
        (u'Columnists', u'http://www.winnipegsun.com/columnists/rss.xml'),
        (u'Editorial', u'http://www.winnipegsun.com/comment/editorial/rss.xml'),
        (u'Entertainments', u'http://www.winnipegsun.com/entertainment/rss.xml'),
        (u'Life', u'http://www.winnipegsun.com/life/rss.xml'),
        (u'Money', u'http://www.winnipegsun.com/money/rss.xml')
    ]
    keep_only_tags = [
        dict(name='div', attrs={'id':'article'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['leftBox','bottomBox clear']}),
        dict(name='ul', attrs={'class':'tabs dl contentSwap'}),
        dict(name='div', attrs={'id':'commentsBottom'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class':'bottomBox clear'})
    ]

View File

@@ -15,22 +15,22 @@ class ZAOBAO(BasicNewsRecipe):
    no_stylesheets = True
    recursions = 1
    language = 'zh'
    encoding = 'gbk'
    # multithreaded_fetch = True

    keep_only_tags = [
-       dict(name='table', attrs={'cellpadding':'9'}),
+       dict(name='td', attrs={'class':'text'}),
+       dict(name='table', attrs={'class':'cont'}),
+       dict(name='div', attrs={'id':'content'}),
        dict(name='span', attrs={'class':'page'}),
-       dict(name='div', attrs={'id':'content'})
    ]

    remove_tags = [
        dict(name='table', attrs={'cellspacing':'9'}),
+       dict(name='fieldset'),
+       dict(name='div', attrs={'width':'30%'}),
    ]

-   extra_css = '\
+   extra_css = '\n\
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n\
        body{font-family: serif1, serif}\n\
        .article_description{font-family: serif1, serif}\n\
@@ -41,7 +41,10 @@ class ZAOBAO(BasicNewsRecipe):
        .article {font-size:medium}\n\
        .navbar {font-size: small}\n\
        .feed{font-size: medium}\n\
-       .small{font-size: small; padding-right: 8%}\n'
+       .small{font-size: small;padding-right: 8pt}\n\
+       .text{padding-right: 8pt}\n\
+       p{text-indent: 0cm}\n\
+       div#content{padding-right: 10pt}'

    INDEXES = [
        (u'\u65b0\u95fb\u56fe\u7247', u'http://www.zaobao.com/photoweb/photoweb_idx.shtml')
@@ -51,27 +54,35 @@ class ZAOBAO(BasicNewsRecipe):
    DESC_SENSE = u'\u8054\u5408\u65e9\u62a5\u7f51'

    feeds = [
        (u'\u5373\u65f6\u62a5\u9053', u'http://realtime.zaobao.com/news.xml'),
        (u'\u4e2d\u56fd\u65b0\u95fb', u'http://www.zaobao.com/zg/zg.xml'),
        (u'\u56fd\u9645\u65b0\u95fb', u'http://www.zaobao.com/gj/gj.xml'),
        (u'\u4e16\u754c\u62a5\u520a\u6587\u8403', u'http://www.zaobao.com/wencui/wencui.xml'),
        (u'\u4e1c\u5357\u4e9a\u65b0\u95fb', u'http://www.zaobao.com/yx/yx.xml'),
        (u'\u65b0\u52a0\u5761\u65b0\u95fb', u'http://www.zaobao.com/sp/sp.xml'),
        (u'\u4eca\u65e5\u89c2\u70b9', u'http://www.zaobao.com/yl/yl.xml'),
        (u'\u4e2d\u56fd\u8d22\u7ecf', u'http://www.zaobao.com/cz/cz.xml'),
        (u'\u72ee\u57ce\u8d22\u7ecf', u'http://www.zaobao.com/cs/cs.xml'),
        (u'\u5168\u7403\u8d22\u7ecf', u'http://www.zaobao.com/cg/cg.xml'),
        (u'\u65e9\u62a5\u4f53\u80b2', u'http://www.zaobao.com/ty/ty.xml'),
        (u'\u65e9\u62a5\u526f\u520a', u'http://www.zaobao.com/fk/fk.xml'),
    ]

+   def preprocess_html(self, soup):
+       for tag in soup.findAll(name='a'):
+           if tag.has_key('href'):
+               tag_url = tag['href']
+               if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1:
+                   del tag['href']
+       return soup

    def postprocess_html(self, soup, first):
        for tag in soup.findAll(name=['table', 'tr', 'td']):
            tag.name = 'div'
        return soup

    def parse_feeds(self):
-       self.log.debug('ZAOBAO overrode parse_feeds()')
+       self.log_debug(_('ZAOBAO overrode parse_feeds()'))
        parsed_feeds = BasicNewsRecipe.parse_feeds(self)
        for id, obj in enumerate(self.INDEXES):
@@ -88,7 +99,7 @@ class ZAOBAO(BasicNewsRecipe):
            a_title = self.tag_to_string(a)
            date = ''
            description = ''
-           self.log.debug('adding %s at %s'%(a_title,a_url))
+           self.log_debug(_('adding %s at %s')%(a_title,a_url))
            articles.append({
                'title':a_title,
                'date':date,
@@ -97,26 +108,25 @@ class ZAOBAO(BasicNewsRecipe):
            })

        pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
-           max_articles_per_feed=self.max_articles_per_feed,
-           log=self.log)
+           max_articles_per_feed=self.max_articles_per_feed)

-       self.log.debug('adding %s to feed'%(title))
+       self.log_debug(_('adding %s to feed')%(title))
        for feed in pfeeds:
-           self.log.debug('adding feed: %s'%(feed.title))
+           self.log_debug(_('adding feed: %s')%(feed.title))
            feed.description = self.DESC_SENSE
            parsed_feeds.append(feed)
            for a, article in enumerate(feed):
-               self.log.debug('added article %s from %s'%(article.title, article.url))
+               self.log_debug(_('added article %s from %s')%(article.title, article.url))
-           self.log.debug('added feed %s'%(feed.title))
+           self.log_debug(_('added feed %s')%(feed.title))

        for i, feed in enumerate(parsed_feeds):
            # workaround a strange problem: sometimes the xml encoding is not applied correctly by parse()
            weired_encoding_detected = False
            if not isinstance(feed.description, unicode) and self.encoding and feed.description:
-               self.log.debug('Feed %s is not encoded correctly, manually replace it'%(feed.title))
+               self.log_debug(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
                feed.description = feed.description.decode(self.encoding, 'replace')
            elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
-               self.log.debug('Feed %s is strangely encoded, manually redo all'%(feed.title))
+               self.log_debug(_('Feed %s is weirdly encoded, manually redo all')%(feed.title))
                feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
                weired_encoding_detected = True
@@ -138,7 +148,7 @@ class ZAOBAO(BasicNewsRecipe):
            article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')

        if article.title == "Untitled article":
-           self.log.debug('Removing empty article %s from %s'%(article.title, article.url))
+           self.log_debug(_('Removing empty article %s from %s')%(article.title, article.url))
            # remove the article
            feed.articles[a:a+1] = []
        return parsed_feeds
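The encode('cp1252', 'replace').decode(self.encoding, 'replace') round trip is a mojibake repair: text that was wrongly decoded as cp1252 upstream is re-encoded to recover the original bytes, which are then decoded with the feed's real codec (GBK here). A self-contained illustration; note that with 'replace', any byte cp1252 cannot represent is lost, so this is best-effort:

    raw = u'\u4e2d\u6587'.encode('gbk')         # GBK bytes for Chinese text
    mojibake = raw.decode('cp1252', 'replace')  # wrong codec applied upstream
    fixed = mojibake.encode('cp1252', 'replace').decode('gbk', 'replace')
    print fixed == u'\u4e2d\u6587'              # True for this sample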

View File

@@ -406,3 +406,8 @@ img, object, svg|svg {
    width: auto;
    height: auto;
}

+/* These are needed because ADE renders anchors the same as links */
+a { text-decoration: inherit; color: inherit; cursor: inherit }
+a[href] { text-decoration: underline; color: blue; cursor: pointer }

View File

@@ -40,19 +40,20 @@ class LinuxFreeze(Command):
                '/usr/bin/pdftohtml',
                '/usr/lib/libwmflite-0.2.so.7',
                '/usr/lib/liblcms.so.1',
+               '/usr/lib/liblcms2.so.2',
+               '/usr/lib/libstlport.so.5.1',
                '/tmp/calibre-mount-helper',
                '/usr/lib/libunrar.so',
                '/usr/lib/libchm.so.0',
                '/usr/lib/libsqlite3.so.0',
                '/usr/lib/libsqlite3.so.0',
                '/usr/lib/libmng.so.1',
-               '/usr/lib/libpodofo.so.0.6.99',
+               '/usr/lib/libpodofo.so.0.8.1',
                '/lib/libz.so.1',
                '/lib/libuuid.so.1',
-               '/usr/lib/libtiff.so.3',
+               '/usr/lib/libtiff.so.5',
                '/lib/libbz2.so.1',
-               '/usr/lib/libpoppler.so.5',
-               '/usr/lib/libpoppler-qt4.so.3',
+               '/usr/lib/libpoppler.so.6',
                '/usr/lib/libxml2.so.2',
                '/usr/lib/libopenjpeg.so.2',
                '/usr/lib/libxslt.so.1',
@@ -61,10 +62,10 @@ class LinuxFreeze(Command):
                '/usr/lib/libgthread-2.0.so.0',
                stdcpp,
                ffi,
-               '/usr/lib/libpng12.so.0',
+               '/usr/lib/libpng14.so.14',
                '/usr/lib/libexslt.so.0',
-               '/usr/lib/libMagickWand.so.2',
+               '/usr/lib/libMagickWand.so.3',
-               '/usr/lib/libMagickCore.so.2',
+               '/usr/lib/libMagickCore.so.3',
                '/usr/lib/libgcrypt.so.11',
                '/usr/lib/libgpg-error.so.0',
                '/usr/lib/libphonon.so.4',

View File

@@ -265,6 +265,9 @@ class Py2App(object):
    @flush
    def get_local_dependencies(self, path_to_lib):
        for x in self.get_dependencies(path_to_lib):
+           if x.startswith('libpodofo'):
+               yield x, x
+               continue
            for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/',
                      '/opt/local/lib/',
                      '/Library/Frameworks/Python.framework/', SW+'/freetype/lib/'):
@@ -397,7 +400,7 @@ class Py2App(object):
    @flush
    def add_podofo(self):
        info('\nAdding PoDoFo')
-       pdf = join(SW, 'lib', 'libpodofo.0.6.99.dylib')
+       pdf = join(SW, 'lib', 'libpodofo.0.8.1.dylib')
        self.install_dylib(pdf)

    @flush

View File

@@ -13,7 +13,7 @@ from setup import Command, modules, functions, basenames, __version__, \
from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn

-QT_DIR = 'C:\\Qt\\4.6.0'
+QT_DIR = 'C:\\Qt\\4.6.3'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'

View File

@@ -162,9 +162,50 @@ SET(WANT_LIB64 FALSE)
SET(PODOFO_BUILD_SHARED TRUE)
SET(PODOFO_BUILD_STATIC FALSE)

-cp build/podofo-0.7.0/build/src/Release/podofo.dll bin/
-cp build/podofo-0.7.0/build/src/Release/podofo.lib lib/
-cp build/podofo-0.7.0/build/src/Release/podofo.exp lib/
+cp build/podofo/build/src/Release/podofo.dll bin/
+cp build/podofo/build/src/Release/podofo.lib lib/
+cp build/podofo/build/src/Release/podofo.exp lib/
+cp build/podofo/build/podofo_config.h include/podofo/
+cp -r build/podofo/src/* include/podofo/
The following patch was required to get it to compile:
Index: src/PdfImage.cpp
===================================================================
--- src/PdfImage.cpp (revision 1261)
+++ src/PdfImage.cpp (working copy)
@@ -627,7 +627,7 @@
long lLen = static_cast<long>(pInfo->rowbytes * height);
char* pBuffer = static_cast<char*>(malloc(sizeof(char) * lLen));
- png_bytep pRows[height];
+ png_bytepp pRows = static_cast<png_bytepp>(malloc(sizeof(png_bytep)*height));
for(int y=0; y<height; y++)
{
pRows[y] = reinterpret_cast<png_bytep>(pBuffer + (y * pInfo->rowbytes));
@@ -672,6 +672,7 @@
this->SetImageData( width, height, pInfo->bit_depth, &stream );
free(pBuffer);
+ free(pRows);
}
#endif // PODOFO_HAVE_PNG_LIB
Index: src/PdfFiltersPrivate.cpp
===================================================================
--- src/PdfFiltersPrivate.cpp (revision 1261)
+++ src/PdfFiltersPrivate.cpp (working copy)
@@ -1019,7 +1019,7 @@
/*
* Prepare for input from a memory buffer.
*/
-GLOBAL(void)
+void
jpeg_memory_src (j_decompress_ptr cinfo, const JOCTET * buffer, size_t bufsize)
{
my_src_ptr src;
ImageMagick
--------------

View File

@@ -154,6 +154,10 @@
        <CustomAction Id="LaunchApplication" BinaryKey="WixCA"
            DllEntry="WixShellExec" Impersonate="yes"/>

+       <InstallUISequence>
+           <FileCost Suppress="yes" />
+       </InstallUISequence>

    </Product>
</Wix>

View File

@@ -30,6 +30,7 @@ mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg')
+mimetypes.add_type('text/fb2+xml', '.fb2')
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-sony-bbeb', '.lrx')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
@@ -43,6 +44,7 @@ mimetypes.add_type('application/x-mobipocket-ebook', '.prc')
mimetypes.add_type('application/x-mobipocket-ebook', '.azw')
mimetypes.add_type('application/x-cbz', '.cbz')
mimetypes.add_type('application/x-cbr', '.cbr')
+mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
mimetypes.add_type('image/wmf', '.wmf')
guess_type = mimetypes.guess_type
import cssutils
@@ -340,13 +342,6 @@ def detect_ncpus():
    return ans

-def launch(path_or_url):
-    from PyQt4.QtCore import QUrl
-    from PyQt4.QtGui import QDesktopServices
-    if os.path.exists(path_or_url):
-        path_or_url = 'file:'+path_or_url
-    QDesktopServices.openUrl(QUrl(path_or_url))
-
relpath = os.path.relpath
_spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE)
def english_sort(x, y):
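With the two add_type registrations above in place, the stdlib resolver handles the new extensions; guess_type is simply re-exported. Roughly:

    import mimetypes

    mimetypes.add_type('text/fb2+xml', '.fb2')
    mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
    print mimetypes.guess_type('book.fb2')    # ('text/fb2+xml', None)
    print mimetypes.guess_type('book.kobo')   # ('application/x-koboreader-ebook', None)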

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
-__version__ = '0.7.4'
+__version__ = '0.7.8'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re

View File

@@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata

+# To archive plugins {{{

class HTML2ZIP(FileTypePlugin):
    name = 'HTML to ZIP'
    author = 'Kovid Goyal'
@@ -30,6 +31,7 @@ every time you add an HTML file to the library.\
        with TemporaryDirectory('_plugin_html2zip') as tdir:
            recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)]
+           recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
            if self.site_customization and self.site_customization.strip():
                recs.append(['input_encoding', self.site_customization.strip(),
                    OptionRecommendation.HIGH])
@@ -81,7 +83,9 @@ class PML2PMLZ(FileTypePlugin):
        return of.name

+# }}}

+# Metadata reader plugins {{{

class ComicMetadataReader(MetadataReaderPlugin):

    name = 'Read comic metadata'
@@ -319,7 +323,9 @@ class ZipMetadataReader(MetadataReaderPlugin):
    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.zip import get_metadata
        return get_metadata(stream)

+# }}}

+# Metadata writer plugins {{{

class EPUBMetadataWriter(MetadataWriterPlugin):
@@ -395,6 +401,7 @@ class TOPAZMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.topaz import set_metadata
        set_metadata(stream, mi)

+# }}}

from calibre.ebooks.comic.input import ComicInput
from calibre.ebooks.epub.input import EPUBInput
@@ -436,7 +443,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
-               BOOQ, ELONEX, POCKETBOOK301
+               BOOQ, ELONEX, POCKETBOOK301, MENTOR
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK
@@ -444,7 +451,7 @@ from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK
from calibre.devices.prs505.driver import PRS505
from calibre.devices.android.driver import ANDROID, S60
-from calibre.devices.nokia.driver import N770, N810, E71X
+from calibre.devices.nokia.driver import N770, N810, E71X, E52
from calibre.devices.eslick.driver import ESLICK, EBK52
from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
@@ -453,7 +460,7 @@ from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS
from calibre.devices.sne.driver import SNE
-from calibre.devices.misc import PALMPRE, AVANT
+from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
@@ -461,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
        LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.library.catalog import CSV_XML, EPUB_MOBI
+from calibre.ebooks.epub.fix.unmanifested import Unmanifested
+from calibre.ebooks.epub.fix.epubcheck import Epubcheck

plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-          LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI]
+          LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck]

plugins += [
    ComicInput,
    EPUBInput,
@@ -499,7 +509,6 @@ plugins += [
]
# Order here matters. The first matched device is the one used.
plugins += [
-   ITUNES,
    HANLINV3,
    HANLINV5,
    BLACKBERRY,
@@ -520,6 +529,7 @@ plugins += [
    S60,
    N770,
    E71X,
+   E52,
    N810,
    COOL_ER,
    ESLICK,
@@ -550,6 +560,10 @@ plugins += [
    AZBOOKA,
    FOLDER_DEVICE_FOR_CONFIG,
    AVANT,
+   MENTOR,
+   SWEEX,
+   PDNOVEL,
+   ITUNES,
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                x.__name__.endswith('MetadataReader')]
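Moving ITUNES from the head of the device list to its tail matters because of the first-match rule in the comment above: the first driver that claims a connected device wins, so broad drivers must come after specific ones. A toy model of that dispatch (hypothetical names, not calibre's actual detection code):

    def pick_driver(drivers, device):
        for drv in drivers:            # list order is precedence
            if drv(device):
                return drv.__name__
        return None

    def PDNOVEL(device):               # hypothetical, narrow match
        return device == 'pandigital novel'

    def ITUNES(device):                # hypothetical, greedy match
        return True

    print pick_driver([PDNOVEL, ITUNES], 'pandigital novel')   # 'PDNOVEL'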

View File

@@ -36,7 +36,7 @@ class Plugin(_Plugin):
        self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
        self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)

# Input profiles {{{
class InputProfile(Plugin):

    author = 'Kovid Goyal'
@@ -218,6 +218,8 @@ input_profiles = [InputProfile, SonyReaderInput, SonyReader300Input,

input_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
# }}}

class OutputProfile(Plugin):

    author = 'Kovid Goyal'
@@ -237,11 +239,12 @@ class OutputProfile(Plugin):
    # If True the MOBI renderer on the device supports MOBI indexing
    supports_mobi_indexing = False

-   # Device supports displaying a nested TOC
-   supports_nested_toc = True
    # If True output should be optimized for a touchscreen interface
    touchscreen = False
    touchscreen_news_css = ''

    # A list of extra (beyond CSS 2.1) modules supported by the device
    # Format is a cssutils profile dictionary (see iPad for example)
    extra_css_modules = []

    @classmethod
    def tags_to_string(cls, tags):
@@ -256,8 +259,151 @@ class iPadOutput(OutputProfile):
    screen_size = (768, 1024)
    comic_screen_size = (768, 1024)
    dpi = 132.0
-   supports_nested_toc = False
    extra_css_modules = [
        {
            'name': 'webkit',
            'props': { '-webkit-border-bottom-left-radius':'{length}',
                '-webkit-border-bottom-right-radius':'{length}',
                '-webkit-border-top-left-radius':'{length}',
                '-webkit-border-top-right-radius':'{length}',
                '-webkit-border-radius': r'{border-width}(\s+{border-width}){0,3}|inherit',
            },
            'macros': {'border-width': '{length}|medium|thick|thin'}
        }
    ]

    touchscreen = True
# touchscreen_news_css {{{
touchscreen_news_css = u'''
/* hr used in articles */
.article_articles_list {
width:18%;
}
.article_link {
color: #593f29;
font-style: italic;
}
.article_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:32%;
}
.article_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:32%;
}
.article_sections_list {
width:18%;
}
.articles_link {
font-weight: bold;
}
.sections_link {
font-weight: bold;
}
.caption_divider {
border:#ccc 1px solid;
}
.touchscreen_navbar {
background:#c3bab2;
border:#ccc 0px solid;
border-collapse:separate;
border-spacing:1px;
margin-left: 5%;
margin-right: 5%;
width: 90%;
-webkit-border-radius:4px;
}
.touchscreen_navbar td {
background:#fff;
font-family:Helvetica;
font-size:80%;
/* UI touchboxes use 8px padding */
padding: 6px;
text-align:center;
}
.touchscreen_navbar td a:link {
color: #593f29;
text-decoration: none;
}
/* Index formatting */
.publish_date {
text-align:center;
}
.divider {
border-bottom:1em solid white;
border-top:1px solid gray;
}
hr.caption_divider {
border-color:black;
border-style:solid;
border-width:1px;
}
/* Feed summary formatting */
.article_summary {
display:inline-block;
}
.feed {
font-family:sans-serif;
font-weight:bold;
font-size:larger;
}
.feed_link {
font-style: italic;
}
.feed_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:40%;
}
.feed_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:40%;
}
.feed_title {
text-align: center;
font-size: 160%;
}
.feed_up {
font-weight: bold;
width:20%;
}
.summary_headline {
font-weight:bold;
text-align:left;
}
.summary_byline {
text-align:left;
font-family:monospace;
}
.summary_text {
text-align:left;
}
'''
# }}}
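The extra_css_modules entry above is a cssutils profile dictionary. As a rough sketch of the idea, under the assumption that cssutils' addProfile API is used to consume it (the property and macro values mirror the iPad entry, but this wiring is illustrative, not calibre's):

# Sketch: register the WebKit border-radius properties so cssutils
# validation accepts them; {length} and {border-width} are macros that
# cssutils expands when it compiles the profile.
import cssutils

props = {
    '-webkit-border-top-left-radius': '{length}',
    '-webkit-border-radius': r'{border-width}(\s+{border-width}){0,3}|inherit',
}
macros = {'border-width': '{length}|medium|thick|thin'}
cssutils.profile.addProfile('webkit', props, macros)

sheet = cssutils.parseString('.navbar { -webkit-border-radius: 4px; }')
print sheet.cssText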
class SonyReaderOutput(OutputProfile):
@@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
        plugin_dir, OptionParser, prefs
from calibre.ebooks.epub.fix import ePubFixer

platform = 'linux'
@@ -151,13 +152,13 @@ def reread_filetype_plugins():

def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
-   occasion = {'import':_on_import, 'preprocess':_on_preprocess,
    occasion_plugins = {'import':_on_import, 'preprocess':_on_preprocess,
            'postprocess':_on_postprocess}[occasion]
    customization = config['plugin_customization']
    if ft is None:
        ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '')
    nfp = path_to_file
-   for plugin in occasion.get(ft, []):
    for plugin in occasion_plugins.get(ft, []):
        if is_disabled(plugin):
            continue
        plugin.site_customization = customization.get(plugin.name, '')
@@ -194,7 +195,6 @@ def plugin_customization(plugin):
# }}}

-# Input/Output profiles {{{

def input_profiles():
    for plugin in _initialized_plugins:
@@ -444,6 +444,14 @@ def device_plugins(): # {{{
            yield plugin
# }}}

# epub fixers {{{
def epub_fixers():
    for plugin in _initialized_plugins:
        if isinstance(plugin, ePubFixer):
            if not is_disabled(plugin):
                if platform in plugin.supported_platforms:
                    yield plugin
# }}}
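The new epub_fixers() generator mirrors device_plugins(): it yields only enabled, platform-appropriate ePubFixer plugins. A sketch of how a caller (for example the epub-fix tool mentioned in the changelog) might drive it; the fix() entry point and its arguments are assumptions for illustration, not the actual ePubFixer interface:

# Hypothetical driver loop for the epub fixers enumerated above.
from calibre.customize.ui import epub_fixers

def run_fixers(container, opts):
    for fixer in epub_fixers():
        # each plugin decides whether the problem it knows about is present;
        # fix() is an assumed name for the plugin entry point
        fixer.fix(container, opts)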
# Initialize plugins {{{
@@ -34,6 +34,12 @@ class ANDROID(USBMS):
            # Acer
            0x502 : { 0x3203 : [0x0100]},

            # Dell
            0x413c : { 0xb007 : [0x0100]},

            # Eken?
            0x040d : { 0x0851 : [0x0001]},
            }

    EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
    EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@@ -42,11 +48,12 @@ class ANDROID(USBMS):
    EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)

    VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
-           'GT-I5700', 'SAMSUNG']
            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD',
-           'PROD_GT-I9000']
            'GT-I9000', 'FILE-STOR_GADGET']
-   WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'PROD_GT-I9000_CARD']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD',
            'FILE-STOR_GADGET']

    OSX_MAIN_MEM = 'HTC Android Phone Media'
@@ -63,6 +70,16 @@ class ANDROID(USBMS):
            dirs = [x.strip() for x in dirs.split(',')]
            self.EBOOK_DIR_MAIN = dirs

    def get_main_ebook_dir(self, for_upload=False):
        dirs = self.EBOOK_DIR_MAIN
        if not for_upload:
            def aldiko_tweak(x):
                return 'eBooks' if x == 'eBooks/import' else x
            if isinstance(dirs, basestring):
                dirs = [dirs]
            dirs = list(map(aldiko_tweak, dirs))
        return dirs
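The for_upload flag exists so uploads still target 'eBooks/import' while book listing scans 'eBooks', the directory Aldiko actually uses once it has imported a file. A standalone illustration of the tweak:

# Standalone illustration of aldiko_tweak(); not driver code.
dirs = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
listing_dirs = ['eBooks' if d == 'eBooks/import' else d for d in dirs]
print listing_dirs   # ['wordplayer/calibretransfer', 'eBooks', 'Books']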
class S60(USBMS):

    name = 'S60 driver'
File diff suppressed because it is too large.
@@ -186,6 +186,15 @@ class BOOQ(EB600):
    WINDOWS_MAIN_MEM = 'EB600'
    WINDOWS_CARD_A_MEM = 'EB600'

class MENTOR(EB600):

    name = 'Astak Mentor EB600'
    gui_name = 'Mentor'
    description = _('Communicate with the Astak Mentor EB600')
    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'pdf', 'txt']

    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MENTOR'

class ELONEX(EB600):

    name = 'Elonex 600EB'
@@ -66,7 +66,7 @@ class FOLDER_DEVICE(USBMS):
                 detected_device=None):
        pass

-   def disconnect_from_folder(self):
    def unmount_device(self):
        self._main_prefix = ''
        self.is_connected = False
@@ -106,9 +106,11 @@ class BOOX(HANLINV3):
    description = _('Communicate with the BOOX eBook reader.')
    author      = 'Jesus Manuel Marinho Valcarce'
    supported_platforms = ['windows', 'osx', 'linux']
    METADATA_CACHE = '.metadata.calibre'

    # Ordered list of supported formats
-   FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi', 'prc', 'chm']
    FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi',
               'prc', 'chm', 'doc']

    VENDOR_ID  = [0x0525]
    PRODUCT_ID = [0xa4a5]
@@ -24,7 +24,7 @@ class N516(USBMS):
    VENDOR_ID  = [0x0525]
    PRODUCT_ID = [0xa4a5]
-   BCD        = [0x323, 0x326]
    BCD        = [0x323, 0x326, 0x327]

    VENDOR_NAME      = 'INGENIC'
    WINDOWS_MAIN_MEM = '_FILE-STOR_GADGE'
@@ -59,7 +59,7 @@ class DevicePlugin(Plugin):
            return cls.__name__
        return cls.name

    # Device detection {{{
    def test_bcd_windows(self, device_id, bcd):
        if bcd is None or len(bcd) == 0:
            return True
@@ -152,6 +152,7 @@ class DevicePlugin(Plugin):
                    return True, dev
        return False, None
    # }}}

    def reset(self, key='-1', log_packets=False, report_progress=None,
            detected_device=None) :
@@ -372,14 +373,12 @@ class DevicePlugin(Plugin):
    @classmethod
    def settings(cls):
        '''
-       Should return an opts object. The opts object should have one attribute
        Should return an opts object. The opts object should have at least one
        attribute `format_map` which is an ordered list of formats for the device.
        '''
        raise NotImplementedError()

class BookList(list):
    '''
    A list of books. Each Book object must have the fields:
@@ -213,7 +213,7 @@ class KINDLE_DX(KINDLE2):
    PRODUCT_ID = [0x0003]
    BCD        = [0x0100]

-class Bookmark():
class Bookmark(): # {{{
    '''
    A simple class fetching bookmark data
    Kindle-specific
@@ -429,6 +429,7 @@ class Bookmark():
            entries, = unpack('>I', data[9:13])
            current_entry = 0
            e_base = 0x0d
            self.pdf_page_offset = 0
            while current_entry < entries:
                '''
                location, = unpack('>I', data[e_base+2:e_base+6])
@@ -516,3 +517,6 @@ class Bookmark():
        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension

# }}}
@@ -0,0 +1,116 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com>'
'''
'''

import os
import re
import time

from calibre.ebooks.metadata import MetaInformation
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre import isbytestring

class Book(MetaInformation):

    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']

    JSON_ATTRS = [
        'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
        'title_sort', 'comments', 'category', 'publisher', 'series',
        'series_index', 'rating', 'isbn', 'language', 'application_id',
        'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
        'uuid',
    ]

    def __init__(self, prefix, lpath, title, authors, mime, date, ContentType, thumbnail_name, other=None):
        MetaInformation.__init__(self, '')
        self.device_collections = []
        self._new_book = False

        self.path = os.path.join(prefix, lpath)
        if os.sep == '\\':
            self.path = self.path.replace('/', '\\')
            self.lpath = lpath.replace('\\', '/')
        else:
            self.lpath = lpath

        self.title = title
        if not authors:
            self.authors = ['']
        else:
            self.authors = [authors]
        self.mime = mime

        try:
            self.size = os.path.getsize(self.path)
        except OSError:
            self.size = 0

        try:
            if ContentType == '6':
                self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
            else:
                self.datetime = time.gmtime(os.path.getctime(self.path))
        except:
            self.datetime = time.gmtime()

        if thumbnail_name is not None:
            self.thumbnail = ImageWrapper(thumbnail_name)
        self.tags = []
        if other:
            self.smart_update(other)

    def __eq__(self, other):
        return self.path == getattr(other, 'path', None)

    @dynamic_property
    def db_id(self):
        doc = '''The database id in the application database that this file corresponds to'''
        def fget(self):
            match = re.search(r'_(\d+)$', self.lpath.rpartition('.')[0])
            if match:
                return int(match.group(1))
            return None
        return property(fget=fget, doc=doc)

    @dynamic_property
    def title_sorter(self):
        doc = '''String to sort the title. If absent, title is returned'''
        def fget(self):
            return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
        return property(doc=doc, fget=fget)

    @dynamic_property
    def thumbnail(self):
        return None

    def smart_update(self, other):
        '''
        Merge the information in C{other} into self. In case of conflicts, the information
        in C{other} takes precedence, unless the information in C{other} is NULL.
        '''
        MetaInformation.smart_update(self, other)

        for attr in self.BOOK_ATTRS:
            if hasattr(other, attr):
                val = getattr(other, attr, None)
                setattr(self, attr, val)

    def to_json(self):
        json = {}
        for attr in self.JSON_ATTRS:
            val = getattr(self, attr)
            if isbytestring(val):
                enc = filesystem_encoding if attr == 'lpath' else preferred_encoding
                val = val.decode(enc, 'replace')
            elif isinstance(val, (list, tuple)):
                val = [x.decode(preferred_encoding, 'replace') if
                        isbytestring(x) else x for x in val]
            json[attr] = val
        return json

class ImageWrapper(object):
    def __init__(self, image_path):
        self.image_path = image_path
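A quick sketch of the round trip the Kobo driver relies on: build a Book from a device path and serialize it with to_json() into the .metadata.calibre cache. All values below are invented for illustration:

# Hypothetical usage of the Book class above (invented paths and metadata)
book = Book('/media/KOBOeReader', 'books/example.epub', 'Example Title',
            'Jane Doe', 'application/epub+zip',
            '2010-07-01T12:00:00.000', '16', None)
print book.to_json()['title']    # -> u'Example Title'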
@@ -2,17 +2,26 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
import sqlite3 as sqlite
from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book
from calibre.devices.kobo.books import ImageWrapper
from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS
from calibre import prints

class KOBO(USBMS):

    name = 'Kobo Reader Device Interface'
    gui_name = 'Kobo Reader'
    description = _('Communicate with the Kobo Reader')
-   author = 'Kovid Goyal'
    author = 'Timothy Legge and Kovid Goyal'
    version = (1, 0, 4)

    supported_platforms = ['windows', 'osx', 'linux']
@@ -29,3 +38,320 @@ class KOBO(USBMS):
    EBOOK_DIR_MAIN = ''
    SUPPORTS_SUB_DIRS = True
    def initialize(self):
        USBMS.initialize(self)
        self.book_class = Book

    def books(self, oncard=None, end_session=True):
        from calibre.ebooks.metadata.meta import path_to_ext

        dummy_bl = BookList(None, None, None)

        if oncard == 'carda' and not self._card_a_prefix:
            self.report_progress(1.0, _('Getting list of books on device...'))
            return dummy_bl
        elif oncard == 'cardb' and not self._card_b_prefix:
            self.report_progress(1.0, _('Getting list of books on device...'))
            return dummy_bl
        elif oncard and oncard != 'carda' and oncard != 'cardb':
            self.report_progress(1.0, _('Getting list of books on device...'))
            return dummy_bl

        prefix = self._card_a_prefix if oncard == 'carda' else \
                 self._card_b_prefix if oncard == 'cardb' \
                 else self._main_prefix

        # get the metadata cache
        bl = self.booklist_class(oncard, prefix, self.settings)
        need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)

        # make a dict cache of paths so the lookup in the loop below is faster.
        bl_cache = {}
        for idx,b in enumerate(bl):
            bl_cache[b.lpath] = idx

        def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID):
            changed = False
            # if path_to_ext(path) in self.FORMATS:
            try:
                lpath = path.partition(self.normalize_path(prefix))[2]
                if lpath.startswith(os.sep):
                    lpath = lpath[len(os.sep):]
                lpath = lpath.replace('\\', '/')
                # print "LPATH: " + lpath

                path = self.normalize_path(path)
                # print "Normalized FileName: " + path

                idx = bl_cache.get(lpath, None)
                if idx is not None:
                    if ImageID is not None:
                        imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
                        # print "Image name Normalized: " + imagename
                        if imagename is not None:
                            bl[idx].thumbnail = ImageWrapper(imagename)
                    bl_cache[lpath] = None
                    if ContentType != '6':
                        if self.update_metadata_item(bl[idx]):
                            # print 'update_metadata_item returned true'
                            changed = True
                else:
                    book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
                    # print 'Update booklist'
                    if bl.add_book(book, replace_metadata=False):
                        changed = True
            except: # Probably a path encoding error
                import traceback
                traceback.print_exc()
            return changed

        connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
        cursor = connection.cursor()

        #query = 'select count(distinct volumeId) from volume_shortcovers'
        #cursor.execute(query)
        #for row in (cursor):
        #    numrows = row[0]
        #cursor.close()

        query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
               'ImageID from content where BookID is Null'

        cursor.execute (query)

        changed = False
        for i, row in enumerate(cursor):
            # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))

            path = self.path_from_contentid(row[3], row[5], oncard)
            mime = mime_type_ext(path_to_ext(row[3]))

            if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
                changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
                # print "shortbook: " + path
            elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"):
                changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])

            if changed:
                need_sync = True

        cursor.close()
        connection.close()

        # Remove books that are no longer in the filesystem. Cache contains
        # indices into the booklist if book not in filesystem, None otherwise
        # Do the operation in reverse order so indices remain valid
        for idx in sorted(bl_cache.itervalues(), reverse=True):
            if idx is not None:
                need_sync = True
                del bl[idx]

        #print "count found in cache: %d, count of files in metadata: %d, need_sync: %s" % \
        #      (len(bl_cache), len(bl), need_sync)

        if need_sync: #self.count_found_in_bl != len(bl) or need_sync:
            if oncard == 'cardb':
                self.sync_booklists((None, None, bl))
            elif oncard == 'carda':
                self.sync_booklists((None, bl, None))
            else:
                self.sync_booklists((bl, None, None))

        self.report_progress(1.0, _('Getting list of books on device...'))
        return bl
    def delete_via_sql(self, ContentID, ContentType):
        # Delete Order:
        #    1) shortcover_page
        #    2) volume_shortcovers
        #    3) content

        connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
        cursor = connection.cursor()
        t = (ContentID,)
        cursor.execute('select ImageID from content where ContentID = ?', t)

        ImageID = None
        for row in cursor:
            # First get the ImageID to delete the images
            ImageID = row[0]
        cursor.close()

        cursor = connection.cursor()
        if ContentType == 6:
            # Delete the shortcover_pages first
            cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)

        # Delete the volume_shortcovers second
        cursor.execute('delete from volume_shortcovers where volumeid = ?', t)

        # Delete the chapters associated with the book next
        t = (ContentID,ContentID,)
        cursor.execute('delete from content where BookID = ? or ContentID = ?', t)

        connection.commit()
        cursor.close()
        if ImageID == None:
            print "Error condition ImageID was not found"
            print "You likely tried to delete a book that the kobo has not yet added to the database"
        connection.close()
        # If all this succeeds we need to delete the images files via the ImageID
        return ImageID
    def delete_images(self, ImageID):
        if ImageID != None:
            path_prefix = '.kobo/images/'
            path = self._main_prefix + path_prefix + ImageID

            file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed',)

            for ending in file_endings:
                fpath = path + ending
                fpath = self.normalize_path(fpath)

                if os.path.exists(fpath):
                    # print 'Image File Exists: ' + fpath
                    os.unlink(fpath)

    def delete_books(self, paths, end_session=True):
        for i, path in enumerate(paths):
            self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
            path = self.normalize_path(path)
            # print "Delete file normalized path: " + path
            extension = os.path.splitext(path)[1]

            if extension == '.kobo':
                # Kobo books do not have book files. They do have some images though
                # print "kobo book"
                ContentType = 6
                ContentID = self.contentid_from_path(path, ContentType)
            elif extension == '.pdf' or extension == '.epub':
                # print "ePub or pdf"
                ContentType = 16
                # print "Path: " + path
                ContentID = self.contentid_from_path(path, ContentType)
                # print "ContentID: " + ContentID
            else: # if extension == '.html' or extension == '.txt':
                ContentType = 999 # Yet another hack: to get around Kobo changing how ContentID is stored
                ContentID = self.contentid_from_path(path, ContentType)

            ImageID = self.delete_via_sql(ContentID, ContentType)
            # print "We would now delete the images for " + ImageID
            self.delete_images(ImageID)

            if os.path.exists(path):
                # Delete the ebook
                # print "Delete the ebook: " + path
                os.unlink(path)

                filepath = os.path.splitext(path)[0]
                for ext in self.DELETE_EXTS:
                    if os.path.exists(filepath + ext):
                        os.unlink(filepath + ext)
                    if os.path.exists(path + ext):
                        os.unlink(path + ext)

                if self.SUPPORTS_SUB_DIRS:
                    try:
                        # print "removed"
                        os.removedirs(os.path.dirname(path))
                    except:
                        pass
        self.report_progress(1.0, _('Removing books from device...'))
    def remove_books_from_metadata(self, paths, booklists):
        for i, path in enumerate(paths):
            self.report_progress((i+1) / float(len(paths)), _('Removing books from device metadata listing...'))
            for bl in booklists:
                for book in bl:
                    # print "Book Path: " + book.path
                    if path.endswith(book.path):
                        # print "    Remove: " + book.path
                        bl.remove_book(book)
        self.report_progress(1.0, _('Removing books from device metadata listing...'))

    def add_books_to_metadata(self, locations, metadata, booklists):
        metadata = iter(metadata)
        for i, location in enumerate(locations):
            self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
            info = metadata.next()
            blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0

            # Extract the correct prefix from the pathname. To do this correctly,
            # we must ensure that both the prefix and the path are normalized
            # so that the comparison will work. Book's __init__ will fix up
            # lpath, so we don't need to worry about that here.
            path = self.normalize_path(location[0])
            if self._main_prefix:
                prefix = self._main_prefix if \
                    path.startswith(self.normalize_path(self._main_prefix)) else None
            if not prefix and self._card_a_prefix:
                prefix = self._card_a_prefix if \
                    path.startswith(self.normalize_path(self._card_a_prefix)) else None
            if not prefix and self._card_b_prefix:
                prefix = self._card_b_prefix if \
                    path.startswith(self.normalize_path(self._card_b_prefix)) else None
            if prefix is None:
                prints('in add_books_to_metadata. Prefix is None!', path,
                        self._main_prefix)
                continue
            # print "Add book to metadata: "
            # print "prefix: " + prefix
            lpath = path.partition(prefix)[2]
            if lpath.startswith('/') or lpath.startswith('\\'):
                lpath = lpath[1:]
            # print "path: " + lpath
            # book = self.book_class(prefix, lpath, other=info)
            lpath = self.normalize_path(prefix + lpath)
            book = Book(prefix, lpath, '', '', '', '', '', '', other=info)
            if book.size is None:
                book.size = os.stat(self.normalize_path(path)).st_size
            booklists[blist].add_book(book, replace_metadata=True)
        self.report_progress(1.0, _('Adding books to device metadata listing...'))
    def contentid_from_path(self, path, ContentType):
        if ContentType == 6:
            ContentID = os.path.splitext(path)[0]
            # Remove the prefix on the file. It could be either the main
            # memory prefix or the card prefix.
            ContentID = ContentID.replace(self._main_prefix, '')
            if self._card_a_prefix is not None:
                ContentID = ContentID.replace(self._card_a_prefix, '')
        elif ContentType == 999: # HTML Files
            ContentID = path
            ContentID = ContentID.replace(self._main_prefix, "/mnt/onboard/")
            if self._card_a_prefix is not None:
                ContentID = ContentID.replace(self._card_a_prefix, "/mnt/sd/")
        else: # ContentType = 16
            ContentID = path
            ContentID = ContentID.replace(self._main_prefix, "file:///mnt/onboard/")
            if self._card_a_prefix is not None:
                ContentID = ContentID.replace(self._card_a_prefix, "file:///mnt/sd/")
        ContentID = ContentID.replace("\\", '/')
        return ContentID

    def path_from_contentid(self, ContentID, ContentType, oncard):
        path = ContentID

        if oncard == 'cardb':
            print 'path from_contentid cardb'
        elif oncard == 'carda':
            path = path.replace("file:///mnt/sd/", self._card_a_prefix)
            # print "SD Card: " + path
        else:
            if ContentType == "6":
                # This is a hack as the kobo files do not exist
                # but the path is required to make a unique id
                # for calibre's reference
                path = self._main_prefix + path + '.kobo'
                # print "Path: " + path
            else:
                # if path.startswith("file:///mnt/onboard/"):
                path = path.replace("file:///mnt/onboard/", self._main_prefix)
                path = path.replace("/mnt/onboard/", self._main_prefix)
                # print "Internal: " + path
        return path
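contentid_from_path() and path_from_contentid() are inverses built from prefix substitution. A standalone sketch of the epub/pdf (ContentType 16) case, with an assumed mount point:

# Assumed mount point; on the device, main memory is /mnt/onboard
main_prefix = '/media/KOBOeReader/'
path = main_prefix + 'books/example.epub'

# path -> ContentID (the form stored in KoboReader.sqlite)
content_id = path.replace(main_prefix, 'file:///mnt/onboard/').replace('\\', '/')
print content_id    # file:///mnt/onboard/books/example.epub

# ContentID -> path (the form calibre uses locally)
print content_id.replace('file:///mnt/onboard/', main_prefix) == path    # True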
@@ -49,3 +49,41 @@ class AVANT(USBMS):
    EBOOK_DIR_MAIN = ''
    SUPPORTS_SUB_DIRS = True

class SWEEX(USBMS):
    name = 'Sweex Device Interface'
    gui_name = 'Sweex'
    description = _('Communicate with the Sweex MM300')
    author = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
    FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']

    VENDOR_ID  = [0x0525]
    PRODUCT_ID = [0xa4a5]
    BCD        = [0x0319]

    VENDOR_NAME = 'SWEEX'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'

    EBOOK_DIR_MAIN = ''
    SUPPORTS_SUB_DIRS = True

class PDNOVEL(USBMS):
    name = 'Pandigital Novel device interface'
    gui_name = 'PD Novel'
    description = _('Communicate with the Pandigital Novel')
    author = 'Kovid Goyal'
    supported_platforms = ['windows', 'linux', 'osx']

    FORMATS = ['epub', 'pdf']

    VENDOR_ID  = [0x18d1]
    PRODUCT_ID = [0xb004]
    BCD        = [0x224]

    VENDOR_NAME = 'ANDROID'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '__UMS_COMPOSITE'

    EBOOK_DIR_MAIN = 'eBooks'
    SUPPORTS_SUB_DIRS = False
@@ -67,3 +67,24 @@ class E71X(USBMS):
    VENDOR_NAME = 'NOKIA'
    WINDOWS_MAIN_MEM = 'S60'

class E52(USBMS):

    name = 'Nokia E52 device interface'
    gui_name = 'Nokia E52'
    description = _('Communicate with the Nokia E52')
    author = 'David Ignjic'
    supported_platforms = ['windows', 'linux', 'osx']

    VENDOR_ID  = [0x421]
    PRODUCT_ID = [0x1CD]
    BCD        = [0x100]

    FORMATS = ['mobi', 'prc']

    EBOOK_DIR_MAIN = 'eBooks'
    SUPPORTS_SUB_DIRS = True

    VENDOR_NAME = 'NOKIA'
    WINDOWS_MAIN_MEM = 'S60'
@@ -99,7 +99,7 @@ class PRS505(USBMS):
        if self._card_b_prefix is not None:
            if not write_cache(self._card_b_prefix):
                self._card_b_prefix = None
        self.booklist_class.rebuild_collections = self.rebuild_collections

    def get_device_information(self, end_session=True):
        return (self.gui_name, '', '', '')
@@ -145,7 +145,7 @@ class PRS505(USBMS):
                blists[i] = booklists[i]
        opts = self.settings()
        if opts.extra_customization:
-           collections = [x.strip() for x in
            collections = [x.lower().strip() for x in
                    opts.extra_customization.split(',')]
        else:
            collections = []
@@ -156,4 +156,10 @@ class PRS505(USBMS):
        USBMS.sync_booklists(self, booklists, end_session=end_session)
        debug_print('PRS505: finished sync_booklists')

    def rebuild_collections(self, booklist, oncard):
        debug_print('PRS505: started rebuild_collections on card', oncard)
        c = self.initialize_XML_cache()
        c.rebuild_collections(booklist, {'carda':1, 'cardb':2}.get(oncard, 0))
        c.write()
        debug_print('PRS505: finished rebuild_collections')
@@ -6,10 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, time
-from pprint import pprint
from base64 import b64decode
from uuid import uuid4

from lxml import etree

from calibre import prints, guess_type
@@ -62,8 +60,7 @@ class XMLCache(object):
    def __init__(self, paths, prefixes, use_author_sort):
        if DEBUG:
-           debug_print('Building XMLCache...')
-           pprint(paths)
            debug_print('Building XMLCache...', paths)
        self.paths = paths
        self.prefixes = prefixes
        self.use_author_sort = use_author_sort
@@ -147,39 +144,73 @@ class XMLCache(object):
                if title+str(i) not in seen:
                    title = title+str(i)
                    playlist.set('title', title)
                    seen.add(title)
                    break
            else:
                seen.add(title)
-   def get_playlist_map(self):
-       debug_print('Start get_playlist_map')
-       ans = {}
    def build_id_playlist_map(self, bl_index):
        '''
        Return a map of the collections in books: {lpaths: [collection names]}
        '''
        debug_print('Start build_id_playlist_map')
        self.ensure_unique_playlist_titles()
        debug_print('after ensure_unique_playlist_titles')
        self.prune_empty_playlists()
-       debug_print('get_playlist_map loop')
        debug_print('after cleaning playlists')
-       for i, root in self.record_roots.items():
-           debug_print('get_playlist_map loop', i)
-           id_map = self.build_id_map(root)
-           ans[i] = []
-           for playlist in root.xpath('//*[local-name()="playlist"]'):
-               items = []
-               for item in playlist:
-                   id_ = item.get('id', None)
-                   record = id_map.get(id_, None)
-                   if record is not None:
-                       items.append(record)
-               ans[i].append((playlist.get('title'), items))
-       debug_print('end get_playlist_map')
-       return ans
        root = self.record_roots[bl_index]
        if root is None:
            return
        id_map = self.build_id_map(root)
        playlist_map = {}
        # foreach playlist, get the lpaths for the ids in it, then add to dict
        for playlist in root.xpath('//*[local-name()="playlist"]'):
            name = playlist.get('title')
            if name is None:
                debug_print('build_id_playlist_map: unnamed playlist!')
                continue
            for item in playlist:
                # translate each id into its lpath
                id_ = item.get('id', None)
                if id_ is None:
                    debug_print('build_id_playlist_map: id_ is None!')
                    continue
                bk = id_map.get(id_, None)
                if bk is None:
                    debug_print('build_id_playlist_map: book is None!', id_)
                    continue
                lpath = bk.get('path', None)
                if lpath is None:
                    debug_print('build_id_playlist_map: lpath is None!', id_)
                    continue
                if lpath not in playlist_map:
                    playlist_map[lpath] = []
                playlist_map[lpath].append(name)
        debug_print('Finish build_id_playlist_map. Found', len(playlist_map))
        return playlist_map
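For reference, the mapping this method returns is keyed by lpath; each value is the list of playlist (collection) names the book appears in:

# Invented example of a build_id_playlist_map() result:
playlist_map = {'Books/a.epub': ['Mystery', 'To Read'],
                'Books/b.epub': ['Mystery']}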
    def reset_existing_playlists_map(self):
        '''
        Call this method before calling get_or_create_playlist in the context of
        a given job. Call it again after deleting any playlists. The current
        implementation adds all new playlists before deleting any, so that
        constraint is respected.
        '''
        self._playlist_to_playlist_id_map = {}

    def get_or_create_playlist(self, bl_idx, title):
        # maintain a private map of playlists to their ids. Don't check if it
        # exists, because reset_existing_playlist_map must be called before it
        # is used to ensure that deleted playlists are taken into account
        root = self.record_roots[bl_idx]
        if bl_idx not in self._playlist_to_playlist_id_map:
            self._playlist_to_playlist_id_map[bl_idx] = {}
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                pl_title = playlist.get('title', None)
                if pl_title is not None:
                    self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist
-       for playlist in root.xpath('//*[local-name()="playlist"]'):
-           if playlist.get('title', None) == title:
-               return playlist
-       if DEBUG:
-           debug_print('Creating playlist:', title)
        if title in self._playlist_to_playlist_id_map[bl_idx]:
            return self._playlist_to_playlist_id_map[bl_idx][title]
        debug_print('Creating playlist:', title)
        ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
                nsmap=root.nsmap, attrib={
                    'uuid' : uuid(),
@@ -188,12 +219,12 @@ class XMLCache(object):
                    'sourceid': '1'
                    })
        root.append(ans)
        self._playlist_to_playlist_id_map[bl_idx][title] = ans
        return ans
    # }}}

    def fix_ids(self): # {{{
-       if DEBUG:
-           debug_print('Running fix_ids()')
        debug_print('Running fix_ids()')

        def ensure_numeric_ids(root):
            idmap = {}
@@ -251,7 +282,9 @@ class XMLCache(object):
            ensure_media_xml_base_ids(root)

            idmap = ensure_numeric_ids(root)
-           remap_playlist_references(root, idmap)
            if len(idmap) > 0:
                debug_print('fix_ids: found some non-numeric ids')
                remap_playlist_references(root, idmap)
            if i == 0:
                sourceid, playlist_sid = 1, 0
                base = 0
@@ -276,38 +309,19 @@ class XMLCache(object):
    def update_booklist(self, bl, bl_index):
        if bl_index not in self.record_roots:
            return
-       if DEBUG:
-           debug_print('Updating JSON cache:', bl_index)
        debug_print('Updating JSON cache:', bl_index)
        playlist_map = self.build_id_playlist_map(bl_index)
        root = self.record_roots[bl_index]
-       pmap = self.get_playlist_map()[bl_index]
-       playlist_map = {}
-       for title, records in pmap:
-           for record in records:
-               path = record.get('path', None)
-               if path:
-                   if path not in playlist_map:
-                       playlist_map[path] = []
-                   playlist_map[path].append(title)
        lpath_map = self.build_lpath_map(root)
        for book in bl:
            record = lpath_map.get(book.lpath, None)
            if record is not None:
                title = record.get('title', None)
                if title is not None and title != book.title:
-                   if DEBUG:
-                       debug_print('Renaming title', book.title, 'to', title)
                    debug_print('Renaming title', book.title, 'to', title)
                    book.title = title
-               # We shouldn't do this for Sonys, because the reader strips
-               # all but the first author.
                # Don't set the author, because the reader strips all but
                # the first author.
-               # authors = record.get('author', None)
-               # if authors is not None:
-               #     authors = string_to_authors(authors)
-               #     if authors != book.authors:
-               #         if DEBUG:
-               #             prints('Renaming authors', book.authors, 'to',
-               #                    authors)
-               #         book.authors = authors
                for thumbnail in record.xpath(
                        'descendant::*[local-name()="thumbnail"]'):
                    for img in thumbnail.xpath(
@@ -318,47 +332,57 @@ class XMLCache(object):
                        book.thumbnail = raw
                        break
                    break
-           if book.lpath in playlist_map:
-               tags = playlist_map[book.lpath]
-               book.device_collections = tags
            book.device_collections = playlist_map.get(book.lpath, [])
        debug_print('Finished updating JSON cache:', bl_index)
    # }}}
    # Update XML from JSON {{{
    def update(self, booklists, collections_attributes):
-       debug_print('Starting update XML from JSON')
        debug_print('Starting update', collections_attributes)
-       playlist_map = self.get_playlist_map()
        for i, booklist in booklists.items():
-           if DEBUG:
            playlist_map = self.build_id_playlist_map(i)
            debug_print('Updating XML Cache:', i)
            root = self.record_roots[i]
            lpath_map = self.build_lpath_map(root)
            gtz_count = ltz_count = 0
            for book in booklist:
                path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
-               # record = self.book_by_lpath(book.lpath, root)
                record = lpath_map.get(book.lpath, None)
                if record is None:
                    record = self.create_text_record(root, i, book.lpath)
-               self.update_text_record(record, book, path, i)
                (gtz_count, ltz_count) = self.update_text_record(record, book,
                                                path, i, gtz_count, ltz_count)
                # Ensure the collections in the XML database are recorded for
                # this book
                if book.device_collections is None:
                    book.device_collections = []
                book.device_collections = playlist_map.get(book.lpath, [])

            debug_print('Timezone votes: %d GMT, %d LTZ'%(gtz_count, ltz_count))
-           bl_pmap = playlist_map[i]
-           self.update_playlists(i, root, booklist, bl_pmap,
-                   collections_attributes)
            self.update_playlists(i, root, booklist, collections_attributes)
-       # This is needed to update device_collections
-       for i, booklist in booklists.items():
-           self.update_booklist(booklist, i)
-       debug_print('Finished update XML from JSON')
        # Update the device collections because update playlist could have added
        # some new ones.
        debug_print('In update/ Starting refresh of device_collections')
        for i, booklist in booklists.items():
            playlist_map = self.build_id_playlist_map(i)
            for book in booklist:
                book.device_collections = playlist_map.get(book.lpath, [])
        self.fix_ids()
        debug_print('Finished update')

    def rebuild_collections(self, booklist, bl_index):
        if bl_index not in self.record_roots:
            return
        root = self.record_roots[bl_index]
        self.update_playlists(bl_index, root, booklist, [])
        self.fix_ids()

-   def update_playlists(self, bl_index, root, booklist, playlist_map,
-           collections_attributes):
-       debug_print('Starting update_playlists')
    def update_playlists(self, bl_index, root, booklist, collections_attributes):
        debug_print('Starting update_playlists', collections_attributes, bl_index)
        self.reset_existing_playlists_map()
        collections = booklist.get_collections(collections_attributes)
        lpath_map = self.build_lpath_map(root)
        debug_print('update_playlists: finished building maps')
        for category, books in collections.items():
            records = [lpath_map.get(b.lpath, None) for b in books]
            # Remove any books that were not found, although this
@@ -367,25 +391,34 @@ class XMLCache(object):
                debug_print('WARNING: Some elements in the JSON cache were not'
                        ' found in the XML cache')
            records = [x for x in records if x is not None]
            # Ensure each book has an ID.
            for rec in records:
                if rec.get('id', None) is None:
                    rec.set('id', str(self.max_id(root)+1))
            ids = [x.get('id', None) for x in records]
            # Given that we set the ids, there shouldn't be any None's. But
            # better to be safe...
            if None in ids:
-               if DEBUG:
-                   debug_print('WARNING: Some <text> elements do not have ids')
                debug_print('WARNING: Some <text> elements do not have ids')
                ids = [x for x in ids if x is not None]
            playlist = self.get_or_create_playlist(bl_index, category)
            # Get the books currently in the playlist. We will need them to be
            # sure to put back any books that were manually added.
            playlist_ids = []
            for item in playlist:
                id_ = item.get('id', None)
                if id_ is not None:
                    playlist_ids.append(id_)
            # Empty the playlist. We do this so that the playlist will have the
            # order specified by get_collections
            for item in list(playlist):
                playlist.remove(item)
            # Get a list of ids not known by get_collections
            extra_ids = [x for x in playlist_ids if x not in ids]
            # Rebuild the collection in the order specified by get_collections. Then
            # add the ids that get_collections didn't know about.
            for id_ in ids + extra_ids:
                item = playlist.makeelement(
                        '{%s}item'%self.namespaces[bl_index],
@@ -423,11 +456,38 @@ class XMLCache(object):
        root.append(ans)
        return ans

-   def update_text_record(self, record, book, path, bl_index):
    def update_text_record(self, record, book, path, bl_index, gtz_count, ltz_count):
        '''
        Update the Sony database from the book. This is done if the timestamp in
        the db differs from the timestamp on the file.
        '''

        # It seems that a Sony device can sometimes know what timezone it is in,
        # and apparently converts the dates to GMT when it writes them to the
        # db. Unfortunately, we can't tell when it does this, so we use a
        # horrible heuristic. First, set dates only for new books, trying to
        # avoid upsetting the sony. Use the timezone determined through the
        # voting described next. Second, voting: if a book is not new, compare
        # its Sony DB date against localtime and gmtime. Count the matches. When
        # we must set a date, use the one with the most matches. Use localtime
        # in the case of a tie, and hope it is right.
        timestamp = os.path.getmtime(path)
-       date = strftime(timestamp)
-       if date != record.get('date', None):
        rec_date = record.get('date', None)
        if not getattr(book, '_new_book', False): # book is not new
            if strftime(timestamp, zone=time.gmtime) == rec_date:
                gtz_count += 1
            elif strftime(timestamp, zone=time.localtime) == rec_date:
                ltz_count += 1
        else: # book is new. Set the time using the current votes
            if ltz_count >= gtz_count:
                tz = time.localtime
                debug_print("Using localtime TZ for new book", book.lpath)
            else:
                tz = time.gmtime
                debug_print("Using GMT TZ for new book", book.lpath)
            date = strftime(timestamp, zone=tz)
            record.set('date', date)
        record.set('size', str(os.stat(path).st_size))
        title = book.title if book.title else _('Unknown')
        record.set('title', title)
@@ -452,6 +512,7 @@ class XMLCache(object):
        if 'id' not in record.attrib:
            num = self.max_id(record.getroottree().getroot())
            record.set('id', str(num+1))
        return (gtz_count, ltz_count)
    # }}}
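The voting heuristic in update_text_record() can be read in isolation. A self-contained sketch with plain time.strftime standing in for calibre's strftime helper; the book tuples and date format are invented for illustration:

import time

FMT = '%Y-%m-%dT%H:%M:%S'

def vote_and_stamp(books):
    # books: invented (mtime, db_date, is_new) triples. Existing books vote
    # for whichever timezone reproduces the date already in the Sony db;
    # new books are stamped using whichever zone is currently winning.
    gtz_count = ltz_count = 0
    for mtime, db_date, is_new in books:
        if not is_new:
            if time.strftime(FMT, time.gmtime(mtime)) == db_date:
                gtz_count += 1
            elif time.strftime(FMT, time.localtime(mtime)) == db_date:
                ltz_count += 1
        else:
            zone = time.localtime if ltz_count >= gtz_count else time.gmtime
            print 'new book stamped with', time.strftime(FMT, zone(mtime))
    return gtz_count, ltz_count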
    # Writing the XML files {{{
@@ -544,10 +605,5 @@ class XMLCache(object):
                    break
            self.namespaces[i] = ns

-       # if DEBUG:
-       #     debug_print('Found nsmaps:')
-       #     pprint(self.nsmaps)
-       #     debug_print('Found namespaces:')
-       #     pprint(self.namespaces)
    # }}}
@@ -98,6 +98,9 @@ class LinuxScanner(object):
    def __call__(self):
        ans = set([])
        if not self.ok:
            raise RuntimeError('DeviceScanner requires the /sys filesystem to work.')

        for x in os.listdir(self.base):
            base = os.path.join(self.base, x)
            ven = os.path.join(base, 'idVendor')
@@ -145,8 +148,6 @@ class DeviceScanner(object):
    def __init__(self, *args):
        if isosx and osx_scanner is None:
            raise RuntimeError('The Python extension usbobserver must be available on OS X.')
-       if islinux and not linux_scanner.ok:
-           raise RuntimeError('DeviceScanner requires the /sys filesystem to work.')
        self.scanner = win_scanner if iswindows else osx_scanner if isosx else linux_scanner
        self.devices = []
@@ -11,10 +11,11 @@ from calibre.devices.mime import mime_type_ext
from calibre.devices.interface import BookList as _BookList
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre import isbytestring
from calibre.utils.config import prefs

class Book(MetaInformation):

-   BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections']
    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']

    JSON_ATTRS = [
        'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
@@ -29,6 +30,7 @@ class Book(MetaInformation):

        MetaInformation.__init__(self, '')
        self._new_book = False
        self.device_collections = []
        self.path = os.path.join(prefix, lpath)
        if os.sep == '\\':
@@ -76,7 +78,7 @@ class Book(MetaInformation):
        in C{other} takes precedence, unless the information in C{other} is NULL.
        '''

-       MetaInformation.smart_update(self, other)
        MetaInformation.smart_update(self, other, replace_tags=True)

        for attr in self.BOOK_ATTRS:
            if hasattr(other, attr):
@@ -130,12 +132,37 @@ class CollectionsBookList(BookList):
        return True

    def get_collections(self, collection_attributes):
        from calibre.devices.usbms.driver import debug_print
        debug_print('Starting get_collections:', prefs['manage_device_metadata'])
        collections = {}
        series_categories = set([])
-       collection_attributes = list(collection_attributes)+['device_collections']
-       for attr in collection_attributes:
-           attr = attr.strip()
-           for book in self:
        # This map of sets is used to avoid linear searches when testing for
        # book equality
        collections_lpaths = {}
        for book in self:
            # Make sure we can identify this book via the lpath
            lpath = getattr(book, 'lpath', None)
            if lpath is None:
                continue
            # Decide how we will build the collections. The default: leave the
            # book in all existing collections. Do not add any new ones.
            attrs = ['device_collections']
            if getattr(book, '_new_book', False):
                if prefs['manage_device_metadata'] == 'manual':
                    # Ensure that the book is in all the book's existing
                    # collections plus all metadata collections
                    attrs += collection_attributes
                else:
                    # For new books, both 'on_send' and 'on_connect' do the same
                    # thing. The book's existing collections are ignored. Put
                    # the book in collections defined by its metadata.
                    attrs = collection_attributes
            elif prefs['manage_device_metadata'] == 'on_connect':
                # For existing books, modify the collections only if the user
                # specified 'on_connect'
                attrs = collection_attributes
            for attr in attrs:
                attr = attr.strip()
                val = getattr(book, attr, None)
                if not val: continue
                if isbytestring(val):
@@ -150,11 +177,12 @@ class CollectionsBookList(BookList):
                    continue
                if category not in collections:
                    collections[category] = []
                    collections_lpaths[category] = set()
-               if book not in collections[category]:
                if lpath not in collections_lpaths[category]:
                    collections_lpaths[category].add(lpath)
                    collections[category].append(book)
                if attr == 'series':
                    series_categories.add(category)
        # Sort collections
        for category, books in collections.items():
            def tgetter(x):
@@ -167,3 +195,15 @@ class CollectionsBookList(BookList):
            books.sort(cmp=lambda x,y:cmp(getter(x), getter(y)))
        return collections

    def rebuild_collections(self, booklist, oncard):
        '''
        For each book in the booklist for the card oncard, remove it from all
        its current collections, then add it to the collections specified in
        device_collections.

        oncard is None for the main memory, carda for card A, cardb for card B,
        etc.

        booklist is the object created by the :method:`books` call above.
        '''
        pass
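The branching at the top of the get_collections() loop reduces to a small policy table. A sketch of just that decision, with the manage_device_metadata pref value passed in explicitly rather than read from prefs:

def collection_sources(is_new_book, manage_device_metadata, metadata_attrs):
    # Sketch of the attrs selection in get_collections() above.
    if is_new_book:
        if manage_device_metadata == 'manual':
            # keep existing collections and also add the metadata-driven ones
            return ['device_collections'] + metadata_attrs
        # 'on_send' / 'on_connect': metadata alone decides for new books
        return metadata_attrs
    if manage_device_metadata == 'on_connect':
        # existing books are rebuilt from metadata only on connect
        return metadata_attrs
    # default: leave the book in whatever collections it already has
    return ['device_collections']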
@@ -78,9 +78,6 @@ class Device(DeviceConfig, DevicePlugin):
    STORAGE_CARD_VOLUME_LABEL  = ''
    STORAGE_CARD2_VOLUME_LABEL = None

-   SUPPORTS_SUB_DIRS = False
-   MUST_READ_METADATA = False
-   SUPPORTS_USE_AUTHOR_SORT = False

    EBOOK_DIR_MAIN = ''
    EBOOK_DIR_CARD_A = ''
@@ -735,7 +732,7 @@ class Device(DeviceConfig, DevicePlugin):
            traceback.print_exc()
        self._main_prefix = self._card_a_prefix = self._card_b_prefix = None

-   def get_main_ebook_dir(self):
    def get_main_ebook_dir(self, for_upload=False):
        return self.EBOOK_DIR_MAIN

    def _sanity_check(self, on_card, files):
@@ -753,7 +750,7 @@ class Device(DeviceConfig, DevicePlugin):
            path = os.path.join(self._card_b_prefix,
                    *(self.EBOOK_DIR_CARD_B.split('/')))
        else:
-           candidates = self.get_main_ebook_dir()
            candidates = self.get_main_ebook_dir(for_upload=True)
            if isinstance(candidates, basestring):
                candidates = [candidates]
            candidates = [
@@ -13,6 +13,10 @@ class DeviceConfig(object):
    EXTRA_CUSTOMIZATION_MESSAGE = None
    EXTRA_CUSTOMIZATION_DEFAULT = None

    SUPPORTS_SUB_DIRS = False
    MUST_READ_METADATA = False
    SUPPORTS_USE_AUTHOR_SORT = False

    #: If None the default is used
    SAVE_TEMPLATE = None
@@ -23,9 +27,14 @@ class DeviceConfig(object):
            config().parse().send_template

    @classmethod
-   def _config(cls):
    def _config_base_name(cls):
        klass = cls if isinstance(cls, type) else cls.__class__
-       c = Config('device_drivers_%s' % klass.__name__, _('settings for device drivers'))
        return klass.__name__

    @classmethod
    def _config(cls):
        name = cls._config_base_name()
        c = Config('device_drivers_%s' % name, _('settings for device drivers'))
        c.add_opt('format_map', default=cls.FORMATS,
                help=_('Ordered list of formats the device will accept'))
        c.add_opt('use_subdirs', default=True,
@@ -58,7 +58,7 @@ class USBMS(CLI, Device):
         debug_print ('USBMS: Fetching list of books from device. oncard=', oncard)
-        dummy_bl = BookList(None, None, None)
+        dummy_bl = self.booklist_class(None, None, None)

         if oncard == 'carda' and not self._card_a_prefix:
             self.report_progress(1.0, _('Getting list of books on device...'))
@@ -78,6 +78,8 @@ class USBMS(CLI, Device):
                 self.EBOOK_DIR_CARD_B if oncard == 'cardb' else \
                 self.get_main_ebook_dir()

+        debug_print ('USBMS: dirs are:', prefix, ebook_dirs)
+
         # get the metadata cache
         bl = self.booklist_class(oncard, prefix, self.settings)
         need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)
@@ -233,6 +235,7 @@ class USBMS(CLI, Device):
                 book = self.book_class(prefix, lpath, other=info)
                 if book.size is None:
                     book.size = os.stat(self.normalize_path(path)).st_size
+                book._new_book = True # Must be before add_book
                 booklists[blist].add_book(book, replace_metadata=True)
         self.report_progress(1.0, _('Adding books to device metadata listing...'))
         debug_print('USBMS: finished adding metadata')
@@ -273,6 +276,9 @@ class USBMS(CLI, Device):
         self.report_progress(1.0, _('Removing books from device metadata listing...'))
         debug_print('USBMS: finished removing metadata for %d books'%(len(paths)))

+    # If you override this method and you use book._new_book, then you must
+    # complete the processing before you call this method. The flag is cleared
+    # at the end just before the return
     def sync_booklists(self, booklists, end_session=True):
         debug_print('USBMS: starting sync_booklists')
@@ -286,11 +292,18 @@ class USBMS(CLI, Device):
             js = [item.to_json() for item in booklists[listid] if
                   hasattr(item, 'to_json')]
             with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f:
-                json.dump(js, f, indent=2, encoding='utf-8')
+                f.write(json.dumps(js, indent=2, encoding='utf-8'))

         write_prefix(self._main_prefix, 0)
         write_prefix(self._card_a_prefix, 1)
         write_prefix(self._card_b_prefix, 2)

+        # Clear the _new_book indication, as we are supposed to be done with
+        # adding books at this point
+        for blist in booklists:
+            if blist is not None:
+                for book in blist:
+                    book._new_book = False
+
         self.report_progress(1.0, _('Sending metadata to device...'))
         debug_print('USBMS: finished sync_booklists')
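
For driver authors, a minimal sketch of the ordering this implies (the MyDevice subclass and its processing are hypothetical; only the book._new_book contract comes from the code above):

from calibre.devices.usbms.driver import USBMS

class MyDevice(USBMS):
    # Hypothetical subclass illustrating the book._new_book contract.
    def sync_booklists(self, booklists, end_session=True):
        # Inspect newly added books *before* delegating, because the base
        # class clears the flag at the end of its sync_booklists.
        for bl in booklists:
            if bl is None:
                continue
            for book in bl:
                if getattr(book, '_new_book', False):
                    pass # e.g. update a device-specific index here
        USBMS.sync_booklists(self, booklists, end_session=end_session)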

View File

@@ -49,7 +49,6 @@ class CHMInput(InputFormatPlugin):
             log.debug('stream.name=%s' % stream.name)
             mainname = self._chmtohtml(tdir, chm_name, no_images, log)
             mainpath = os.path.join(tdir, mainname)
-            #raw_input()

             metadata = get_metadata_from_reader(self._chm_reader)
@@ -92,7 +91,7 @@ class CHMInput(InputFormatPlugin):
             metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
         if not metadata.language:
             oeb.logger.warn(u'Language not specified')
-            metadata.add('language', get_lang())
+            metadata.add('language', get_lang().replace('_', '-'))
         if not metadata.creator:
             oeb.logger.warn('Creator not specified')
             metadata.add('creator', _('Unknown'))
@@ -141,10 +140,9 @@ class CHMInput(InputFormatPlugin):
         log.debug('Found %d section nodes' % len(chapters))
         htmlpath = os.path.splitext(hhcpath)[0] + ".html"
         f = open(htmlpath, 'wb')
-        f.write('<html><head><meta http-equiv="Content-type"'
-                ' content="text/html;charset=UTF-8" /></head><body>\n')
         if chapters:
+            f.write('<html><head><meta http-equiv="Content-type"'
+                    ' content="text/html;charset=UTF-8" /></head><body>\n')
             path0 = chapters[0][1]
             subpath = os.path.dirname(path0)
@@ -158,7 +156,9 @@ class CHMInput(InputFormatPlugin):
                 url = url.encode('utf-8')
                 f.write(url)
             f.write("</body></html>")
+        else:
+            f.write(hhcdata)
         f.close()
         return htmlpath

View File

@@ -8,7 +8,7 @@ import os, re
 from mimetypes import guess_type as guess_mimetype
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
-from calibre.constants import iswindows
+from calibre.constants import iswindows, filesystem_encoding
 from calibre.utils.chm.chm import CHMFile
 from calibre.utils.chm.chmlib import (
     CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
@@ -78,6 +78,8 @@ class CHMError(Exception):
 class CHMReader(CHMFile):
     def __init__(self, input, log):
         CHMFile.__init__(self)
+        if isinstance(input, unicode):
+            input = input.encode(filesystem_encoding)
         if not self.LoadCHM(input):
             raise CHMError("Unable to open CHM file '%s'"%(input,))
         self.log = log
@@ -91,7 +93,6 @@ class CHMReader(CHMFile):
         self.root, ext = os.path.splitext(self.topics.lstrip('/'))
         self.hhc_path = self.root + ".hhc"

     def _parse_toc(self, ul, basedir=os.getcwdu()):
         toc = TOC(play_order=self._playorder, base_path=basedir, text='')
         self._playorder += 1
@@ -152,6 +153,8 @@ class CHMReader(CHMFile):
             if f.lower() == self.hhc_path.lower():
                 self.hhc_path = f
                 break
+        if self.hhc_path not in files and files:
+            self.hhc_path = files[0]

     def _reformat(self, data):
         try:
@@ -159,7 +162,7 @@ class CHMReader(CHMFile):
             soup = BeautifulSoup(data)
         except ValueError:
             # hit some strange encoding problems...
-            print "Unable to parse html for cleaning, leaving it :("
+            self.log.exception("Unable to parse html for cleaning, leaving it")
             return data
         # nuke javascript...
         [s.extract() for s in soup('script')]

View File

@@ -151,6 +151,7 @@ cpalmdoc_do_compress(buffer *b, char *output) {
             for (j=0; j < temp.len; j++) *(output++) = (char)temp.data[j];
         }
     }
+    PyMem_Free(temp.data);
     return output - head;
 }
@@ -168,7 +169,9 @@ cpalmdoc_compress(PyObject *self, PyObject *args) {
     for (j = 0; j < input_len; j++)
         b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
     b.len = input_len;
-    output = (char *)PyMem_Malloc(sizeof(char) * b.len);
+    // Make the output buffer larger than the input as sometimes
+    // compression results in a larger block
+    output = (char *)PyMem_Malloc(sizeof(char) * (int)(1.25*b.len));
     if (output == NULL) return PyErr_NoMemory();
     j = cpalmdoc_do_compress(&b, output);
     if ( j == 0) return PyErr_NoMemory();

View File

@@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode,
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)

 LIGATURES = {
-    u'\u00c6': u'AE',
-    u'\u00e6': u'ae',
-    u'\u0152': u'OE',
-    u'\u0153': u'oe',
-    u'\u0132': u'IJ',
-    u'\u0133': u'ij',
-    u'\u1D6B': u'ue',
+    # u'\u00c6': u'AE',
+    # u'\u00e6': u'ae',
+    # u'\u0152': u'OE',
+    # u'\u0153': u'oe',
+    # u'\u0132': u'IJ',
+    # u'\u0133': u'ij',
+    # u'\u1D6B': u'ue',
     u'\uFB00': u'ff',
     u'\uFB01': u'fi',
     u'\uFB02': u'fl',
@@ -107,9 +107,21 @@ class CSSPreProcessor(object):
     PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')

-    def __call__(self, data):
+    def __call__(self, data, add_namespace=False):
+        from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
         data = self.PAGE_PAT.sub('', data)
-        return data
+        if not add_namespace:
+            return data
+        ans, namespaced = [], False
+        for line in data.splitlines():
+            ll = line.lstrip()
+            if not (namespaced or ll.startswith('@import') or
+                    ll.startswith('@charset')):
+                ans.append(XHTML_CSS_NAMESPACE.strip())
+                namespaced = True
+            ans.append(line)
+        return u'\n'.join(ans)

 class HTMLPreProcessor(object):
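
In effect, add_namespace=True inserts the XHTML namespace rule ahead of the first ordinary rule, after any @import/@charset lines (which CSS requires to come first), so selectors in imported stylesheets match XHTML elements. A minimal sketch of calling it; the import path is assumed and the stylesheet text is made up:

from calibre.ebooks.conversion.preprocess import CSSPreProcessor

pre = CSSPreProcessor()
raw = '@import "fonts.css";\nh1 { color: red }'
# The namespace declaration lands between the @import line and the h1 rule.
print pre(raw, add_namespace=True)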
@@ -268,7 +280,7 @@ class HTMLPreProcessor(object):
         if getattr(self.extra_opts, 'remove_footer', None):
             try:
-                rules.insert(0
+                rules.insert(0,
                     (re.compile(self.extra_opts.footer_regex), lambda match : '')
                 )
             except:

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.customize import Plugin

class InvalidEpub(ValueError):
    pass

class ePubFixer(Plugin):

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('ePub Fixer')
    can_be_disabled = True

    # API that subclasses must implement {{{
    @property
    def short_description(self):
        raise NotImplementedError

    @property
    def long_description(self):
        raise NotImplementedError

    @property
    def fix_name(self):
        raise NotImplementedError

    @property
    def options(self):
        '''
        Return a list of 4-tuples
        (option_name, type, default, help_text)
        type is one of 'bool', 'int', 'string'
        '''
        return []

    def run(self, container, opts, log, fix=False):
        raise NotImplementedError
    # }}}

    def add_options_to_parser(self, parser):
        parser.add_option('--' + self.fix_name.replace('_', '-'),
                help=self.long_description, action='store_true', default=False)
        for option in self.options:
            action = 'store'
            if option[1] == 'bool':
                action = 'store_true'
            kwargs = {'action': action, 'default':option[2], 'help':option[3]}
            if option[1] != 'bool':
                kwargs['type'] = option[1]
            parser.add_option('--'+option[0].replace('_', '-'), **kwargs)
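
A minimal sketch of a third-party fixer built on this API (the StripBlankTitles class and its behaviour are hypothetical, and a real plugin would still have to be registered so that calibre.customize.ui.epub_fixers() yields it):

from calibre.ebooks.epub.fix import ePubFixer

class StripBlankTitles(ePubFixer):
    name = 'Strip blank titles'

    @property
    def short_description(self):
        return 'Remove empty dc:title elements'

    @property
    def long_description(self):
        return 'Remove dc:title elements that contain no text from the OPF.'

    @property
    def fix_name(self):
        return 'blank_titles' # exposed as --blank-titles on the command line

    def run(self, container, opts, log, fix=False):
        opf = container.opf
        for t in opf.xpath('//dc:title',
                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
            if not (t.text and t.text.strip()):
                log.error('Found empty dc:title')
                if fix:
                    t.getparent().remove(t)
                    container.set(container.opf_name, opf)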

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, posixpath, urllib, sys, re

from lxml import etree

from calibre.ebooks.epub.fix import InvalidEpub
from calibre import guess_type, prepare_string_for_xml
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import iswindows
from calibre.utils.zipfile import ZipFile, ZIP_STORED

exists, join = os.path.exists, os.path.join

OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
OPF_NS = 'http://www.idpf.org/2007/opf'

class Container(object):

    META_INF = {
            'container.xml' : True,
            'manifest.xml' : False,
            'encryption.xml' : False,
            'metadata.xml' : False,
            'signatures.xml' : False,
            'rights.xml' : False,
    }

    def __init__(self, path, log):
        self.root = os.path.abspath(path)
        self.log = log
        self.dirtied = set([])
        self.cache = {}
        self.mime_map = {}

        if exists(join(self.root, 'mimetype')):
            os.remove(join(self.root, 'mimetype'))

        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        self.container = etree.fromstring(open(container_path, 'rb').read())
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
        opf_path = os.path.join(self.root,
                *opf_files[0].get('full-path').split('/'))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')

        # Map of relative paths with / separators to absolute
        # paths on filesystem with os separators
        self.name_map = {}
        for dirpath, dirnames, filenames in os.walk(self.root):
            for f in filenames:
                path = join(dirpath, f)
                name = os.path.relpath(path, self.root).replace(os.sep, '/')
                self.name_map[name] = path
                if path == opf_path:
                    self.opf_name = name
                    self.mime_map[name] = guess_type('a.opf')[0]

        for item in self.opf.xpath(
                '//opf:manifest/opf:item[@href and @media-type]',
                namespaces={'opf':OPF_NS}):
            href = item.get('href')
            self.mime_map[self.href_to_name(href,
                posixpath.dirname(self.opf_name))] = item.get('media-type')

    def manifest_worthy_names(self):
        for name in self.name_map:
            if name.endswith('.opf'): continue
            if name.startswith('META-INF') and \
                    posixpath.basename(name) in self.META_INF: continue
            yield name

    def delete_name(self, name):
        self.mime_map.pop(name, None)
        path = self.name_map[name]
        os.remove(path)
        self.name_map.pop(name)

    def manifest_item_for_name(self, name):
        href = self.name_to_href(name,
                posixpath.dirname(self.opf_name))
        q = prepare_string_for_xml(href, attribute=True)
        existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q,
                namespaces={'opf':OPF_NS})
        if not existing:
            return None
        return existing[0]

    def add_name_to_manifest(self, name):
        item = self.manifest_item_for_name(name)
        if item is not None:
            return
        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
                id=self.generate_manifest_id())
        mt = guess_type(posixpath.basename(name))[0]
        if not mt:
            mt = 'application/octet-stream'
        item.set('media-type', mt)
        manifest.append(item)

    def generate_manifest_id(self):
        items = self.opf.xpath('//opf:manifest/opf:item[@id]',
                namespaces={'opf':OPF_NS})
        ids = set([x.get('id') for x in items])
        for x in xrange(sys.maxint):
            c = 'id%d'%x
            if c not in ids:
                return c

    @property
    def opf(self):
        return self.get(self.opf_name)

    def href_to_name(self, href, base=''):
        href = urllib.unquote(href.partition('#')[0])
        name = href
        if base:
            name = posixpath.join(base, href)
        return name

    def name_to_href(self, name, base):
        if not base:
            return name
        return posixpath.relpath(name, base)

    def get_raw(self, name):
        path = self.name_map[name]
        return open(path, 'rb').read()

    def get(self, name):
        if name in self.cache:
            return self.cache[name]
        raw = self.get_raw(name)
        if name in self.mime_map:
            raw = self._parse(raw, self.mime_map[name])
        self.cache[name] = raw
        return raw

    def set(self, name, val):
        self.cache[name] = val
        self.dirtied.add(name)

    def _parse(self, raw, mimetype):
        mt = mimetype.lower()
        if mt.endswith('+xml'):
            parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
            raw = xml_to_unicode(raw,
                strip_encoding_pats=True, assume_utf8=True,
                resolve_entities=True)[0].strip()
            idx = raw.find('<html')
            if idx == -1:
                idx = raw.find('<HTML')
            if idx > -1:
                pre = raw[:idx]
                raw = raw[idx:]
                if '<!DOCTYPE' in pre:
                    user_entities = {}
                    for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
                        val = match.group(2)
                        if val.startswith('"') and val.endswith('"'):
                            val = val[1:-1]
                        user_entities[match.group(1)] = val
                    if user_entities:
                        pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys())))
                        raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
            return etree.fromstring(raw, parser=parser)
        return raw

    def write(self, path):
        for name in self.dirtied:
            data = self.cache[name]
            raw = data
            if hasattr(data, 'xpath'):
                raw = etree.tostring(data, encoding='utf-8',
                        xml_declaration=True)
            with open(self.name_map[name], 'wb') as f:
                f.write(raw)
        self.dirtied.clear()
        zf = ZipFile(path, 'w')
        zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
                compression=ZIP_STORED)
        zf.add_dir(self.root)
        zf.close()
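
A minimal sketch of driving Container directly (epub-fix normally does this for you); it assumes 'book_extracted' holds an already unzipped EPUB and 'book-fixed.epub' is a placeholder output path:

from calibre.utils.logging import default_log
from calibre.ebooks.epub.fix.container import Container

c = Container('book_extracted', default_log)
for name in c.manifest_worthy_names():
    if c.manifest_item_for_name(name) is None:
        c.add_name_to_manifest(name)
c.set(c.opf_name, c.opf) # mark the OPF dirty so write() serializes it
c.write('book-fixed.epub') # re-zips with the stored mimetype entry first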

View File

@@ -0,0 +1,82 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
from calibre.utils.date import parse_date, strptime

class Epubcheck(ePubFixer):

    name = 'Workaround epubcheck bugs'

    @property
    def short_description(self):
        return _('Workaround epubcheck bugs')

    @property
    def long_description(self):
        return _('Workarounds for bugs in the latest release of epubcheck. '
                'epubcheck reports many things as errors that are not '
                'actually errors. epub-fix will try to detect these and replace '
                'them with constructs that epubcheck likes. This may cause '
                'significant changes to your epub, complain to the epubcheck '
                'project.')

    @property
    def fix_name(self):
        return 'epubcheck'

    def fix_pubdates(self):
        dirtied = False
        opf = self.container.opf
        for dcdate in opf.xpath('//dc:date',
                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
            raw = dcdate.text
            if not raw: raw = ''
            default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
            try:
                ts = parse_date(raw, assume_utc=False, as_utc=True,
                        default=default)
            except:
                raise InvalidEpub('Invalid date set in OPF', raw)
            sval = ts.strftime('%Y-%m-%d')
            if sval != raw:
                self.log.error(
                        'OPF contains date', raw, 'that epubcheck does not like')
                if self.fix:
                    dcdate.text = sval
                    self.log('\tReplaced', raw, 'with', sval)
                    dirtied = True
        if dirtied:
            self.container.set(self.container.opf_name, opf)

    def fix_preserve_aspect_ratio(self):
        for name in self.container.name_map:
            mt = self.container.mime_map.get(name, '')
            if mt.lower() == 'application/xhtml+xml':
                root = self.container.get(name)
                dirtied = False
                for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
                        namespaces={'svg':'http://www.w3.org/2000/svg'}):
                    self.log.error('Found <svg> element with'
                            ' preserveAspectRatio="none" which epubcheck '
                            'cannot handle')
                    if self.fix:
                        svg.set('preserveAspectRatio', 'xMidYMid meet')
                        dirtied = True
                        self.log('\tReplaced none with xMidYMid meet')
                if dirtied:
                    self.container.set(name, root)

    def run(self, container, opts, log, fix=False):
        self.container = container
        self.opts = opts
        self.log = log
        self.fix = fix
        self.fix_pubdates()
        self.fix_preserve_aspect_ratio()

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, os

from calibre.utils.config import OptionParser
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.zipfile import ZipFile
from calibre.utils.logging import default_log
from calibre.customize.ui import epub_fixers
from calibre.ebooks.epub.fix.container import Container

def option_parser():
    parser = OptionParser(usage=_(
        '%prog [options] file.epub\n\n'
        'Fix common problems in EPUB files that can cause them '
        'to be rejected by poorly designed publishing services.\n\n'
        'By default, no fixing is done and messages are printed out '
        'for each error detected. Use the options to control which errors '
        'are automatically fixed.'))
    for fixer in epub_fixers():
        fixer.add_options_to_parser(parser)

    return parser

def run(epub, opts, log):
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            zf = ZipFile(epub)
            zf.extractall()
            zf.close()
            container = Container(tdir, log)
            for fixer in epub_fixers():
                fix = getattr(opts, fixer.fix_name, False)
                fixer.run(container, opts, log, fix=fix)
            container.write(epub)

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        print
        default_log.error(_('You must specify an epub file'))
        return
    epub = os.path.abspath(args[1])
    run(epub, opts, default_log)

if __name__ == '__main__':
    main()
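
A sketch of invoking the tool programmatically; 'mybook.epub' is a placeholder, and the flag names are derived from each fixer's fix_name (epubcheck and unmanifested in this commit):

from calibre.ebooks.epub.fix.main import main

main(['epub-fix', '--epubcheck', '--unmanifested', 'mybook.epub'])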

View File

@@ -0,0 +1,49 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.epub.fix import ePubFixer

class Unmanifested(ePubFixer):

    name = 'Fix unmanifested files'

    @property
    def short_description(self):
        return _('Fix unmanifested files')

    @property
    def long_description(self):
        return _('Fix unmanifested files. epub-fix can either add them to '
                'the manifest or delete them as specified by the '
                'delete unmanifested option.')

    @property
    def fix_name(self):
        return 'unmanifested'

    @property
    def options(self):
        return [('delete_unmanifested', 'bool', False,
            _('Delete unmanifested files instead of adding them to the manifest'))]

    def run(self, container, opts, log, fix=False):
        dirtied = False
        for name in list(container.manifest_worthy_names()):
            item = container.manifest_item_for_name(name)
            if item is None:
                log.error(name, 'not in manifest')
                if fix:
                    if opts.delete_unmanifested:
                        container.delete_name(name)
                        log('\tDeleted')
                    else:
                        container.add_name_to_manifest(name)
                        log('\tAdded to manifest')
                    dirtied = True
        if dirtied:
            container.set(container.opf_name, container.opf)

View File

@@ -380,10 +380,9 @@ class EPUBOutput(OutputFormatPlugin):
                 sel = '.'+lb.get('class')
                 for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                     if sel == rule.selectorList.selectorText:
-                        val = rule.style.removeProperty('margin-left')
-                        pval = rule.style.getProperty('padding-left')
-                        if val and not pval:
-                            rule.style.setProperty('padding-left', val)
+                        rule.style.removeProperty('margin-left')
+                        # padding-left breaks rendering in webkit and gecko
+                        rule.style.removeProperty('padding-left')
 # }}}

View File

@@ -20,7 +20,7 @@ from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux, isfreebsd
+from calibre.constants import islinux, isfreebsd, iswindows
 from calibre import unicode_path
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
@@ -32,9 +32,14 @@ class Link(object):
     @classmethod
     def url_to_local_path(cls, url, base):
-        path = urlunparse(('', '', url.path, url.params, url.query, ''))
+        path = url.path
+        isabs = False
+        if iswindows and path.startswith('/'):
+            path = path[1:]
+            isabs = True
+        path = urlunparse(('', '', path, url.params, url.query, ''))
         path = unquote(path)
-        if os.path.isabs(path):
+        if isabs or os.path.isabs(path):
             return path
         return os.path.abspath(os.path.join(base, path))
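
A standalone illustration of the windows case handled above (the URL is made up): urlparse gives file:///C:/books/a.html a path of '/C:/books/a.html', and dropping the leading slash recovers a usable absolute path.

from urlparse import urlparse

url = urlparse('file:///C:/books/a.html')
path = url.path # '/C:/books/a.html'
if path.startswith('/'): # the iswindows branch above
    path = path[1:] # 'C:/books/a.html'
print path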
@@ -307,6 +312,7 @@ class HTMLInput(InputFormatPlugin):
                 xpath
         from calibre import guess_type
         import cssutils
+        self.OEB_STYLES = OEB_STYLES
         oeb = create_oebbook(log, None, opts, self,
                 encoding=opts.input_encoding, populate=False)
         self.oeb = oeb
@@ -323,7 +329,7 @@ class HTMLInput(InputFormatPlugin):
             metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
         if not metadata.language:
             oeb.logger.warn(u'Language not specified')
-            metadata.add('language', get_lang())
+            metadata.add('language', get_lang().replace('_', '-'))
         if not metadata.creator:
             oeb.logger.warn('Creator not specified')
             metadata.add('creator', self.oeb.translate(__('Unknown')))
@@ -371,7 +377,7 @@ class HTMLInput(InputFormatPlugin):
             rewrite_links(item.data, partial(self.resource_adder, base=dpath))

         for item in oeb.manifest.values():
-            if item.media_type in OEB_STYLES:
+            if item.media_type in self.OEB_STYLES:
                 dpath = None
                 for path, href in self.added_resources.items():
                     if href == item.href:
@@ -409,12 +415,30 @@ class HTMLInput(InputFormatPlugin):
         oeb.container = DirContainer(os.getcwdu(), oeb.log)
         return oeb

+    def link_to_local_path(self, link_, base=None):
+        if not isinstance(link_, unicode):
+            try:
+                link_ = link_.decode('utf-8', 'strict')
+            except:
+                self.log.warn('Failed to decode link %r. Ignoring'%link_)
+                return None, None
+        try:
+            l = Link(link_, base if base else os.getcwdu())
+        except:
+            self.log.exception('Failed to process link: %r'%link_)
+            return None, None
+        if l.path is None:
+            # Not a local resource
+            return None, None
+        link = l.path.replace('/', os.sep).strip()
+        frag = l.fragment
+        if not link:
+            return None, None
+        return link, frag
+
     def resource_adder(self, link_, base=None):
-        link = self.urlnormalize(link_)
-        link, frag = self.urldefrag(link)
-        link = unquote(link).replace('/', os.sep)
-        if not link.strip():
+        link, frag = self.link_to_local_path(link_, base=base)
+        if link is None:
             return link_
         try:
             if base and not os.path.isabs(link):
@@ -442,6 +466,9 @@ class HTMLInput(InputFormatPlugin):
             item = self.oeb.manifest.add(id, href, media_type)
             item.html_input_href = bhref
+            if guessed in self.OEB_STYLES:
+                item.override_css_fetch = partial(
+                        self.css_import_handler, os.path.dirname(link))
             item.data
         self.added_resources[link] = href
@@ -450,7 +477,17 @@ class HTMLInput(InputFormatPlugin):
             nlink = '#'.join((nlink, frag))
         return nlink

+    def css_import_handler(self, base, href):
+        link, frag = self.link_to_local_path(href, base=base)
+        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
+            return (None, None)
+        try:
+            raw = open(link, 'rb').read().decode('utf-8', 'replace')
+            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
+        except:
+            self.log.exception('Failed to read CSS file: %r'%link)
+            return (None, None)
+        return (None, raw)

View File

@@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin):
         xml = d.to_xml(write_files=True)
         if options.verbose > 2:
             open('lrs.xml', 'wb').write(xml.encode('utf-8'))
-        parser = etree.XMLParser(recover=True, no_network=True)
+        parser = etree.XMLParser(no_network=True, huge_tree=True)
         doc = etree.fromstring(xml, parser=parser)
         char_button_map = {}
         for x in doc.xpath('//CharButton[@refobj]'):

View File

@@ -870,7 +870,7 @@ class Text(LRFStream):
         open_containers = collections.deque()
         for c in self.content:
             if isinstance(c, basestring):
-                s += prepare_string_for_xml(c)
+                s += prepare_string_for_xml(c).replace('\0', '')
             elif c is None:
                 if open_containers:
                     p = open_containers.pop()

View File

@@ -268,7 +268,7 @@ class MetaInformation(object):
             ):
             prints(x, getattr(self, x, 'None'))

-    def smart_update(self, mi):
+    def smart_update(self, mi, replace_tags=False):
         '''
         Merge the information in C{mi} into self. In case of conflicts, the information
         in C{mi} takes precedence, unless the information in mi is NULL.
@@ -282,7 +282,7 @@ class MetaInformation(object):
         for attr in ('author_sort', 'title_sort', 'category',
                      'publisher', 'series', 'series_index', 'rating',
                      'isbn', 'application_id', 'manifest', 'spine', 'toc',
-                     'cover', 'language', 'guide', 'book_producer',
+                     'cover', 'guide', 'book_producer',
                      'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
                      'publication_type', 'uuid'):
             if hasattr(mi, attr):
@@ -291,7 +291,10 @@ class MetaInformation(object):
                     setattr(self, attr, val)

         if mi.tags:
-            self.tags += mi.tags
+            if replace_tags:
+                self.tags = mi.tags
+            else:
+                self.tags += mi.tags
         self.tags = list(set(self.tags))

         if mi.author_sort_map:
@@ -314,6 +317,11 @@ class MetaInformation(object):
             if len(other_comments.strip()) > len(my_comments.strip()):
                 self.comments = other_comments

+        other_lang = getattr(mi, 'language', None)
+        if other_lang and other_lang.lower() != 'und':
+            self.language = other_lang
+
     def format_series_index(self):
         try:
             x = float(self.series_index)
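
A minimal sketch of the merge semantics (the titles and tags are made up; smart_update and its fields are as above):

from calibre.ebooks.metadata import MetaInformation

mi = MetaInformation('Example Book', ['A. Author'])
mi.tags = ['fiction']

incoming = MetaInformation('Example Book', ['A. Author'])
incoming.tags = ['sf']
incoming.language = 'und' # 'und' (undetermined) is ignored by the merge

mi.smart_update(incoming) # tags are unioned: fiction, sf
mi.smart_update(incoming, replace_tags=True) # tags are replaced: sf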

View File

@@ -0,0 +1,15 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.customize import Plugin

class CoverDownload(Plugin):

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('Cover download')
View File

@@ -15,7 +15,6 @@ from calibre.utils.config import OptionParser
 from calibre.ebooks.metadata.fetch import MetadataSource
 from calibre.utils.date import parse_date, utcnow

-DOUBAN_API_KEY = None
 NAMESPACES = {
     'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
     'atom' : 'http://www.w3.org/2005/Atom',
@@ -35,13 +34,15 @@ date = XPath("descendant::db:attribute[@name='pubdate']")
 creator = XPath("descendant::db:attribute[@name='author']")
 tag = XPath("descendant::db:tag")

+CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
+
 class DoubanBooks(MetadataSource):

     name = 'Douban Books'
     description = _('Downloads metadata from Douban.com')
     supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
     author = 'Li Fanxi <lifanxi@freemindworld.com>' # The author of this plugin
-    version = (1, 0, 0) # The version number of this plugin
+    version = (1, 0, 1) # The version number of this plugin

     def fetch(self):
         try:
@@ -65,7 +66,7 @@ class Query(object):
     type = "search"

     def __init__(self, title=None, author=None, publisher=None, isbn=None,
-                 max_results=20, start_index=1):
+                 max_results=20, start_index=1, api_key=''):
         assert not(title is None and author is None and publisher is None and \
                    isbn is None)
         assert (int(max_results) < 21)
@@ -89,16 +90,16 @@ class Query(object):
         if self.type == "isbn":
             self.url = self.ISBN_URL + q
-            if DOUBAN_API_KEY is not None:
-                self.url = self.url + "?apikey=" + DOUBAN_API_KEY
+            if api_key != '':
+                self.url = self.url + "?apikey=" + api_key
         else:
             self.url = self.SEARCH_URL+urlencode({
                 'q':q,
                 'max-results':max_results,
                 'start-index':start_index,
                 })
-            if DOUBAN_API_KEY is not None:
-                self.url = self.url + "&apikey=" + DOUBAN_API_KEY
+            if api_key != '':
+                self.url = self.url + "&apikey=" + api_key

     def __call__(self, browser, verbose):
         if verbose:
@@ -177,7 +178,7 @@ class ResultList(list):
             d = None
         return d

-    def populate(self, entries, browser, verbose=False):
+    def populate(self, entries, browser, verbose=False, api_key=''):
         for x in entries:
             try:
                 id_url = entry_id(x)[0].text
@@ -186,8 +187,8 @@ class ResultList(list):
                 report(verbose)
             mi = MetaInformation(title, self.get_authors(x))
             try:
-                if DOUBAN_API_KEY is not None:
-                    id_url = id_url + "?apikey=" + DOUBAN_API_KEY
+                if api_key != '':
+                    id_url = id_url + "?apikey=" + api_key
                 raw = browser.open(id_url).read()
                 feed = etree.fromstring(raw)
                 x = entry(feed)[0]
@@ -203,12 +204,16 @@ class ResultList(list):
             self.append(mi)

 def search(title=None, author=None, publisher=None, isbn=None,
-           verbose=False, max_results=40):
+           verbose=False, max_results=40, api_key=None):
     br = browser()
     start, entries = 1, []
+
+    if api_key is None:
+        api_key = CALIBRE_DOUBAN_API_KEY
+
     while start > 0 and len(entries) <= max_results:
         new, start = Query(title=title, author=author, publisher=publisher,
-            isbn=isbn, max_results=max_results, start_index=start)(br, verbose)
+            isbn=isbn, max_results=max_results, start_index=start, api_key=api_key)(br, verbose)
         if not new:
             break
         entries.extend(new)
@@ -216,7 +221,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
     entries = entries[:max_results]

     ans = ResultList()
-    ans.populate(entries, br, verbose)
+    ans.populate(entries, br, verbose, api_key)
     return ans

 def option_parser():
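
A sketch of calling search() after this change (the import path is assumed and the ISBN shown is a placeholder); when api_key is omitted, the built-in CALIBRE_DOUBAN_API_KEY is used:

from calibre.ebooks.metadata.douban import search

results = search(title='1984', max_results=5) # uses the calibre key
results = search(isbn='9780000000000', api_key='your-own-key')
for mi in results:
    print mi.title, mi.authors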

View File

@@ -10,10 +10,11 @@ from calibre import prints
 from calibre.utils.config import OptionParser
 from calibre.utils.logging import default_log
 from calibre.customize import Plugin
+from calibre.ebooks.metadata.library_thing import check_for_cover

 metadata_config = None

-class MetadataSource(Plugin):
+class MetadataSource(Plugin): # {{{

     author = 'Kovid Goyal'
@@ -130,7 +131,9 @@ class MetadataSource(Plugin):
     def customization_help(self):
         return 'This plugin can only be customized using the GUI'

-class GoogleBooks(MetadataSource):
+# }}}
+
+class GoogleBooks(MetadataSource): # {{{

     name = 'Google Books'
     description = _('Downloads metadata from Google Books')
@@ -145,8 +148,9 @@ class GoogleBooks(MetadataSource):
             self.exception = e
             self.tb = traceback.format_exc()
+# }}}

-class ISBNDB(MetadataSource):
+class ISBNDB(MetadataSource): # {{{

     name = 'IsbnDB'
     description = _('Downloads metadata from isbndb.com')
@@ -181,7 +185,9 @@ class ISBNDB(MetadataSource):
                 'and enter your access key below.')
         return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')

-class Amazon(MetadataSource):
+# }}}
+
+class Amazon(MetadataSource): # {{{

     name = 'Amazon'
     metadata_type = 'social'
@@ -198,37 +204,27 @@ class Amazon(MetadataSource):
             self.exception = e
             self.tb = traceback.format_exc()

-class LibraryThing(MetadataSource):
+# }}}
+
+class LibraryThing(MetadataSource): # {{{

     name = 'LibraryThing'
     metadata_type = 'social'
-    description = _('Downloads series information from librarything.com')
+    description = _('Downloads series/tags/rating information from librarything.com')

     def fetch(self):
         if not self.isbn:
             return
-        from calibre import browser
-        from calibre.ebooks.metadata import MetaInformation
-        import json
-        br = browser()
+        from calibre.ebooks.metadata.library_thing import get_social_metadata
         try:
-            raw = br.open(
-                'http://status.calibre-ebook.com/library_thing/metadata/'+self.isbn
-                ).read()
-            data = json.loads(raw)
-            if not data:
-                return
-            if 'error' in data:
-                raise Exception(data['error'])
-            if 'series' in data and 'series_index' in data:
-                mi = MetaInformation(self.title, [])
-                mi.series = data['series']
-                mi.series_index = data['series_index']
-                self.results = mi
+            self.results = get_social_metadata(self.title, self.book_author,
+                    self.publisher, self.isbn)
         except Exception, e:
             self.exception = e
             self.tb = traceback.format_exc()

+# }}}

 def result_index(source, result):
     if not result.isbn:
@@ -268,6 +264,26 @@ class MetadataSources(object):
         for s in self.sources:
             s.join()

+def filter_metadata_results(item):
+    keywords = ["audio", "tape", "cassette", "abridged", "playaway"]
+    for keyword in keywords:
+        if item.publisher and keyword in item.publisher.lower():
+            return False
+    return True
+
+def do_cover_check(item):
+    item.has_cover = False
+    if item.isbn:
+        try:
+            item.has_cover = check_for_cover(item.isbn)
+        except:
+            pass # Cover not found
+
+def check_for_covers(items):
+    threads = [Thread(target=do_cover_check, args=(item,)) for item in items]
+    for t in threads: t.start()
+    for t in threads: t.join()
+
 def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
         verbose=0):
     assert not(title is None and author is None and publisher is None and \
@@ -285,10 +301,73 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
     for fetcher in fetchers[1:]:
         merge_results(results, fetcher.results)

-    results = sorted(results, cmp=lambda x, y : cmp(
-        (x.comments.strip() if x.comments else ''),
-        (y.comments.strip() if y.comments else '')
-        ), reverse=True)
+    results = list(filter(filter_metadata_results, results))
+
+    check_for_covers(results)
+
+    words = ("the", "a", "an", "of", "and")
+    prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
+    trailing_paren_pat = re.compile(r'\(.*\)$')
+    whitespace_pat = re.compile(r'\s+')
+
+    def sort_func(x, y):
+
+        def cleanup_title(s):
+            if s is None:
+                s = _('Unknown')
+            s = s.strip().lower()
+            s = prefix_pat.sub(' ', s)
+            s = trailing_paren_pat.sub('', s)
+            s = whitespace_pat.sub(' ', s)
+            return s.strip()
+
+        t = cleanup_title(title)
+        x_title = cleanup_title(x.title)
+        y_title = cleanup_title(y.title)
+
+        # prefer titles that start with the search title
+        tx = cmp(t, x_title)
+        ty = cmp(t, y_title)
+        result = 0 if abs(tx) == abs(ty) else abs(tx) - abs(ty)
+
+        # then prefer titles that have a cover image
+        if result == 0:
+            result = -cmp(x.has_cover, y.has_cover)
+
+        # then prefer titles with the longest comment, within 10%
+        if result == 0:
+            cx = len(x.comments.strip() if x.comments else '')
+            cy = len(y.comments.strip() if y.comments else '')
+            t = (cx + cy) / 20
+            result = cy - cx
+            if abs(result) < t:
+                result = 0
+        return result
+
+    results = sorted(results, cmp=sort_func)
+
+    # if for some reason there is no comment in the top selection, go looking for one
+    if len(results) > 1:
+        if not results[0].comments or len(results[0].comments) == 0:
+            for r in results[1:]:
+                try:
+                    if title and title.lower() == r.title[:len(title)].lower() \
+                            and r.comments and len(r.comments):
+                        results[0].comments = r.comments
+                        break
+                except:
+                    pass
+    # Find a pubdate
+    pubdate = None
+    for r in results:
+        if r.pubdate is not None:
+            pubdate = r.pubdate
+            break
+    if pubdate is not None:
+        for r in results:
+            if r.pubdate is None:
+                r.pubdate = pubdate
+
     return results, [(x.name, x.exception, x.tb) for x in fetchers]

View File

@@ -34,7 +34,8 @@ def fetch_metadata(url, max=100, timeout=5.):
         errmsg = soup.find('errormessage').string
         raise ISBNDBError('Error fetching metadata: '+errmsg)
     total_results = int(book_list['total_results'])
-    np = '&page_number=%s&'%(page_number+1)
+    page_number += 1
+    np = '&page_number=%s&'%page_number
     url = re.sub(r'\&page_number=\d+\&', np, url)
     books.extend(book_list.findAll('bookdata'))
     max -= 1
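
The old code substituted page_number+1 into the URL but never advanced page_number itself, so every iteration after the first re-requested the same page. A standalone sketch of the corrected advance (the URL is made up for illustration):

import re

url = 'http://isbndb.com/api/books.xml?access_key=KEY&page_number=1&results=details'
page_number = 1
page_number += 1 # advance the counter itself, not just the substituted value
url = re.sub(r'\&page_number=\d+\&', '&page_number=%s&' % page_number, url)
print url # ...&page_number=2&...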

Some files were not shown because too many files have changed in this diff.