Merge from trunk

This commit is contained in:
Charles Haley 2010-09-23 07:40:19 +01:00
commit 5192438503
43 changed files with 1664 additions and 172 deletions

688
imgsrc/trim.svg Normal file
View File

@ -0,0 +1,688 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="128"
height="128"
id="svg1307"
sodipodi:version="0.32"
inkscape:version="0.46+devel"
version="1.0"
sodipodi:docname="transform-crop.svgz"
inkscape:export-filename="/home/pinheiro/pics/oxygen-icons/scalable/actions/transform-crop.png"
inkscape:export-xdpi="90"
inkscape:export-ydpi="90"
inkscape:output_extension="org.inkscape.output.svgz.inkscape">
<defs
id="defs1309">
<linearGradient
inkscape:collect="always"
id="linearGradient2594">
<stop
style="stop-color:#fafafa;stop-opacity:1;"
offset="0"
id="stop2596" />
<stop
style="stop-color:#fafafa;stop-opacity:0;"
offset="1"
id="stop2598" />
</linearGradient>
<linearGradient
inkscape:collect="always"
id="linearGradient3969">
<stop
style="stop-color:#000000;stop-opacity:1;"
offset="0"
id="stop3971" />
<stop
style="stop-color:#000000;stop-opacity:0;"
offset="1"
id="stop3973" />
</linearGradient>
<linearGradient
id="linearGradient2783">
<stop
style="stop-color:#323232;stop-opacity:1;"
offset="0"
id="stop2785" />
<stop
id="stop2787"
offset="0.07692308"
style="stop-color:#dfe1e1;stop-opacity:1;" />
<stop
style="stop-color:#b6b1b1;stop-opacity:1;"
offset="0.26289096"
id="stop2799" />
<stop
id="stop2789"
offset="0.5"
style="stop-color:#8d8282;stop-opacity:1;" />
<stop
style="stop-color:#ffffff;stop-opacity:1;"
offset="0.78201604"
id="stop2791" />
<stop
style="stop-color:#dfd9df;stop-opacity:1;"
offset="0.9005897"
id="stop2793" />
<stop
style="stop-color:#3a3a3a;stop-opacity:1;"
offset="1"
id="stop2795" />
</linearGradient>
<linearGradient
id="linearGradient2222"
inkscape:collect="always">
<stop
id="stop2224"
offset="0"
style="stop-color:#0066ff;stop-opacity:1" />
<stop
id="stop2226"
offset="1"
style="stop-color:#80b3ff;stop-opacity:1" />
</linearGradient>
<linearGradient
id="linearGradient3314"
inkscape:collect="always">
<stop
id="stop3316"
offset="0"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop3318"
offset="1"
style="stop-color:#ffffff;stop-opacity:0;" />
</linearGradient>
<linearGradient
id="linearGradient2431">
<stop
style="stop-color:#ffffff;stop-opacity:1;"
offset="0"
id="stop2433" />
<stop
id="stop2435"
offset="0.42597079"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop2437"
offset="0.5892781"
style="stop-color:#f1f1f1;stop-opacity:1;" />
<stop
style="stop-color:#eaeaea;stop-opacity:1;"
offset="0.80219781"
id="stop2439" />
<stop
style="stop-color:#dfdfdf;stop-opacity:1;"
offset="1"
id="stop2441" />
</linearGradient>
<linearGradient
id="linearGradient7422">
<stop
style="stop-color:#b4b4b6;stop-opacity:1;"
offset="0"
id="stop7424" />
<stop
id="stop5348"
offset="0.5"
style="stop-color:#9c9ca1;stop-opacity:1;" />
<stop
id="stop7426"
offset="1"
style="stop-color:#cdcdd1;stop-opacity:1;" />
</linearGradient>
<linearGradient
id="linearGradient3310"
inkscape:collect="always">
<stop
id="stop3312"
offset="0"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop3314"
offset="1"
style="stop-color:#ffffff;stop-opacity:0;" />
</linearGradient>
<filter
inkscape:collect="always"
x="-0.21138181"
width="1.4227636"
y="-0.21047288"
height="1.4209458"
id="filter9723">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="1.4336041"
id="feGaussianBlur9725" />
</filter>
<clipPath
clipPathUnits="userSpaceOnUse"
id="clipPath10698">
<path
style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.80000001;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1"
d="M -128.2008,-3.392377 L -104.45558,6.3360672 L -102.43766,6.1757677 L -103.81912,-4.5678172 L -105.75454,-5.8316609 L -124.96922,-4.4459394 L -128.2008,-3.392377 z "
id="path10700"
sodipodi:nodetypes="ccccccc" />
</clipPath>
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient2783"
id="radialGradient3418"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.9728905,-8.15107,-18.526373,-2.211261,1957.2342,725.31677)"
cx="53.235302"
cy="106.0573"
fx="53.235302"
fy="106.0573"
r="9.1025209" />
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient2594"
id="radialGradient3420"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.5808473,-2.8009276,-6.4965168,-1.3472267,701.00301,348.75795)"
cx="53.347126"
cy="104.68401"
fx="53.347126"
fy="104.68401"
r="9.1025209" />
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient3314"
id="radialGradient3422"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(-2.9339535,-1.0170467,-1.1904108,3.4340702,323.071,-252.78281)"
cx="49.110855"
cy="105.43803"
fx="49.110855"
fy="105.43803"
r="10.20672" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2783"
id="linearGradient3425"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(2.2608955,0,0,1.9345479,-550.58555,-317.90247)"
x1="190.03462"
y1="90.22673"
x2="208.7153"
y2="90.22673" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3969"
id="linearGradient3430"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(2.2608955,0,0,1.9345479,-497.11778,-432.24104)"
x1="98.411324"
y1="185.68851"
x2="166.32983"
y2="155.59846" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient7422"
id="linearGradient3525"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(6.0715756e-2,0,0,9.7589526e-2,24.201706,-45.627655)"
x1="399.77466"
y1="1164.6696"
x2="399.77466"
y2="549.06134" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2431"
id="linearGradient3527"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.5415355,0,0,0.7222225,23.477667,-8.2222193)"
x1="119.57646"
y1="23.792561"
x2="15.999996"
y2="109.6508" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3310"
id="linearGradient3529"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0,-1.5975038,-2,0,96,199.26848)"
x1="102.31124"
y1="-5.8302126"
x2="74.330322"
y2="32" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2222"
id="linearGradient3538"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.7476489,0,0,0.7476489,0,-19.999999)"
x1="8.2386189"
y1="-13.864992"
x2="8.2386189"
y2="-1.4047648" />
<filter
inkscape:collect="always"
id="filter4420">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="3.0486726"
id="feGaussianBlur4422" />
</filter>
<mask
maskUnits="userSpaceOnUse"
id="mask3562">
<rect
ry="1.4444447"
rx="1.1997639"
y="8"
x="-4.0000005"
height="116.00001"
width="124"
id="rect3564"
style="fill:#ffffff;fill-opacity:1;stroke:none;filter:url(#filter4420)"
transform="matrix(1.1453342,0,0,1.1453342,15.087799,-38.432604)" />
</mask>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="2.2136483"
inkscape:cx="77.317692"
inkscape:cy="55.850409"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:document-units="px"
inkscape:grid-bbox="true"
guidetolerance="4"
showguides="true"
inkscape:guide-bbox="true"
inkscape:window-width="1440"
inkscape:window-height="840"
inkscape:window-x="223"
inkscape:window-y="37"
objecttolerance="4"
gridtolerance="4">
<sodipodi:guide
orientation="horizontal"
position="-32.073749"
id="guide2204" />
<inkscape:grid
id="GridFromPre046Settings"
type="xygrid"
originx="0px"
originy="0px"
spacingx="4px"
spacingy="4px"
color="#0000ff"
empcolor="#0000ff"
opacity="0.2"
empopacity="0.4"
empspacing="4"
visible="true"
enabled="true" />
</sodipodi:namedview>
<metadata
id="metadata1312">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<cc:license
rdf:resource="http://creativecommons.org/licenses/GPL/2.0/" />
<dc:contributor>
<cc:Agent>
<dc:title>Oxygen team</dc:title>
</cc:Agent>
</dc:contributor>
<dc:title></dc:title>
</cc:Work>
<cc:License
rdf:about="http://creativecommons.org/licenses/LGPL/2.1/">
<cc:permits
rdf:resource="http://web.resource.org/cc/Reproduction" />
<cc:permits
rdf:resource="http://web.resource.org/cc/Distribution" />
<cc:requires
rdf:resource="http://web.resource.org/cc/Notice" />
<cc:permits
rdf:resource="http://web.resource.org/cc/DerivativeWorks" />
<cc:requires
rdf:resource="http://web.resource.org/cc/ShareAlike" />
<cc:requires
rdf:resource="http://web.resource.org/cc/SourceCode" />
</cc:License>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Layer 1"
inkscape:groupmode="layer">
<rect
ry="0.1870501"
rx="0.1537565"
y="28.129654"
x="8"
height="92"
width="92"
id="rect3226"
style="fill:#618fd2;fill-opacity:0.09195401;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1" />
<g
id="g3520"
transform="translate(32,-0.1296539)">
<rect
inkscape:export-ydpi="90"
inkscape:export-xdpi="90"
inkscape:export-filename="/home/pinheiro/Desktop/mock2.png"
style="opacity:0.75;fill:url(#linearGradient3525);fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect3281"
width="92"
height="92"
x="28.129654"
y="-24"
inkscape:r_cx="true"
inkscape:r_cy="true"
ry="3.9616783"
rx="3.9616783"
transform="matrix(0,1,1,0,0,0)" />
<rect
ry="1.4444447"
rx="1.1997639"
y="-20"
x="32.129654"
height="84"
width="84"
id="rect3283"
style="fill:url(#linearGradient3527);fill-opacity:1;fill-rule:evenodd;stroke:none"
transform="matrix(0,1,1,0,0,0)" />
<path
id="path3285"
d="M 64,53.096891 C 45.143834,70.163928 24.748768,86.162699 -2.0000002e-07,96.129654 L -2.0000002e-07,52.647595 C 23.693959,50.212248 45.09831,42.609775 64,32.129654 L 64,53.096891 z"
style="fill:url(#linearGradient3529);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
</g>
<g
transform="translate(-16,20.129654)"
style="fill:#7193c6;fill-opacity:1"
id="g2250">
<rect
ry="1.3512546"
rx="0.077153668"
y="-116"
x="16"
height="4"
width="4"
id="rect3210"
style="opacity:1;fill:#7193c6;fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
transform="matrix(0,1,-1,0,0,0)"
inkscape:tile-w="8"
inkscape:tile-h="8"
inkscape:tile-cx="124"
inkscape:tile-cy="28" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,8)"
id="use2236"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,16)"
id="use2240"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,24)"
id="use2244"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,32)"
id="use2248"
width="128"
height="128" />
<use
height="88"
width="88"
transform="translate(0,24)"
id="use3220"
xlink:href="#use2240"
y="0"
x="0" />
<use
height="88"
width="88"
transform="translate(0,24)"
id="use3222"
xlink:href="#use2244"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2230"
xlink:href="#use2244"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2232"
xlink:href="#use2248"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2234"
xlink:href="#use3220"
y="0"
x="0" />
</g>
<use
height="128"
width="128"
transform="matrix(8.5712909e-8,-0.9999999,0.9999999,8.5712909e-8,-20.129659,128.12964)"
id="use2258"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(-88,0)"
id="use2314"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="matrix(8.5712909e-8,-0.9999999,0.9999999,8.5712909e-8,-20.129651,216.12964)"
id="use2316"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(96,0.1296547)"
id="use3300"
xlink:href="#rect3222"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(7.4990672e-6,96.129662)"
id="use3302"
xlink:href="#rect3222"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(96,96.129652)"
id="use3304"
xlink:href="#rect3222"
y="0"
x="0" />
<rect
ry="0.18696606"
rx="0.15479258"
y="-32"
x="0"
height="12"
width="12"
id="rect3222"
style="fill:url(#linearGradient3538);fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
transform="scale(1,-1)" />
<rect
transform="scale(1,-1)"
style="fill:#bfd9ff;fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
id="rect2225"
width="4"
height="4"
x="4"
y="-28"
rx="0.15479258"
ry="0.18696606" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(96,0.1296539)"
id="use3226"
xlink:href="#rect2225"
y="0"
x="0" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(7.5e-6,96.129661)"
id="use3228"
xlink:href="#rect2225"
y="0"
x="0" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(96,96.129654)"
id="use3230"
xlink:href="#rect2225"
y="0"
x="0" />
<rect
style="opacity:0.57786889;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3.63199997;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1"
id="rect1327"
width="1"
height="0"
x="15.057414"
y="-308.20486" />
<g
id="g3407"
transform="matrix(0.8731076,0,0,0.8731076,-13.173272,33.555799)"
mask="url(#mask3562)">
<path
sodipodi:nodetypes="ccccccc"
id="path3836"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
style="fill:url(#radialGradient3418);fill-opacity:1;fill-rule:nonzero;stroke:none" />
<path
style="fill:#555753;fill-opacity:1;fill-rule:nonzero;stroke:none"
d="m 107.32508,50.938663 -74.427424,35.613119 -3.008197,6.986785 76.368201,-35.710168 3.7845,-5.046004 -2.71708,-1.843732 z"
id="path8241"
sodipodi:nodetypes="cccccc" />
<path
style="opacity:0.10688836;fill:url(#radialGradient3420);fill-opacity:1;fill-rule:nonzero;stroke:none"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
id="path11683"
sodipodi:nodetypes="ccccccc" />
<path
sodipodi:nodetypes="ccccccc"
id="path17921"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
style="fill:none;stroke:url(#radialGradient3422);stroke-width:0.86455041;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:4" />
<rect
style="fill:#2e3436;fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect8239"
width="39.714981"
height="37.454777"
x="27.310663"
y="81.415123"
transform="matrix(0.6571695,-0.7537428,0.7537428,0.6571695,0,0)"
rx="3.8771732"
ry="3.8771732" />
<rect
transform="matrix(-0.7651682,-0.6438304,-0.6438304,0.7651682,0,0)"
style="fill:url(#linearGradient3425);fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect2803"
width="40.499767"
height="122.13765"
x="-120.93575"
y="-157.97318"
rx="0"
ry="0" />
<rect
transform="matrix(-0.7651682,-0.6438304,-0.6438304,0.7651682,0,0)"
y="-161.84383"
x="-119.89533"
height="126.00658"
width="39.223213"
id="rect3967"
style="fill:url(#linearGradient3430);fill-opacity:1;fill-rule:nonzero;stroke:none" />
<rect
transform="matrix(-0.6438304,0.7651682,0.7651682,0.6438304,0,0)"
y="80.243172"
x="-155.77248"
height="40.591759"
width="100.57008"
id="rect1851"
style="opacity:0.52459011;fill:#e0e0e0;fill-opacity:1;fill-rule:nonzero;stroke:none" />
<rect
ry="1.2485937"
rx="1.2485937"
transform="matrix(2.0406638,-2.3405465,2.3405465,2.0406638,304.62828,-199.57966)"
y="-5.487061"
x="-104.11894"
height="12.061829"
width="12.789698"
id="rect8248"
style="fill:#2e3436;fill-opacity:1;fill-rule:nonzero;stroke:none;filter:url(#filter9723)"
clip-path="url(#clipPath10698)" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 646 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 323 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 634 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 1017 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 722 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 722 B

BIN
resources/images/trim.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

View File

@ -49,7 +49,11 @@ class Danas(BasicNewsRecipe):
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
preprocess_regexps = [
(re.compile(u'\u0110'), lambda match: u'\u00D0')
,(re.compile(u'\u201c'), lambda match: '"')
,(re.compile(u'\u201e'), lambda match: '"')
]
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
remove_tags = [

View File

@ -0,0 +1,104 @@
#!/usr/bin/env python
# # Przed uzyciem przeczytaj komentarz w sekcji "feeds"
__license__ = 'GPL v3'
__copyright__ = u'2010, Richard z forum.eksiazki.org'
'''pomorska.pl'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPomorska(BasicNewsRecipe):
    """Recipe for Gazeta Pomorska (pomorska.pl) built from the site's RSS feeds.

    Edit the ``feeds`` list below to choose which categories are downloaded;
    the comments inside the list explain how.
    """

    title = u'Gazeta Pomorska'
    publisher = u'Gazeta Pomorska'
    description = u'Kujawy i Pomorze - wiadomo\u015bci'
    language = 'pl'
    __author__ = u'Richard z forum.eksiazki.org'
    # # (thanks to t3d from forum.eksiazki.org for testing)

    oldest_article = 2
    max_articles_per_feed = 20
    no_stylesheets = True
    remove_javascript = True

    # Each pattern matches a link (optionally preceded by a lead-in phrase)
    # and replaces it with the empty string before the page is parsed.
    preprocess_regexps = [
        (
            re.compile(r'<a href="http://maps.google[^>]*>[^<]*</a>\.*',
                       re.DOTALL|re.IGNORECASE),
            lambda match: '',
        ),
        (
            re.compile(r'[<Bb >]*Poznaj opinie[^<]*[</Bb >]*[^<]*<a href[^>]*>[^<]*</a>\.*',
                       re.DOTALL|re.IGNORECASE),
            lambda match: '',
        ),
        (
            re.compile(r'[<Bb >]*Przeczytaj[^<]*[</Bb >]*[^<]*<a href[^>]*>[^<]*</a>\.*',
                       re.DOTALL|re.IGNORECASE),
            lambda match: '',
        ),
        (
            re.compile(r'[<Bb >]*Wi.cej informacji[^<]*[</Bb >]*[^<]*<a href[^>]*>[^<]*</a>\.*',
                       re.DOTALL|re.IGNORECASE),
            lambda match: '',
        ),
        (
            re.compile(r'<a href[^>]*>[<Bb >]*Wideo[^<]*[</Bb >]*[^<]*</a>\.*',
                       re.DOTALL|re.IGNORECASE),
            lambda match: '',
        ),
        (
            re.compile(r'<a href[^>]*>[<Bb >]*KLIKNIJ TUTAJ[^<]*[</Bb >]*[^<]*</a>\.*',
                       re.DOTALL|re.IGNORECASE),
            lambda match: '',
        ),
    ]

    feeds = [
        # # This is the list of categories that can be fetched from Gazeta
        # # Pomorska, one category per line. If a line starts with a single "#"
        # # character, that category is commented out and will not be fetched.
        # # To fetch it, remove the # character from its line.
        # # If you subscribe to more than one category, every category line
        # # must end with an uncommented comma, except for the last line -
        # # it should have no comma at the end.
        # # Recommended ways of choosing categories:
        # # 1. PomorskaRSS - news of every type, or
        # # 2. Region + selected cities, or
        # # 3. Thematic news.
        # # List of categories:
        # # PomorskaRSS - news of every type; comment it out with "#"
        # # before uncommenting news of a selected type:
        (u'PomorskaRSS', u'http://www.pomorska.pl/rss.xml')
        # # regional news not assigned to a specific city:
        # (u'Region', u'http://www.pomorska.pl/region.xml'),
        # # news assigned to cities:
        # (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'),
        # (u'Nak\u0142o', u'http://www.pomorska.pl/naklo.xml'),
        # (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'),
        # (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'),
        # (u'Grudzi\u0105dz', u'http://www.pomorska.pl/grudziadz.xml'),
        # (u'Inowroc\u0142aw', u'http://www.pomorska.pl/inowroclaw.xml'),
        # (u'Toru\u0144', u'http://www.pomorska.pl/torun.xml'),
        # (u'W\u0142oc\u0142awek', u'http://www.pomorska.pl/wloclawek.xml'),
        # (u'Aleksandr\u00f3w Kujawski', u'http://www.pomorska.pl/aleksandrow.xml'),
        # (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'),
        # (u'Che\u0142mno', u'http://www.pomorska.pl/chelmno.xml'),
        # (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'),
        # (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'),
        # (u'Golub Dobrzy\u0144', u'http://www.pomorska.pl/golubdobrzyn.xml'),
        # (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'),
        # (u'Radziej\u00f3w', u'http://www.pomorska.pl/radziejow.xml'),
        # (u'Rypin', u'http://www.pomorska.pl/rypin.xml'),
        # (u'S\u0119p\u00f3lno', u'http://www.pomorska.pl/sepolno.xml'),
        # (u'\u015awiecie', u'http://www.pomorska.pl/swiecie.xml'),
        # (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'),
        # (u'\u017bnin', u'http://www.pomorska.pl/znin.xml')
        # # thematic news (redundant with region/cities):
        # (u'Sport', u'http://www.pomorska.pl/sport.xml'),
        # (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'),
        # (u'Auto', u'http://www.pomorska.pl/moto.xml'),
        # (u'Dom', u'http://www.pomorska.pl/dom.xml'),
        # (u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'),
        # (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')
    ]

    # Only the article container is kept; the listed sub-elements are then
    # removed from it.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
    ]
    remove_tags = [
        dict(name='p', attrs={'id': 'articleTags'}),
        dict(name='div', attrs={'id': 'articleEpaper'}),
        dict(name='div', attrs={'id': 'articleConnections'}),
        dict(name='div', attrs={'class': 'articleFacts'}),
        dict(name='div', attrs={'id': 'articleExternalLink'}),
        dict(name='div', attrs={'id': 'articleMultimedia'}),
        dict(name='div', attrs={'id': 'articleGalleries'}),
        dict(name='div', attrs={'id': 'articleAlarm'}),
        dict(name='div', attrs={'id': 'adholder_srodek1'}),
        dict(name='div', attrs={'id': 'articleVideo'}),
        dict(name='a', attrs={'name': 'fb_share'}),
    ]

    extra_css = (
        'h1 { font-size: 1.4em; }\n'
        'h2 { font-size: 1.0em; }'
    )

View File

@ -0,0 +1,43 @@
__author__ = ' (lrfurtado@yahoo.com.br)'
from calibre.web.feeds.news import BasicNewsRecipe
class LeJournalDeMontrealRecipe(BasicNewsRecipe):
    """Recipe for Le Journal de Montreal, fed by four canoe.com RSS feeds."""

    __author__ = 'Luciano Furtado'
    title = u'Le Journal de Montreal'
    description = u'Le Journal de Montreal'
    language = 'fr'

    oldest_article = 7
    max_articles_per_feed = 15
    use_embedded_content = 0

    # Elements removed from every downloaded page, selected by id or class.
    remove_tags = [
        dict(name='ul', attrs={'id': 'mainNav'}),
        dict(name='div', attrs={'id': 'boxPolitique'}),
        dict(name='div', attrs={'id': 'boxScoop'}),
        dict(name='div', attrs={'id': 'DossierSpec'}),
        dict(name='div', attrs={'id': 'channelBoxes'}),
        dict(name='div', attrs={'id': 'sectionBoxes'}),
        dict(name='div', attrs={'id': 'header'}),
        dict(name='div', attrs={'id': 'footer'}),
        dict(name='div', attrs={'id': 'navbarCanoe_container'}),
        dict(name='div', attrs={'id': 'popularCanoe'}),
        dict(name='div', attrs={'id': 'textAds'}),
        dict(name='div', attrs={'id': '24heures'}),
        dict(name='div', attrs={'class': 'bottomBox clear'}),
        dict(name='div', attrs={'class': 'articleControls thin'}),
    ]

    # (section title, RSS address) pairs.
    feeds = [
        (u'Actualites', u'http://www.canoe.com/rss/feed/nouvelles/ljm_actualites.xml'),
        (u'Arts et spectacle', u'http://www.canoe.com/rss/feed/nouvelles/ljm_arts.xml'),
        (u'Sports', u'http://www.canoe.com/rss/feed/nouvelles/ljm_sports.xml'),
        (u'Chroniques', u'http://www.canoe.com/rss/feed/nouvelles/ljm_chroniques.xml'),
    ]

View File

@ -1,35 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
nczas.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
#
class NCzas(BasicNewsRecipe):
    """Recipe for the online edition of Najwyzszy Czas! (nczas.com)."""

    title = u'Najwy\u017cszy Czas!'
    description = u'Najwy\u017cszy Czas!\nwydanie internetowe'
    __author__ = u'Tomasz D\u0142ugosz'
    language = 'pl'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    cover_url = 'http://nczas.com/wp-content/themes/default/grafika/logo.png'
    feeds = [
        (u'Najwy\u017cszy Czas!', u'http://nczas.com/feed/'),
    ]
    keep_only_tags = [
        dict(name='div', attrs={'class': 'trescartykulu'}),
    ]

    def postprocess_html(self, soup, first):
        """Tidy the parsed article and hand the same soup back.

        Images whose ``alt`` attribute is empty are removed from the tree,
        and every ``align="right"`` attribute is deleted from its element.
        ``first`` is accepted but not used here.
        """
        for empty_alt_image in soup.findAll(name='img', alt=""):
            empty_alt_image.extract()
        for right_aligned in soup.findAll(align="right"):
            del right_aligned['align']
        return soup

View File

@ -21,8 +21,8 @@ class Novosti(BasicNewsRecipe):
encoding = 'utf-8'
language = 'sr'
publication_type = 'newspaper'
extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
.author{font-size: small}
.articleLead{font-size: large; font-weight: bold}
"""
@ -47,6 +47,8 @@ class Novosti(BasicNewsRecipe):
item.name='p'
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
item['alt'] = 'image'
return soup

View File

@ -0,0 +1,46 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rmf24.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RMF24_ESKN(BasicNewsRecipe):
    """Recipe for the economy, sport, culture and science feeds of rmf24.pl."""

    title = u'Rmf24.pl - Ekonomia Sport Kultura Nauka'
    description = u'Ekonomia, sport, kultura i nauka ze strony rmf24.pl'
    __author__ = u'Tomasz D\u0142ugosz'
    language = 'pl'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Ekonomia', u'http://www.rmf24.pl/ekonomia/feed'),
        (u'Sport', u'http://www.rmf24.pl/sport/feed'),
        (u'Kultura', u'http://www.rmf24.pl/kultura/feed'),
        (u'Nauka', u'http://www.rmf24.pl/nauka/feed'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'box articleSingle print'}),
    ]
    # The 'REMOVE' class does not come from the site: it is written into the
    # markup by the second entry of preprocess_regexps below.
    remove_tags = [
        dict(name='div', attrs={'class': 'toTop'}),
        dict(name='div', attrs={'class': 'category'}),
        dict(name='div', attrs={'class': 'REMOVE'}),
        dict(name='div', attrs={'class': 'embed embedAd'}),
    ]

    extra_css = '\nh1 { font-size: 1.2em; }\n'

    # Every pattern is compiled case-insensitively with DOTALL.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            # Drop the photo heading.
            (r'<h2>Zdj.cie</h2>', lambda match: ''),
            # Re-label embed containers so remove_tags can strip them.
            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">',
             lambda match: 'REMOVE">'),
            # Cut the Facebook link, keeping only the closing </div>.
            (r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>',
             lambda match: '</div>'),
        ]
    ]

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rmf24.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RMF24(BasicNewsRecipe):
    """Recipe for the domestic and world news ("Fakty") feeds of rmf24.pl."""

    title = u'Rmf24.pl - Fakty'
    description = u'Fakty ze strony rmf24.pl'
    __author__ = u'Tomasz D\u0142ugosz'
    language = 'pl'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Kraj', u'http://www.rmf24.pl/fakty/polska/feed'),
        (u'\u015awiat', u'http://www.rmf24.pl/fakty/swiat/feed'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'box articleSingle print'}),
    ]
    # The 'REMOVE' class does not come from the site: it is written into the
    # markup by the second entry of preprocess_regexps below.
    remove_tags = [
        dict(name='div', attrs={'id': 'adBox625'}),
        dict(name='div', attrs={'class': 'toTop'}),
        dict(name='div', attrs={'class': 'category'}),
        dict(name='div', attrs={'class': 'REMOVE'}),
        dict(name='div', attrs={'class': 'embed embedAd'}),
    ]

    extra_css = '\nh1 { font-size: 1.2em; }\n'

    # Every pattern is compiled case-insensitively with DOTALL.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            # Drop the photo heading.
            (r'<h2>Zdj.cie</h2>', lambda match: ''),
            # Re-label embed containers so remove_tags can strip them.
            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">',
             lambda match: 'REMOVE">'),
            # Cut the Facebook link, keeping only the closing </div>.
            (r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>',
             lambda match: '</div>'),
        ]
    ]

View File

@ -0,0 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Luciano Furtado <lrfurtado at yahoo.com.br>'
'''
www.superesportes.com.br
'''
from calibre.web.feeds.news import BasicNewsRecipe
class SuperEsportesRecipe(BasicNewsRecipe):
    """Recipe for www.superesportes.com.br with one section per football club.

    No RSS feeds are used: ``parse_index`` visits each club's landing page
    and collects the links found in its ``lista_ultimas_noticias`` list.
    """

    title = u'www.superesportes.com.br'
    description = u'Superesportes - Notícias do esporte no Brasil e no mundo'
    __author__ = 'Luciano Furtado'
    language = 'pt'
    category = 'esportes, Brasil'
    no_stylesheets = True
    oldest_article = 7
    use_embedded_content = 0
    max_articles_per_feed = 10
    cover_url = 'http://imgs.mg.superesportes.com.br/superesportes_logo.png'
    extra_css = 'div.info_noticias h1 { font-size: 100% }'

    # Containers removed from every downloaded article page.
    remove_tags = [
        dict(name='div', attrs={'class': 'topo'}),
        dict(name='div', attrs={'class': 'rodape'}),
        dict(name='div', attrs={'class': 'navegacao'}),
        dict(name='div', attrs={'class': 'lateral2'}),
        dict(name='div', attrs={'class': 'leia_mais'}),
        dict(name='div', attrs={'id': 'comentar'}),
        dict(name='div', attrs={'id': 'vrumelc_noticia'}),
        dict(name='div', attrs={'class': 'compartilhe'}),
        dict(name='div', attrs={'class': 'linha_noticias'}),
        dict(name='div', attrs={'class': 'botoes_noticias'}),
        dict(name='div', attrs={'class': 'barra_time bg_time'}),
    ]

    def parse_index(self):
        """Scrape each club page and return the resulting sections.

        Returns a list of ``(section_title, articles)`` tuples. Each article
        is a dict with the keys ``title``, ``url``, ``description`` and
        ``date``. A section is left out when its page has no news list or
        the list yields no links.
        """
        sections = [
            (u'Atletico', 'http://www.df.superesportes.com.br/futebol/atletico-mg/capa_atletico_mg/index.shtml'),
            (u'Botafogo', 'http://www.df.superesportes.com.br/futebol/botafogo/capa_botafogo/index.shtml'),
            (u'Corinthians', 'http://www.df.superesportes.com.br/futebol/corinthians/capa_corinthians/index.shtml'),
            (u'Cruzeiro', 'http://www.df.superesportes.com.br/futebol/cruzeiro/capa_cruzeiro/index.shtml'),
            (u'Flamengo', 'http://www.df.superesportes.com.br/futebol/flamengo/capa_flamengo/index.shtml'),
            (u'Fluminense', 'http://www.df.superesportes.com.br/futebol/fluminense/capa_fluminense/index.shtml'),
            (u'Palmeiras', 'http://www.df.superesportes.com.br/futebol/palmeiras/capa_palmeiras/index.shtml'),
            (u'Santos', 'http://www.df.superesportes.com.br/futebol/santos/capa_santos/index.shtml'),
            (u'São Paulo', 'http://www.df.superesportes.com.br/futebol/sao-paulo/capa_sao_paulo/index.shtml'),
            (u'Vasco', 'http://www.df.superesportes.com.br/futebol/vasco/capa_vasco/index.shtml'),
        ]

        feeds = []
        for section, section_url in sections:
            soup = self.index_to_soup(section_url)
            latest_news = soup.find(name='ul', attrs={'class': 'lista_ultimas_noticias'})
            if latest_news is None:
                # find() returns None when the list is absent; skip this club
                # instead of letting one changed page abort the whole download.
                self.log("\n\nNo news list found for section: " + section)
                continue

            current_articles = []
            for li_tag in latest_news.findAll(name='li'):
                a_tag = li_tag.find('a', href=True)
                if a_tag is None:
                    continue
                title = self.tag_to_string(a_tag)
                # href=True in the lookup above guarantees the attribute exists.
                article_url = a_tag['href']
                self.log("\n\nFound title: " + title + "\nUrl: " + article_url + "\nSection: " + section)
                current_articles.append({
                    'title': title,
                    'url': article_url,
                    'description': title,
                    'date': '',
                })

            if current_articles:
                feeds.append((section, current_articles))
        return feeds

View File

@ -0,0 +1,45 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1284927619(BasicNewsRecipe):
    """Recipe for tagesanzeiger.ch built from the site's section RSS feeds."""

    title = u'Tagesanzeiger'
    publisher = u'Tamedia AG'
    __author__ = 'noxxx'
    description = 'tagesanzeiger.ch: Nichts verpassen'
    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
    language = 'de'

    oldest_article = 2
    max_articles_per_feed = 100

    # Built from the attributes above, so it must come after them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Elements removed from every page; all images are dropped as well.
    remove_tags = [
        dict(name='img'),
        dict(name='div', attrs={'class': [
            'swissquote ad', 'boxNews', 'centerAD', 'contentTabs2', 'sbsLabel',
        ]}),
        dict(name='div', attrs={'id': [
            'colRightAd', 'singleRight', 'singleSmallRight', 'MailInfo',
            'metaLine', 'sidebarSky', 'contentFooter', 'commentInfo',
            'commentInfo2', 'commentInfo3', 'footerBottom', 'clear',
            'boxExclusiv', 'singleLogo', 'navSearch', 'headerLogin',
            'headerBottomRight', 'horizontalNavigation', 'subnavigation',
            'googleAdSense', 'footerAd', 'contentbox', 'articleGalleryNav',
        ]}),
        dict(name='form', attrs={'id': ['articleMailForm', 'commentform']}),
        dict(name='div', attrs={'style': ['position:absolute']}),
        dict(name='script', attrs={'type': ['text/javascript']}),
        dict(name='p', attrs={'class': [
            'schreiben', 'smallPrint', 'charCounter', 'caption',
        ]}),
    ]

    # (section title, RSS address) pairs.
    feeds = [
        (u'Front', u'http://www.tagesanzeiger.ch/rss.html'),
        (u'Zürich', u'http://www.tagesanzeiger.ch/zuerich/rss.html'),
        (u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html'),
        (u'Ausland', u'http://www.tagesanzeiger.ch/ausland/rss.html'),
        (u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html'),
        (u'Wissen', u'http://www.tagesanzeiger.ch/wissen/rss.html'),
        (u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html'),
        (u'Wirtschaft', u'http://www.tagesanzeiger.ch/wirtschaft/rss.html'),
        (u'Sport', u'http://www.tagesanzeiger.ch/sport/rss.html'),
        (u'Kultur', u'http://www.tagesanzeiger.ch/kultur/rss.html'),
        (u'Leben', u'http://www.tagesanzeiger.ch/leben/rss.html'),
        (u'Auto', u'http://www.tagesanzeiger.ch/auto/rss.html'),
    ]

    def print_version(self, url):
        """Return *url* with ``/print.html`` appended."""
        print_suffix = '/print.html'
        return url + print_suffix

View File

@ -0,0 +1,52 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    '''
    News recipe for TheMarker (financial news in Hebrew).

    Articles are read from the section RSS feeds in ``feeds`` and downloaded
    from the printer-friendly URL computed by :meth:`print_version`.
    '''

    description = 'TheMarker Financial News in Hebrew'
    __author__ = 'TonyTheBookworm, Marbs'
    cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
    title = u'TheMarker'
    language = 'he'
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']}) ]
    max_articles_per_feed = 10
    # Hebrew is written right-to-left
    extra_css='body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
    feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
             (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
             (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
             (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
             (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
             (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
             (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
             (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
             (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
             (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
             (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]

    def print_version(self, url):
        '''
        Map an article URL taken from a feed to its printer-friendly URL.

        Two URL shapes are recognised:

        * URLs containing ``it.themarker.com`` (case-insensitive): the text
          following ``article/`` is appended to the IT print endpoint.
        * everything else: the text following the first ``=`` is treated as
          the story id and inserted into the main site's print endpoint.

        When the expected marker (``article/`` or ``=``) is missing, or the
        URL is empty, the URL is returned unchanged instead of raising.
        '''
        if not url:
            return url

        # One search is enough; the previous implementation repeated the
        # same search once per *character* of the URL.
        if re.search(r'it\.themarker\.com', url, re.IGNORECASE):
            pieces = url.split("article/")
            if len(pieces) > 1:
                return 'http://it.themarker.com/tmit/PrintArticle/' + pieces[1]
            return url

        pieces = url.split("=")
        if len(pieces) > 1:
            return 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + pieces[1]+'.xml'
        return url

View File

@ -70,13 +70,16 @@ class WallStreetJournal(BasicNewsRecipe):
def wsj_add_feed(self,feeds,title,url):
self.log('Found section:', title)
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
try:
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
except:
articles = []
if articles:
feeds.append((title, articles))
return feeds
return feeds
def parse_index(self):
soup = self.wsj_get_index()
@ -99,7 +102,7 @@ class WallStreetJournal(BasicNewsRecipe):
url = 'http://online.wsj.com' + a['href']
feeds = self.wsj_add_feed(feeds,title,url)
title = 'What''s News'
url = url.replace('pageone','whatsnews')
url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url)
else:
title = self.tag_to_string(a)
@ -141,7 +144,7 @@ class WallStreetJournal(BasicNewsRecipe):
articles = []
flavorarea = soup.find('div', attrs={'class':lambda x: x and 'ahed' in x})
if flavorarea is not None:
if flavorarea is not None:
flavorstory = flavorarea.find('a', href=lambda x: x and x.startswith('/article'))
if flavorstory is not None:
flavorstory['class'] = 'mjLinkItem'

View File

@ -54,10 +54,13 @@ class WallStreetJournal(BasicNewsRecipe):
def wsj_add_feed(self,feeds,title,url):
self.log('Found section:', title)
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
try:
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
except:
articles = []
if articles:
feeds.append((title, articles))
return feeds

View File

@ -666,13 +666,17 @@ class ActionCopyToLibrary(InterfaceActionBase):
name = 'Copy To Library'
actual_plugin = 'calibre.gui2.actions.copy_to_library:CopyToLibraryAction'
# Plugin declaration for the "Tweak ePub" GUI action.
# `name` is the action's identifier; the same string is used in the
# context-menu layout defaults and in the `plugins` list below.
# `actual_plugin` names the implementing class as a 'module:class' string
# (presumably so the module is only imported when the action is needed --
# confirm against InterfaceActionBase).
class ActionTweakEpub(InterfaceActionBase):
name = 'Tweak ePub'
actual_plugin = 'calibre.gui2.actions.tweak_epub:TweakEpubAction'
plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
ActionFetchNews, ActionSaveToDisk, ActionShowBookDetails,
ActionRestart, ActionOpenFolder, ActionConnectShare,
ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
ActionCopyToLibrary]
ActionCopyToLibrary, ActionTweakEpub]
# }}}

View File

@ -29,7 +29,9 @@ class ANDROID(USBMS):
# Sony Ericsson
0xfce : { 0xd12e : [0x0100]},
0x18d1 : { 0x4e11 : [0x0100, 0x226], 0x4e12: [0x0100, 0x226]},
# Google
0x18d1 : { 0x4e11 : [0x0100, 0x226, 0x227], 0x4e12: [0x0100, 0x226,
0x227]},
# Samsung
0x04e8 : { 0x681d : [0x0222, 0x0400],

View File

@ -739,7 +739,7 @@ class ITUNES(DriverBase):
# Purge the booklist, self.cached_books, thumb cache
for i,bl_book in enumerate(booklists[0]):
if DEBUG:
if False:
self.log.info(" evaluating '%s' by '%s' uuid:%s" %
(bl_book.title, bl_book.author,bl_book.uuid))

View File

@ -11,6 +11,10 @@ import re
from calibre.devices.usbms.driver import USBMS
def is_alex(device_info):
    '''
    Tell whether ``device_info`` carries the identification strings used in
    this file to recognise the Alex reader.

    Only elements 3 and 4 of ``device_info`` are examined (presumably the
    manufacturer and product strings reported over USB -- confirm against
    the caller). Element 4 is only read when element 3 already matches.
    '''
    if device_info[3] != u'Linux 2.6.28 with pxa3xx_u2d':
        return False
    return device_info[4] == u'Seleucia Disk'
class N516(USBMS):
name = 'N516 driver'
@ -34,6 +38,9 @@ class N516(USBMS):
EBOOK_DIR_MAIN = 'e_book'
SUPPORTS_SUB_DIRS = True
# Accept the device unless is_alex() recognises its info tuple, i.e. leave
# devices identifying themselves as an Alex to another driver (presumably
# the ALEX class in this file, whose can_handle is the exact inverse).
# `debug` is accepted for interface compatibility and is not used here.
def can_handle(self, device_info, debug=False):
return not is_alex(device_info)
class THEBOOK(N516):
name = 'The Book driver'
gui_name = 'The Book'
@ -61,6 +68,9 @@ class ALEX(N516):
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = True
# Accept the device only when is_alex() recognises its info tuple.
# `debug` is accepted for interface compatibility and is not used here.
def can_handle(self, device_info, debug=False):
return is_alex(device_info)
class AZBOOKA(ALEX):
name = 'Azbooka driver'
@ -74,6 +84,9 @@ class AZBOOKA(ALEX):
EBOOK_DIR_MAIN = ''
# AZBOOKA derives from ALEX, so without this override it would inherit
# ALEX.can_handle and accept only Alex devices. Here the test is inverted:
# the device is accepted unless is_alex() recognises its info tuple.
# `debug` is accepted for interface compatibility and is not used here.
def can_handle(self, device_info, debug=False):
return not is_alex(device_info)
class EB511(USBMS):
name = 'Elonex EB 511 driver'

View File

@ -41,6 +41,10 @@ class Book(MetaInformation):
self.authors = ['']
else:
self.authors = [authors]
if not title:
self.title = _('Unknown')
self.mime = mime
self.size = size # will be set later if None

View File

@ -98,6 +98,8 @@ class KOBO(USBMS):
if readstatus == 1:
playlist_map[lpath]= "Im_Reading"
elif readstatus == 2:
playlist_map[lpath]= "Read"
path = self.normalize_path(path)
# print "Normalized FileName: " + path
@ -441,43 +443,99 @@ class KOBO(USBMS):
connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
cursor = connection.cursor()
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
if collections:
# Process any collections that exist
for category, books in collections.items():
if category == 'Im_Reading':
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Im_Reading list')
raise
else:
# debug_print('Commit: Reset Im_Reading list')
connection.commit()
for category, books in collections.items():
if category == 'Im_Reading':
for book in books:
# debug_print('Title:', book.title, 'lpath:', book.path)
book.device_collections = ['Im_Reading']
extension = os.path.splitext(book.path)[1]
ContentType = self.get_content_type_from_extension(extension)
ContentID = self.contentid_from_path(book.path, ContentType)
datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
t = (datelastread,ContentID,)
try:
cursor.execute('update content set ReadStatus=1,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t)
cursor.execute (query)
except:
debug_print('Database Exception: Unable create Im_Reading list')
debug_print('Database Exception: Unable to reset Im_Reading list')
raise
else:
# debug_print('Commit: Reset Im_Reading list')
connection.commit()
# debug_print('Database: Commit create Im_Reading list')
for book in books:
# debug_print('Title:', book.title, 'lpath:', book.path)
book.device_collections = ['Im_Reading']
extension = os.path.splitext(book.path)[1]
ContentType = self.get_content_type_from_extension(extension)
ContentID = self.contentid_from_path(book.path, ContentType)
datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
t = (datelastread,ContentID,)
try:
cursor.execute('update content set ReadStatus=1,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t)
except:
debug_print('Database Exception: Unable create Im_Reading list')
raise
else:
connection.commit()
# debug_print('Database: Commit create Im_Reading list')
if category == 'Read':
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 2 and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 2 and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Im_Reading list')
raise
else:
# debug_print('Commit: Reset Im_Reading list')
connection.commit()
for book in books:
# debug_print('Title:', book.title, 'lpath:', book.path)
book.device_collections = ['Read']
extension = os.path.splitext(book.path)[1]
ContentType = self.get_content_type_from_extension(extension)
ContentID = self.contentid_from_path(book.path, ContentType)
# datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
t = (ContentID,)
try:
cursor.execute('update content set ReadStatus=2,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
except:
debug_print('Database Exception: Unable set book as Rinished')
raise
else:
connection.commit()
# debug_print('Database: Commit set ReadStatus as Finished')
else: # No collections
# Since no collections exist the ReadStatus needs to be reset to 0 (Unread)
print "Reseting ReadStatus to 0"
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Im_Reading list')
raise
else:
# debug_print('Commit: Reset Im_Reading list')
connection.commit()
cursor.close()
connection.close()

View File

@ -241,7 +241,7 @@ OptionRecommendation(name='toc_filter',
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words '

View File

@ -106,6 +106,52 @@ def line_length(format, raw, percent):
return lengths[index]
class Dehyphenator(object):
    '''
    Analyzes words to determine whether hyphens should be retained/removed.  Uses the document
    itself as a dictionary. This method handles all languages along with uncommon, made-up, and
    scientific words. The primary disadvantage is that words appearing only once in the document
    retain hyphens.

    Usage: ``html = Dehyphenator()(html, 'html', length)``.
    '''

    def __init__(self):
        # Add common suffixes to the regex below to increase the likelihood of a match -
        # don't add suffixes which are also complete words, such as 'able' or 'sex'
        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
        # remove prefixes if the prefix was not already the point of hyphenation
        self.prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE)
        self.removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE)
        # The document being processed; replaced on every __call__
        self.html = u''

    def dehyphenate(self, match):
        '''
        Substitution callback: decide whether one hyphenated line break is
        a real hyphen or an artefact of line wrapping.

        :param match: regex match providing the named groups ``firstpart``
                      and ``secondpart`` (the text before and after the hyphen).
        :return: the two parts joined without a hyphen if a stem of the joined
                 word occurs anywhere in ``self.html``, otherwise the two
                 parts joined with a single hyphen.
        '''
        firsthalf = match.group('firstpart')
        secondhalf = match.group('secondpart')
        # Concatenate directly: wrapping the parts in str() fails with
        # UnicodeEncodeError for non-ASCII words on Python 2
        hyphenated = firsthalf + "-" + secondhalf
        dehyphenated = firsthalf + secondhalf
        lookupword = self.removesuffixes.sub('', dehyphenated)
        if self.prefixes.match(firsthalf) is None:
            lookupword = self.removeprefix.sub('', lookupword)
        # The stem is literal text taken from the book, not a pattern, so it
        # has to be escaped: words such as "(docu" or "a*b" would otherwise
        # raise re.error or match the wrong thing
        booklookup = re.compile(re.escape(lookupword), re.IGNORECASE)
        #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
        if booklookup.search(self.html):
            #print "returned dehyphenated word: " + str(dehyphenated)
            return dehyphenated
        #print "returned hyphenated word: " + str(hyphenated)
        return hyphenated

    def __call__(self, html, format, length=1):
        '''
        Resolve hyphens at line breaks in ``html`` and return the result.

        :param html:   the markup to process; it also serves as the word list.
        :param format: ``'html'`` or ``'pdf'`` selects the markup expected
                       between the two halves of a wrapped word;
                       ``'individual_words'`` is experimental (see below).
        :param length: minimum number of characters that must precede the
                       word on its line for it to be considered.
        :raises ValueError: if ``format`` is not one of the values above.
        '''
        self.html = html
        if format == 'html':
            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^“"\s>]+)-\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)' % length)
        elif format == 'pdf':
            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^“"\s>]+)-\s*(<p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
        elif format == 'individual_words':
            # for later, not called anywhere yet.
            # NOTE(review): the pattern now compiles (the group name was
            # malformed and \b was a backspace character), but it still
            # consumes the text surrounding the word, so substitution would
            # drop that text. Rework before enabling this mode.
            intextmatch = re.compile(r'>[^<]*\b(?P<firstpart>[^"\s>]+)-(?P<secondpart>\w+)\b[^<]*<')
        else:
            raise ValueError('Unknown dehyphenation format: %r' % (format,))

        return intextmatch.sub(self.dehyphenate, html)
class CSSPreProcessor(object):
@ -328,11 +374,10 @@ class HTMLPreProcessor(object):
print 'Failed to parse remove_footer regexp'
traceback.print_exc()
# unwrap hyphenation - moved here so it's executed after header/footer removal
# unwrap em/en dashes, delete soft hyphens - moved here so it's executed after header/footer removal
if is_pdftohtml:
# unwrap visible dashes and hyphens - don't delete they are often hyphens for
# for compound words, formatting, etc
end_rules.append((re.compile(u'(?<=[-–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap em/en dashes
end_rules.append((re.compile(u'(?<=[–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens
end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens with formatting
@ -350,7 +395,7 @@ class HTMLPreProcessor(object):
# print "The pdf line length returned is " + str(length)
end_rules.append(
# Un wrap using punctuation
(re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
(re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + start_rules:
@ -380,6 +425,11 @@ class HTMLPreProcessor(object):
for rule in rules + end_rules:
html = rule[0].sub(rule[1], html)
if is_pdftohtml:
# Dehyphenate
dehyphenator = Dehyphenator()
html = dehyphenator(html,'pdf', length)
#dump(html, 'post-preprocess')
# Handle broken XHTML w/ SVG (ugh)

View File

@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.ebooks.conversion.preprocess import line_length
from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator
from calibre.utils.logging import default_log
class PreProcessor(object):
@ -114,7 +114,7 @@ class PreProcessor(object):
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Get rid of empty span, bold, & italics tags
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<[ibu]>\s*(<[ibu]>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
# If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
@ -132,7 +132,6 @@ class PreProcessor(object):
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
html = re.sub(r"\s*</p>", "</p>\n", html)
html = re.sub(r"\s*<p>\s*", "\n<p>", html)
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
# detect chapters/sections to match xpath or splitting logic
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
@ -140,16 +139,16 @@ class PreProcessor(object):
#
# Start with most typical chapter headings, get more aggressive until one works
if self.html_preprocess_sections < 10:
chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
html = chapdetect.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters")
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words")
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html)
###### Unwrap lines ######
@ -174,10 +173,16 @@ class PreProcessor(object):
length = line_length(format, html, getattr(self.extra_opts,
'html_unwrap_factor', 0.4))
self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
max_length = length * 1.4
min_max = str("(?<=.{"+str(length)+"})(?<!.{"+str(max_length)+"})")
#
# Unwrap and/or delete soft-hyphens, hyphens
# Unwrap em/en dashes, delete soft-hyphens
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
html = re.sub(u'(?<=[-\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html)
html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html)
# Dehyphenate
dehyphenator = Dehyphenator()
html = dehyphenator(html,'html', length)
# Unwrap lines using punctuation and line length
unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
@ -186,7 +191,7 @@ class PreProcessor(object):
# If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < 10:
self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections))
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html)
# search for places where a first or second level heading is immediately followed by another
# top level heading. demote the second heading to h3 to prevent splitting between chapter

View File

@ -1574,14 +1574,15 @@ class MobiWriter(object):
id = unicode(oeb.metadata.cover[0])
item = oeb.manifest.ids[id]
href = item.href
index = self._images[href] - 1
exth.write(pack('>III', 0xc9, 0x0c, index))
exth.write(pack('>III', 0xcb, 0x0c, 0))
nrecs += 2
index = self._add_thumbnail(item)
if index is not None:
exth.write(pack('>III', 0xca, 0x0c, index - 1))
nrecs += 1
if href in self._images:
index = self._images[href] - 1
exth.write(pack('>III', 0xc9, 0x0c, index))
exth.write(pack('>III', 0xcb, 0x0c, 0))
nrecs += 2
index = self._add_thumbnail(item)
if index is not None:
exth.write(pack('>III', 0xca, 0x0c, index - 1))
nrecs += 1
exth = exth.getvalue()
trail = len(exth) % 4

View File

@ -1,7 +1,7 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" The GUI """
import os, sys
import os, sys, Queue
from threading import RLock
from PyQt4.Qt import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, \
@ -39,7 +39,7 @@ gprefs.defaults['action-layout-context-menu'] = (
'Edit Metadata', 'Send To Device', 'Save To Disk',
'Connect Share', 'Copy To Library', None,
'Convert Books', 'View', 'Open Folder', 'Show Book Details',
'Similar Books', None, 'Remove Books',
'Similar Books', 'Tweak ePub', None, 'Remove Books',
)
gprefs.defaults['action-layout-context-menu-device'] = (
@ -296,6 +296,34 @@ class Dispatcher(QObject):
def dispatch(self, args, kwargs):
self.func(*args, **kwargs)
class FunctionDispatcher(QObject):
    '''
    Convenience class to use Qt signals with arbitrary python functions.
    By default, ensures that a function call always happens in the
    thread this FunctionDispatcher was created in.

    Calling an instance emits ``dispatch_signal`` and then blocks on a
    private queue until :meth:`dispatch` has run the wrapped function; the
    function's return value is handed back to the caller. If the function
    raises, the traceback is printed and ``None`` is returned.

    NOTE(review): with a queued connection the slot only runs once the
    receiving thread returns to its event loop, so calling an instance from
    the thread that owns it looks like it would block forever -- confirm
    that callers only use it from other threads.
    '''
    dispatch_signal = pyqtSignal(object, object, object)

    def __init__(self, func, queued=True, parent=None):
        '''
        :param func:   the callable to run.
        :param queued: ``True`` (default) uses ``Qt.QueuedConnection``,
                       ``None`` uses ``Qt.AutoConnection`` and any other
                       false value uses ``Qt.DirectConnection``.
        :param parent: optional parent QObject.
        '''
        QObject.__init__(self, parent)
        self.func = func
        typ = Qt.QueuedConnection
        if not queued:
            typ = Qt.AutoConnection if queued is None else Qt.DirectConnection
        self.dispatch_signal.connect(self.dispatch, type=typ)

    def __call__(self, *args, **kwargs):
        '''Run ``func(*args, **kwargs)`` via the signal and return its result.'''
        q = Queue.Queue()
        self.dispatch_signal.emit(q, args, kwargs)
        return q.get()

    def dispatch(self, q, args, kwargs):
        '''Slot: call the wrapped function and put its result on ``q``.'''
        res = None
        try:
            res = self.func(*args, **kwargs)
        except Exception:
            # Keep the "return None on failure" contract, but do not hide
            # the error from whoever is debugging
            import traceback
            traceback.print_exc()
        finally:
            # Always answer, otherwise the caller waits in q.get() forever
            q.put(res)
class GetMetadata(QObject):
'''
Convenience class to ensure that metadata readers are used only in the
@ -575,18 +603,6 @@ class Application(QApplication):
self._file_open_paths = []
self._file_open_lock = RLock()
if islinux:
self.setStyleSheet('''
QToolTip {
border: 2px solid black;
padding: 5px;
border-radius: 10px;
opacity: 200;
background-color: #e1e1ff;
color: black;
}
''')
def _send_file_open_events(self):
with self._file_open_lock:
if self._file_open_paths:

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.gui2 import error_dialog
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.tweak_epub import TweakEpub
class TweakEpubAction(InterfaceAction):
    '''
    GUI action that lets the user unpack the EPUB of the current book, edit
    its files by hand and store the rebuilt EPUB back into the library.
    '''

    name = 'Tweak ePub'
    action_spec = (_('Tweak ePub'), 'trim.png',
            _('Make small changes to ePub format books'),
            _('T'))
    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
    action_type = 'current'

    def genesis(self):
        '''Connect the action's trigger to :meth:`edit_epub_in_situ`.'''
        self.qaction.triggered.connect(self.edit_epub_in_situ)

    def edit_epub_in_situ(self, *args):
        '''
        Open the tweak dialog for the current book in the library view.

        Shows an error dialog and returns when no book is current or the
        book has no EPUB format. Otherwise runs the dialog modally and, if
        it was accepted, stores the rebuilt EPUB in the database. The
        dialog's temporary files are removed in every case.
        '''
        row = self.gui.library_view.currentIndex()
        if not row.isValid():
            return error_dialog(self.gui, _('Cannot tweak ePub'),
                    _('No book selected'), show=True)

        # Confirm 'EPUB' in formats
        book_id = self.gui.library_view.model().id(row)
        try:
            path_to_epub = self.gui.library_view.model().db.format_abspath(
                    book_id, 'EPUB', index_is_id=True)
        except Exception:
            # Any lookup failure is treated the same as "no EPUB available"
            path_to_epub = None

        if not path_to_epub:
            return error_dialog(self.gui, _('Cannot tweak ePub'),
                    _('No ePub available. First convert the book to ePub.'),
                    show=True)

        # Launch modal dialog waiting for user to tweak or cancel
        dlg = TweakEpub(self.gui, path_to_epub)
        try:
            if dlg.exec_() == dlg.Accepted:
                self.update_db(book_id, dlg._output)
        finally:
            # Remove the exploded copy even if updating the database failed
            dlg.cleanup()

    def update_db(self, book_id, rebuilt):
        '''
        Update the calibre db with the tweaked epub

        :param book_id: id of the book whose EPUB format is replaced.
        :param rebuilt: path to the rebuilt EPUB file.
        '''
        # Close the file before the caller deletes the directory holding it;
        # an open handle would keep the file (and on Windows the directory)
        # from being removed
        with open(rebuilt, 'rb') as stream:
            self.gui.library_view.model().db.add_format(book_id, 'EPUB',
                    stream, index_is_id=True)

View File

@ -300,6 +300,24 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.cpixmap = pix
self.cover_data = cdata
def trim_cover(self, *args):
    '''
    Trim the current cover image and show the result.

    Does nothing when there is no cover data. Otherwise the cover is
    loaded into an ``Image``, ``trim(10)`` is applied (the argument is
    presumably a colour tolerance -- confirm against calibre.utils.magick),
    the image is re-exported as JPEG, and the dialog's pixmap, cover data
    and ``cover_changed`` flag are updated.
    '''
    from calibre.utils.magick import Image
    current = self.cover_data
    if not current:
        return

    image = Image()
    image.load(current)
    image.trim(10)
    trimmed = image.export('jpg')

    pixmap = QPixmap()
    pixmap.loadFromData(trimmed)
    self.cover.setPixmap(pixmap)

    self.cover_changed = True
    self.cpixmap = pixmap
    self.cover_data = trimmed
def sync_formats(self):
old_extensions, new_extensions, paths = set(), set(), {}
for row in range(self.formats.count()):
@ -380,6 +398,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.remove_unused_series)
QObject.connect(self.auto_author_sort, SIGNAL('clicked()'),
self.deduce_author_sort)
self.trim_cover_button.clicked.connect(self.trim_cover)
self.connect(self.author_sort, SIGNAL('textChanged(const QString&)'),
self.author_sort_box_changed)
self.connect(self.authors, SIGNAL('editTextChanged(const QString&)'),

View File

@ -625,6 +625,17 @@ Using this button to create author sort will change author sort from red to gree
</property>
</widget>
</item>
<item>
<widget class="QToolButton" name="trim_cover_button">
<property name="toolTip">
<string>Remove border (if any) from cover</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/trim.png</normaloff>:/images/trim.png</iconset>
</property>
</widget>
</item>
<item>
<widget class="QToolButton" name="reset_cover">
<property name="toolTip">

View File

@ -0,0 +1,81 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, shutil
from contextlib import closing
from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED
from PyQt4.Qt import QDialog
from calibre.gui2 import open_local_file
from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog
from calibre.libunzip import extract as zipextract
from calibre.ptempfile import PersistentTemporaryDirectory
class TweakEpub(QDialog, Ui_Dialog):
    '''
    Dialog with the controls for tweaking an ePub: explode it into a
    temporary directory, let the user edit the files, then rebuild it.

    After the dialog has been accepted, ``_output`` holds the path of the
    rebuilt ePub. Call :meth:`cleanup` when done with the dialog.
    '''

    def __init__(self, parent, epub):
        '''
        :param parent: parent widget; the dialog is moved to its position.
        :param epub:   path of the ePub file to tweak.
        '''
        QDialog.__init__(self, parent)

        self._epub = epub
        self._exploded = None
        self._output = None

        # Build the widgets generated from tweak_epub.ui
        self.setupUi(self)

        # Wire each button to its handler
        for button, handler in (
                (self.cancel_button, self.reject),
                (self.explode_button, self.explode),
                (self.rebuild_button, self.rebuild)):
            button.clicked.connect(handler)

        # Overlay the dialog on the top left corner of the app window
        origin = parent.pos()
        self.move(origin.x(), origin.y())

    def cleanup(self):
        '''Delete the directory holding the exploded ePub, if there is one.'''
        if self._exploded is None:
            return
        shutil.rmtree(self._exploded, ignore_errors=True)

    def display_exploded(self):
        '''
        Show the exploded ePub with ``open_local_file`` so that the user
        can open individual files with an editor of their choice.
        '''
        open_local_file(self._exploded)

    def explode(self, *args):
        '''
        Extract the ePub into a fresh temporary directory, show that
        directory and enable the rebuild button. Does nothing when the
        ePub has already been exploded.
        '''
        if self._exploded is not None:
            return
        self._exploded = PersistentTemporaryDirectory("_exploded", prefix='')
        zipextract(self._epub, self._exploded)
        self.display_exploded()
        self.rebuild_button.setEnabled(True)
        self.explode_button.setEnabled(False)

    def rebuild(self, *args):
        '''
        Zip the exploded directory into ``rebuilt.epub`` inside that same
        directory, record its path in ``_output`` and accept the dialog.
        '''
        base = self._exploded
        self._output = os.path.join(base, 'rebuilt.epub')
        # File names never copied by the walk below: mimetype is written
        # separately, rebuilt.epub is the archive being created
        skipped = ['.DS_Store','mimetype','iTunesMetadata.plist','rebuilt.epub']

        with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as archive:
            # mimetype goes in first and uncompressed
            archive.write(os.path.join(base,'mimetype'), 'mimetype', compress_type=ZIP_STORED)

            # Everything else, stored under its path relative to the
            # exploded directory with forward slashes
            for root, dirs, files in os.walk(base):
                for fn in files:
                    if fn not in skipped:
                        absfn = os.path.join(root, fn)
                        arcname = os.path.relpath(absfn, base).replace(os.sep, '/')
                        archive.write(absfn, arcname)

        return QDialog.accept(self)

View File

@ -0,0 +1,87 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="windowModality">
<enum>Qt::NonModal</enum>
</property>
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>382</width>
<height>242</height>
</rect>
</property>
<property name="windowTitle">
<string>Tweak ePub</string>
</property>
<property name="sizeGripEnabled">
<bool>false</bool>
</property>
<property name="modal">
<bool>false</bool>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="0">
<widget class="QPushButton" name="explode_button">
<property name="statusTip">
<string>Display contents of exploded ePub</string>
</property>
<property name="text">
<string>&amp;Explode ePub</string>
</property>
<property name="icon">
<iconset>
<normaloff>:/images/wizard.png</normaloff>:/images/wizard.png</iconset>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QPushButton" name="rebuild_button">
<property name="enabled">
<bool>false</bool>
</property>
<property name="statusTip">
<string>Rebuild ePub from exploded contents</string>
</property>
<property name="text">
<string>&amp;Rebuild ePub</string>
</property>
<property name="icon">
<iconset>
<normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QPushButton" name="cancel_button">
<property name="statusTip">
<string>Discard changes</string>
</property>
<property name="text">
<string>&amp;Cancel</string>
</property>
<property name="icon">
<iconset>
<normaloff>:/images/window-close.png</normaloff>:/images/window-close.png</iconset>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window. Rebuild the ePub, updating your calibre library.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
<resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections/>
</ui>

View File

@ -12,7 +12,7 @@ from operator import attrgetter
from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
QModelIndex, QVariant, QDate
from calibre.gui2 import NONE, config, UNDEFINED_QDATE
from calibre.gui2 import NONE, config, UNDEFINED_QDATE, FunctionDispatcher
from calibre.utils.pyparsing import ParseException
from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors
from calibre.ptempfile import PersistentTemporaryFile
@ -151,7 +151,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.database_changed.emit(db)
if self.cover_cache is not None:
self.cover_cache.stop()
self.cover_cache = CoverCache(db)
self.cover_cache = CoverCache(db, FunctionDispatcher(self.db.cover))
self.cover_cache.start()
def refresh_cover(event, ids):
if event == 'cover' and self.cover_cache is not None:

View File

@ -217,6 +217,10 @@ def fetch_scheduled_recipe(arg):
if 'output_profile' in ps:
recs.append(('output_profile', ps['output_profile'],
OptionRecommendation.HIGH))
if ps['output_profile'] == 'kindle':
recs.append(('no_inline_toc', True,
OptionRecommendation.HIGH))
lf = load_defaults('look_and_feel')
if lf.get('base_font_size', 0.0) != 0.0:
recs.append(('base_font_size', lf['base_font_size'],

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, itertools
import re, itertools, time
from itertools import repeat
from datetime import timedelta
from threading import Thread, RLock
@ -23,10 +23,11 @@ from calibre import fit_image
class CoverCache(Thread):
def __init__(self, db):
def __init__(self, db, cover_func):
Thread.__init__(self)
self.daemon = True
self.db = db
self.cover_func = cover_func
self.load_queue = Queue()
self.keep_running = True
self.cache = {}
@ -37,7 +38,8 @@ class CoverCache(Thread):
self.keep_running = False
def _image_for_id(self, id_):
img = self.db.cover(id_, index_is_id=True, as_image=True)
time.sleep(0.050) # Limit 20/second to not overwhelm the GUI
img = self.cover_func(id_, index_is_id=True, as_image=True)
if img is None:
img = QImage()
if not img.isNull():

View File

@ -402,7 +402,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
path = path.lower()
return path
def set_path(self, index, index_is_id=False, commit=True):
def set_path(self, index, index_is_id=False):
'''
Set the path to the directory containing this books files based on its
current title and author. If there was a previous directory, its contents
@ -432,7 +432,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if current_path and os.path.exists(spath): # Migrate existing files
cdata = self.cover(id, index_is_id=True)
if cdata is not None:
open(os.path.join(tpath, 'cover.jpg'), 'wb').write(cdata)
with open(os.path.join(tpath, 'cover.jpg'), 'wb') as f:
f.write(cdata)
for format in formats:
# Get data as string (can't use file as source and target files may be the same)
f = self.format(id, format, index_is_id=True, as_file=False)
@ -442,8 +443,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.add_format(id, format, stream, index_is_id=True,
path=tpath, notify=False)
self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id))
if commit:
self.conn.commit()
self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True)
# Delete not needed directories
if current_path and os.path.exists(spath):
@ -452,6 +451,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
parent = os.path.dirname(spath)
if len(os.listdir(parent)) == 0:
self.rmtree(parent, permanent=True)
curpath = self.library_path
c1, c2 = current_path.split('/'), path.split('/')
if not self.is_case_sensitive and len(c1) == len(c2):
@ -466,13 +466,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
# the directories, so no need to do them here.
for oldseg, newseg in zip(c1, c2):
if oldseg.lower() == newseg.lower() and oldseg != newseg:
while True:
# need a temp name in the current segment for renames
tempname = os.path.join(curpath, 'TEMP.%f'%time.time())
if not os.path.exists(tempname):
break
os.rename(os.path.join(curpath, oldseg), tempname)
os.rename(tempname, os.path.join(curpath, newseg))
try:
os.rename(os.path.join(curpath, oldseg), os.path.join(curpath, newseg))
except:
break # Fail silently since nothing catastrophic has happened
curpath = os.path.join(curpath, newseg)
def add_listener(self, listener):
@ -1131,7 +1128,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def set_authors(self, id, authors, notify=True, commit=True):
'''
`authors`: A list of authors.
Note that even if commit is False, the db will still be committed to
because this causes the location of files to change
:param authors: A list of authors.
'''
if not authors:
authors = [_('Unknown')]
@ -1163,11 +1163,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
','.join([a.replace(',', '|') for a in authors]),
row_is_id=True)
self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
self.set_path(id, index_is_id=True, commit=commit)
self.set_path(id, index_is_id=True)
if notify:
self.notify('metadata', [id])
def set_title(self, id, title, notify=True, commit=True):
'''
Note that even if commit is False, the db will still be committed to
because this causes the location of files to change
'''
if not title:
return
if not isinstance(title, unicode):
@ -1178,7 +1182,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.data.set(id, self.FIELD_MAP['sort'], title_sort(title), row_is_id=True)
else:
self.data.set(id, self.FIELD_MAP['sort'], title, row_is_id=True)
self.set_path(id, index_is_id=True, commit=commit)
self.set_path(id, index_is_id=True)
if commit:
self.conn.commit()
if notify:

View File

@ -184,7 +184,7 @@ class ContentServer(object):
if path and os.path.exists(path):
updated = fromtimestamp(os.stat(path).st_mtime)
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
return fmt.read()
return fmt
# }}}

View File

@ -60,15 +60,15 @@ def identify(path):
data = open(path, 'rb').read()
return identify_data(data)
def add_borders_to_image(path_to_image, left=0, top=0, right=0, bottom=0,
border_color='#ffffff'):
def add_borders_to_image(img_data, left=0, top=0, right=0, bottom=0,
border_color='#ffffff', fmt='jpg'):
img = Image()
img.open(path_to_image)
img.load(img_data)
lwidth, lheight = img.size
canvas = create_canvas(lwidth+left+right, lheight+top+bottom,
border_color)
canvas.compose(img, left, top)
canvas.save(path_to_image)
return canvas.export(fmt)
def create_text_wand(font_size, font_path=None):
if font_path is None:

View File

@ -7,7 +7,7 @@ Defines various abstract base classes that can be subclassed to create powerful
__docformat__ = "restructuredtext en"
import os, time, traceback, re, urlparse, sys
import os, time, traceback, re, urlparse, sys, cStringIO
from collections import defaultdict
from functools import partial
from contextlib import nested, closing
@ -27,6 +27,7 @@ from calibre.web.fetch.simple import RecursiveFetcher
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import now as nowf
from calibre.utils.magick.draw import save_cover_data_to, add_borders_to_image
class LoginFailed(ValueError):
pass
@ -948,38 +949,36 @@ class BasicNewsRecipe(Recipe):
try:
cu = self.get_cover_url()
except Exception, err:
cu = None
self.log.error(_('Could not download cover: %s')%str(err))
self.log.debug(traceback.format_exc())
if cu is not None:
ext = cu.split('/')[-1].rpartition('.')[-1]
if '?' in ext:
ext = ''
ext = ext.lower() if ext and '/' not in ext else 'jpg'
cpath = os.path.join(self.output_dir, 'cover.'+ext)
else:
cdata = None
if os.access(cu, os.R_OK):
with open(cpath, 'wb') as cfile:
cfile.write(open(cu, 'rb').read())
cdata = open(cu, 'rb').read()
else:
self.report_progress(1, _('Downloading cover from %s')%cu)
with nested(open(cpath, 'wb'), closing(self.browser.open(cu))) as (cfile, r):
cfile.write(r.read())
if self.cover_margins[0] or self.cover_margins[1]:
from calibre.utils.magick.draw import add_borders_to_image
add_borders_to_image(cpath,
left=self.cover_margins[0],right=self.cover_margins[0],
top=self.cover_margins[1],bottom=self.cover_margins[1],
border_color=self.cover_margins[2])
if ext.lower() == 'pdf':
with closing(self.browser.open(cu)) as r:
cdata = r.read()
if not cdata:
return
ext = cu.split('/')[-1].rpartition('.')[-1].lower().strip()
if ext == 'pdf':
from calibre.ebooks.metadata.pdf import get_metadata
stream = open(cpath, 'rb')
stream = cStringIO.StringIO(cdata)
cdata = None
mi = get_metadata(stream)
cpath = None
if mi.cover_data and mi.cover_data[1]:
cpath = os.path.join(self.output_dir,
'cover.'+mi.cover_data[0])
with open(cpath, 'wb') as f:
f.write(mi.cover_data[1])
cdata = mi.cover_data[1]
if not cdata:
return
if self.cover_margins[0] or self.cover_margins[1]:
cdata = add_borders_to_image(cdata,
left=self.cover_margins[0],right=self.cover_margins[0],
top=self.cover_margins[1],bottom=self.cover_margins[1],
border_color=self.cover_margins[2])
cpath = os.path.join(self.output_dir, 'cover.jpg')
save_cover_data_to(cdata, cpath)
self.cover_path = cpath
def download_cover(self):
@ -1422,7 +1421,6 @@ class CalibrePeriodical(BasicNewsRecipe):
return br
def download(self):
import cStringIO
self.log('Fetching downloaded recipe')
try:
raw = self.browser.open_novisit(