KG tweaks to tweak-epub

This commit is contained in:
GRiker 2010-09-21 04:34:51 -07:00
commit ea3b8fa177
20 changed files with 1049 additions and 178 deletions

688
imgsrc/trim.svg Normal file
View File

@ -0,0 +1,688 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="128"
height="128"
id="svg1307"
sodipodi:version="0.32"
inkscape:version="0.46+devel"
version="1.0"
sodipodi:docname="transform-crop.svgz"
inkscape:export-filename="/home/pinheiro/pics/oxygen-icons/scalable/actions/transform-crop.png"
inkscape:export-xdpi="90"
inkscape:export-ydpi="90"
inkscape:output_extension="org.inkscape.output.svgz.inkscape">
<defs
id="defs1309">
<linearGradient
inkscape:collect="always"
id="linearGradient2594">
<stop
style="stop-color:#fafafa;stop-opacity:1;"
offset="0"
id="stop2596" />
<stop
style="stop-color:#fafafa;stop-opacity:0;"
offset="1"
id="stop2598" />
</linearGradient>
<linearGradient
inkscape:collect="always"
id="linearGradient3969">
<stop
style="stop-color:#000000;stop-opacity:1;"
offset="0"
id="stop3971" />
<stop
style="stop-color:#000000;stop-opacity:0;"
offset="1"
id="stop3973" />
</linearGradient>
<linearGradient
id="linearGradient2783">
<stop
style="stop-color:#323232;stop-opacity:1;"
offset="0"
id="stop2785" />
<stop
id="stop2787"
offset="0.07692308"
style="stop-color:#dfe1e1;stop-opacity:1;" />
<stop
style="stop-color:#b6b1b1;stop-opacity:1;"
offset="0.26289096"
id="stop2799" />
<stop
id="stop2789"
offset="0.5"
style="stop-color:#8d8282;stop-opacity:1;" />
<stop
style="stop-color:#ffffff;stop-opacity:1;"
offset="0.78201604"
id="stop2791" />
<stop
style="stop-color:#dfd9df;stop-opacity:1;"
offset="0.9005897"
id="stop2793" />
<stop
style="stop-color:#3a3a3a;stop-opacity:1;"
offset="1"
id="stop2795" />
</linearGradient>
<linearGradient
id="linearGradient2222"
inkscape:collect="always">
<stop
id="stop2224"
offset="0"
style="stop-color:#0066ff;stop-opacity:1" />
<stop
id="stop2226"
offset="1"
style="stop-color:#80b3ff;stop-opacity:1" />
</linearGradient>
<linearGradient
id="linearGradient3314"
inkscape:collect="always">
<stop
id="stop3316"
offset="0"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop3318"
offset="1"
style="stop-color:#ffffff;stop-opacity:0;" />
</linearGradient>
<linearGradient
id="linearGradient2431">
<stop
style="stop-color:#ffffff;stop-opacity:1;"
offset="0"
id="stop2433" />
<stop
id="stop2435"
offset="0.42597079"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop2437"
offset="0.5892781"
style="stop-color:#f1f1f1;stop-opacity:1;" />
<stop
style="stop-color:#eaeaea;stop-opacity:1;"
offset="0.80219781"
id="stop2439" />
<stop
style="stop-color:#dfdfdf;stop-opacity:1;"
offset="1"
id="stop2441" />
</linearGradient>
<linearGradient
id="linearGradient7422">
<stop
style="stop-color:#b4b4b6;stop-opacity:1;"
offset="0"
id="stop7424" />
<stop
id="stop5348"
offset="0.5"
style="stop-color:#9c9ca1;stop-opacity:1;" />
<stop
id="stop7426"
offset="1"
style="stop-color:#cdcdd1;stop-opacity:1;" />
</linearGradient>
<linearGradient
id="linearGradient3310"
inkscape:collect="always">
<stop
id="stop3312"
offset="0"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop3314"
offset="1"
style="stop-color:#ffffff;stop-opacity:0;" />
</linearGradient>
<filter
inkscape:collect="always"
x="-0.21138181"
width="1.4227636"
y="-0.21047288"
height="1.4209458"
id="filter9723">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="1.4336041"
id="feGaussianBlur9725" />
</filter>
<clipPath
clipPathUnits="userSpaceOnUse"
id="clipPath10698">
<path
style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.80000001;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1"
d="M -128.2008,-3.392377 L -104.45558,6.3360672 L -102.43766,6.1757677 L -103.81912,-4.5678172 L -105.75454,-5.8316609 L -124.96922,-4.4459394 L -128.2008,-3.392377 z "
id="path10700"
sodipodi:nodetypes="ccccccc" />
</clipPath>
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient2783"
id="radialGradient3418"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.9728905,-8.15107,-18.526373,-2.211261,1957.2342,725.31677)"
cx="53.235302"
cy="106.0573"
fx="53.235302"
fy="106.0573"
r="9.1025209" />
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient2594"
id="radialGradient3420"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.5808473,-2.8009276,-6.4965168,-1.3472267,701.00301,348.75795)"
cx="53.347126"
cy="104.68401"
fx="53.347126"
fy="104.68401"
r="9.1025209" />
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient3314"
id="radialGradient3422"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(-2.9339535,-1.0170467,-1.1904108,3.4340702,323.071,-252.78281)"
cx="49.110855"
cy="105.43803"
fx="49.110855"
fy="105.43803"
r="10.20672" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2783"
id="linearGradient3425"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(2.2608955,0,0,1.9345479,-550.58555,-317.90247)"
x1="190.03462"
y1="90.22673"
x2="208.7153"
y2="90.22673" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3969"
id="linearGradient3430"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(2.2608955,0,0,1.9345479,-497.11778,-432.24104)"
x1="98.411324"
y1="185.68851"
x2="166.32983"
y2="155.59846" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient7422"
id="linearGradient3525"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(6.0715756e-2,0,0,9.7589526e-2,24.201706,-45.627655)"
x1="399.77466"
y1="1164.6696"
x2="399.77466"
y2="549.06134" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2431"
id="linearGradient3527"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.5415355,0,0,0.7222225,23.477667,-8.2222193)"
x1="119.57646"
y1="23.792561"
x2="15.999996"
y2="109.6508" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3310"
id="linearGradient3529"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0,-1.5975038,-2,0,96,199.26848)"
x1="102.31124"
y1="-5.8302126"
x2="74.330322"
y2="32" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2222"
id="linearGradient3538"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.7476489,0,0,0.7476489,0,-19.999999)"
x1="8.2386189"
y1="-13.864992"
x2="8.2386189"
y2="-1.4047648" />
<filter
inkscape:collect="always"
id="filter4420">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="3.0486726"
id="feGaussianBlur4422" />
</filter>
<mask
maskUnits="userSpaceOnUse"
id="mask3562">
<rect
ry="1.4444447"
rx="1.1997639"
y="8"
x="-4.0000005"
height="116.00001"
width="124"
id="rect3564"
style="fill:#ffffff;fill-opacity:1;stroke:none;filter:url(#filter4420)"
transform="matrix(1.1453342,0,0,1.1453342,15.087799,-38.432604)" />
</mask>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="2.2136483"
inkscape:cx="77.317692"
inkscape:cy="55.850409"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:document-units="px"
inkscape:grid-bbox="true"
guidetolerance="4"
showguides="true"
inkscape:guide-bbox="true"
inkscape:window-width="1440"
inkscape:window-height="840"
inkscape:window-x="223"
inkscape:window-y="37"
objecttolerance="4"
gridtolerance="4">
<sodipodi:guide
orientation="horizontal"
position="-32.073749"
id="guide2204" />
<inkscape:grid
id="GridFromPre046Settings"
type="xygrid"
originx="0px"
originy="0px"
spacingx="4px"
spacingy="4px"
color="#0000ff"
empcolor="#0000ff"
opacity="0.2"
empopacity="0.4"
empspacing="4"
visible="true"
enabled="true" />
</sodipodi:namedview>
<metadata
id="metadata1312">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<cc:license
rdf:resource="http://creativecommons.org/licenses/GPL/2.0/" />
<dc:contributor>
<cc:Agent>
<dc:title>Oxygen team</dc:title>
</cc:Agent>
</dc:contributor>
<dc:title></dc:title>
</cc:Work>
<cc:License
rdf:about="http://creativecommons.org/licenses/LGPL/2.1/">
<cc:permits
rdf:resource="http://web.resource.org/cc/Reproduction" />
<cc:permits
rdf:resource="http://web.resource.org/cc/Distribution" />
<cc:requires
rdf:resource="http://web.resource.org/cc/Notice" />
<cc:permits
rdf:resource="http://web.resource.org/cc/DerivativeWorks" />
<cc:requires
rdf:resource="http://web.resource.org/cc/ShareAlike" />
<cc:requires
rdf:resource="http://web.resource.org/cc/SourceCode" />
</cc:License>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Layer 1"
inkscape:groupmode="layer">
<rect
ry="0.1870501"
rx="0.1537565"
y="28.129654"
x="8"
height="92"
width="92"
id="rect3226"
style="fill:#618fd2;fill-opacity:0.09195401;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1" />
<g
id="g3520"
transform="translate(32,-0.1296539)">
<rect
inkscape:export-ydpi="90"
inkscape:export-xdpi="90"
inkscape:export-filename="/home/pinheiro/Desktop/mock2.png"
style="opacity:0.75;fill:url(#linearGradient3525);fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect3281"
width="92"
height="92"
x="28.129654"
y="-24"
inkscape:r_cx="true"
inkscape:r_cy="true"
ry="3.9616783"
rx="3.9616783"
transform="matrix(0,1,1,0,0,0)" />
<rect
ry="1.4444447"
rx="1.1997639"
y="-20"
x="32.129654"
height="84"
width="84"
id="rect3283"
style="fill:url(#linearGradient3527);fill-opacity:1;fill-rule:evenodd;stroke:none"
transform="matrix(0,1,1,0,0,0)" />
<path
id="path3285"
d="M 64,53.096891 C 45.143834,70.163928 24.748768,86.162699 -2.0000002e-07,96.129654 L -2.0000002e-07,52.647595 C 23.693959,50.212248 45.09831,42.609775 64,32.129654 L 64,53.096891 z"
style="fill:url(#linearGradient3529);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
</g>
<g
transform="translate(-16,20.129654)"
style="fill:#7193c6;fill-opacity:1"
id="g2250">
<rect
ry="1.3512546"
rx="0.077153668"
y="-116"
x="16"
height="4"
width="4"
id="rect3210"
style="opacity:1;fill:#7193c6;fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
transform="matrix(0,1,-1,0,0,0)"
inkscape:tile-w="8"
inkscape:tile-h="8"
inkscape:tile-cx="124"
inkscape:tile-cy="28" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,8)"
id="use2236"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,16)"
id="use2240"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,24)"
id="use2244"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,32)"
id="use2248"
width="128"
height="128" />
<use
height="88"
width="88"
transform="translate(0,24)"
id="use3220"
xlink:href="#use2240"
y="0"
x="0" />
<use
height="88"
width="88"
transform="translate(0,24)"
id="use3222"
xlink:href="#use2244"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2230"
xlink:href="#use2244"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2232"
xlink:href="#use2248"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2234"
xlink:href="#use3220"
y="0"
x="0" />
</g>
<use
height="128"
width="128"
transform="matrix(8.5712909e-8,-0.9999999,0.9999999,8.5712909e-8,-20.129659,128.12964)"
id="use2258"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(-88,0)"
id="use2314"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="matrix(8.5712909e-8,-0.9999999,0.9999999,8.5712909e-8,-20.129651,216.12964)"
id="use2316"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(96,0.1296547)"
id="use3300"
xlink:href="#rect3222"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(7.4990672e-6,96.129662)"
id="use3302"
xlink:href="#rect3222"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(96,96.129652)"
id="use3304"
xlink:href="#rect3222"
y="0"
x="0" />
<rect
ry="0.18696606"
rx="0.15479258"
y="-32"
x="0"
height="12"
width="12"
id="rect3222"
style="fill:url(#linearGradient3538);fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
transform="scale(1,-1)" />
<rect
transform="scale(1,-1)"
style="fill:#bfd9ff;fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
id="rect2225"
width="4"
height="4"
x="4"
y="-28"
rx="0.15479258"
ry="0.18696606" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(96,0.1296539)"
id="use3226"
xlink:href="#rect2225"
y="0"
x="0" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(7.5e-6,96.129661)"
id="use3228"
xlink:href="#rect2225"
y="0"
x="0" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(96,96.129654)"
id="use3230"
xlink:href="#rect2225"
y="0"
x="0" />
<rect
style="opacity:0.57786889;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3.63199997;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1"
id="rect1327"
width="1"
height="0"
x="15.057414"
y="-308.20486" />
<g
id="g3407"
transform="matrix(0.8731076,0,0,0.8731076,-13.173272,33.555799)"
mask="url(#mask3562)">
<path
sodipodi:nodetypes="ccccccc"
id="path3836"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
style="fill:url(#radialGradient3418);fill-opacity:1;fill-rule:nonzero;stroke:none" />
<path
style="fill:#555753;fill-opacity:1;fill-rule:nonzero;stroke:none"
d="m 107.32508,50.938663 -74.427424,35.613119 -3.008197,6.986785 76.368201,-35.710168 3.7845,-5.046004 -2.71708,-1.843732 z"
id="path8241"
sodipodi:nodetypes="cccccc" />
<path
style="opacity:0.10688836;fill:url(#radialGradient3420);fill-opacity:1;fill-rule:nonzero;stroke:none"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
id="path11683"
sodipodi:nodetypes="ccccccc" />
<path
sodipodi:nodetypes="ccccccc"
id="path17921"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
style="fill:none;stroke:url(#radialGradient3422);stroke-width:0.86455041;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:4" />
<rect
style="fill:#2e3436;fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect8239"
width="39.714981"
height="37.454777"
x="27.310663"
y="81.415123"
transform="matrix(0.6571695,-0.7537428,0.7537428,0.6571695,0,0)"
rx="3.8771732"
ry="3.8771732" />
<rect
transform="matrix(-0.7651682,-0.6438304,-0.6438304,0.7651682,0,0)"
style="fill:url(#linearGradient3425);fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect2803"
width="40.499767"
height="122.13765"
x="-120.93575"
y="-157.97318"
rx="0"
ry="0" />
<rect
transform="matrix(-0.7651682,-0.6438304,-0.6438304,0.7651682,0,0)"
y="-161.84383"
x="-119.89533"
height="126.00658"
width="39.223213"
id="rect3967"
style="fill:url(#linearGradient3430);fill-opacity:1;fill-rule:nonzero;stroke:none" />
<rect
transform="matrix(-0.6438304,0.7651682,0.7651682,0.6438304,0,0)"
y="80.243172"
x="-155.77248"
height="40.591759"
width="100.57008"
id="rect1851"
style="opacity:0.52459011;fill:#e0e0e0;fill-opacity:1;fill-rule:nonzero;stroke:none" />
<rect
ry="1.2485937"
rx="1.2485937"
transform="matrix(2.0406638,-2.3405465,2.3405465,2.0406638,304.62828,-199.57966)"
y="-5.487061"
x="-104.11894"
height="12.061829"
width="12.789698"
id="rect8248"
style="fill:#2e3436;fill-opacity:1;fill-rule:nonzero;stroke:none;filter:url(#filter9723)"
clip-path="url(#clipPath10698)" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 21 KiB

BIN
resources/images/trim.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.2 KiB

View File

@ -0,0 +1,43 @@
__author__ = ' (lrfurtado@yahoo.com.br)'
from calibre.web.feeds.news import BasicNewsRecipe
class LeJournalDeMontrealRecipe(BasicNewsRecipe):
    """Recipe for Le Journal de Montreal, built from its canoe.com RSS feeds."""

    # Identification
    __author__ = 'Luciano Furtado'
    title = u'Le Journal de Montreal'
    description = u'Le Journal de Montreal'
    language = 'fr'

    # Download limits
    oldest_article = 7
    max_articles_per_feed = 15
    use_embedded_content = 0

    # (tag name, attribute) filters listed for removal: navigation, side
    # boxes, header/footer and advertising containers of the article pages.
    remove_tags = [
        dict(name='ul', attrs={'id': 'mainNav'}),
        dict(name='div', attrs={'id': 'boxPolitique'}),
        dict(name='div', attrs={'id': 'boxScoop'}),
        dict(name='div', attrs={'id': 'DossierSpec'}),
        dict(name='div', attrs={'id': 'channelBoxes'}),
        dict(name='div', attrs={'id': 'sectionBoxes'}),
        dict(name='div', attrs={'id': 'header'}),
        dict(name='div', attrs={'id': 'footer'}),
        dict(name='div', attrs={'id': 'navbarCanoe_container'}),
        dict(name='div', attrs={'id': 'popularCanoe'}),
        dict(name='div', attrs={'id': 'textAds'}),
        dict(name='div', attrs={'id': '24heures'}),
        dict(name='div', attrs={'class': 'bottomBox clear'}),
        dict(name='div', attrs={'class': 'articleControls thin'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'Actualites', u'http://www.canoe.com/rss/feed/nouvelles/ljm_actualites.xml'),
        (u'Arts et spectacle', u'http://www.canoe.com/rss/feed/nouvelles/ljm_arts.xml'),
        (u'Sports', u'http://www.canoe.com/rss/feed/nouvelles/ljm_sports.xml'),
        (u'Chroniques', u'http://www.canoe.com/rss/feed/nouvelles/ljm_chroniques.xml'),
    ]

View File

@ -0,0 +1,45 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1284927619(BasicNewsRecipe):
    """Recipe for tagesanzeiger.ch, built from the site's per-section RSS feeds."""

    title = u'Tagesanzeiger'
    publisher = u'Tamedia AG'
    oldest_article = 2
    __author__ = 'noxxx'
    max_articles_per_feed = 100
    description = 'tagesanzeiger.ch: Nichts verpassen'
    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
    language = 'de'

    # The conversion options simply reuse the metadata declared above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # (tag name, attribute) filters listed for removal: images, ad and
    # navigation containers, mail/comment forms, scripts and small print.
    remove_tags = [
        dict(name='img'),
        dict(name='div', attrs={'class': [
            'swissquote ad', 'boxNews', 'centerAD', 'contentTabs2', 'sbsLabel',
        ]}),
        dict(name='div', attrs={'id': [
            'colRightAd', 'singleRight', 'singleSmallRight', 'MailInfo',
            'metaLine', 'sidebarSky', 'contentFooter', 'commentInfo',
            'commentInfo2', 'commentInfo3', 'footerBottom', 'clear',
            'boxExclusiv', 'singleLogo', 'navSearch', 'headerLogin',
            'headerBottomRight', 'horizontalNavigation', 'subnavigation',
            'googleAdSense', 'footerAd', 'contentbox', 'articleGalleryNav',
        ]}),
        dict(name='form', attrs={'id': ['articleMailForm', 'commentform']}),
        dict(name='div', attrs={'style': ['position:absolute']}),
        dict(name='script', attrs={'type': ['text/javascript']}),
        dict(name='p', attrs={'class': [
            'schreiben', 'smallPrint', 'charCounter', 'caption',
        ]}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'Front', u'http://www.tagesanzeiger.ch/rss.html'),
        (u'Zürich', u'http://www.tagesanzeiger.ch/zuerich/rss.html'),
        (u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html'),
        (u'Ausland', u'http://www.tagesanzeiger.ch/ausland/rss.html'),
        (u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html'),
        (u'Wissen', u'http://www.tagesanzeiger.ch/wissen/rss.html'),
        (u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html'),
        (u'Wirtschaft', u'http://www.tagesanzeiger.ch/wirtschaft/rss.html'),
        (u'Sport', u'http://www.tagesanzeiger.ch/sport/rss.html'),
        (u'Kultur', u'http://www.tagesanzeiger.ch/kultur/rss.html'),
        (u'Leben', u'http://www.tagesanzeiger.ch/leben/rss.html'),
        (u'Auto', u'http://www.tagesanzeiger.ch/auto/rss.html'),
    ]

    def print_version(self, url):
        """Return the print-view address: the article URL plus '/print.html'."""
        return url + '/print.html'

View File

@ -0,0 +1,52 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    """Recipe for TheMarker (Hebrew financial news), built from its RSS feeds."""

    description = 'TheMarker Financial News in Hebrew'
    __author__ = 'TonyTheBookworm, Marbs'
    cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
    title = u'TheMarker'
    language = 'he'
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='tr', attrs={'bgcolor': ['#738A94']})]
    max_articles_per_feed = 10
    # Hebrew is written right-to-left, so force RTL on the generated markup.
    extra_css = 'body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'

    # (section title, feed URL) pairs
    feeds = [
        (u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
        (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
        (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
        (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
        (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
        (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
        (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
        (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
        (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
        (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
        (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly address for an article URL.

        URLs that mention ``it.themarker.com`` (case-insensitively) map to
        the ``PrintArticle`` path, using the text that follows ``article/``.
        Every other URL maps to ``printFriendly.jhtml``, using the segment
        that follows the first ``=``.

        Raises IndexError when the expected ``article/`` or ``=`` marker is
        missing from ``url``.
        """
        # One check per URL is enough: the result depends only on the URL as
        # a whole, so there is nothing to iterate over.
        if re.search(r'it\.themarker\.com', url, re.IGNORECASE):
            return 'http://it.themarker.com/tmit/PrintArticle/' + url.split("article/")[1]
        return ('http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F'
                + url.split("=")[1] + '.xml')

View File

@ -70,13 +70,16 @@ class WallStreetJournal(BasicNewsRecipe):
def wsj_add_feed(self,feeds,title,url): def wsj_add_feed(self,feeds,title,url):
self.log('Found section:', title) self.log('Found section:', title)
if url.endswith('whatsnews'): try:
articles = self.wsj_find_wn_articles(url) if url.endswith('whatsnews'):
else: articles = self.wsj_find_wn_articles(url)
articles = self.wsj_find_articles(url) else:
articles = self.wsj_find_articles(url)
except:
articles = []
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
return feeds return feeds
def parse_index(self): def parse_index(self):
soup = self.wsj_get_index() soup = self.wsj_get_index()
@ -99,7 +102,7 @@ class WallStreetJournal(BasicNewsRecipe):
url = 'http://online.wsj.com' + a['href'] url = 'http://online.wsj.com' + a['href']
feeds = self.wsj_add_feed(feeds,title,url) feeds = self.wsj_add_feed(feeds,title,url)
title = 'What''s News' title = 'What''s News'
url = url.replace('pageone','whatsnews') url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url) feeds = self.wsj_add_feed(feeds,title,url)
else: else:
title = self.tag_to_string(a) title = self.tag_to_string(a)
@ -141,7 +144,7 @@ class WallStreetJournal(BasicNewsRecipe):
articles = [] articles = []
flavorarea = soup.find('div', attrs={'class':lambda x: x and 'ahed' in x}) flavorarea = soup.find('div', attrs={'class':lambda x: x and 'ahed' in x})
if flavorarea is not None: if flavorarea is not None:
flavorstory = flavorarea.find('a', href=lambda x: x and x.startswith('/article')) flavorstory = flavorarea.find('a', href=lambda x: x and x.startswith('/article'))
if flavorstory is not None: if flavorstory is not None:
flavorstory['class'] = 'mjLinkItem' flavorstory['class'] = 'mjLinkItem'

View File

@ -54,10 +54,13 @@ class WallStreetJournal(BasicNewsRecipe):
def wsj_add_feed(self,feeds,title,url): def wsj_add_feed(self,feeds,title,url):
self.log('Found section:', title) self.log('Found section:', title)
if url.endswith('whatsnews'): try:
articles = self.wsj_find_wn_articles(url) if url.endswith('whatsnews'):
else: articles = self.wsj_find_wn_articles(url)
articles = self.wsj_find_articles(url) else:
articles = self.wsj_find_articles(url)
except:
articles = []
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
return feeds return feeds

View File

@ -443,9 +443,9 @@ class KOBO(USBMS):
# Reset Im_Reading list in the database # Reset Im_Reading list in the database
if oncard == 'carda': if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\'' query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb': elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\'' query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\''
try: try:
cursor.execute (query) cursor.execute (query)

View File

@ -241,7 +241,7 @@ OptionRecommendation(name='toc_filter',
OptionRecommendation(name='chapter', OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and " recommended_value="//*[((name()='h1' or name()='h2') and "
r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class " r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW, "= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default ' help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words ' 'is to consider <h1> or <h2> tags that contain the words '

View File

@ -106,6 +106,52 @@ def line_length(format, raw, percent):
return lengths[index] return lengths[index]
class Dehyphenator(object):
    '''
    Decides whether a word hyphenated across a line break keeps its hyphen.

    The document itself is used as the dictionary: the joined word (minus
    common suffixes/prefixes) is searched for in the text, and the hyphen is
    dropped only when it is found. This handles all languages along with
    uncommon, made-up, and scientific words. The primary disadvantage is that
    words appearing only once in the document retain their hyphens.
    '''

    def __init__(self):
        # Add common suffixes to the regex below to increase the likelihood of a match -
        # don't add suffixes which are also complete words, such as 'able' or 'sex'
        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
        # remove prefixes if the prefix was not already the point of hyphenation
        self.prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE)
        self.removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE)

    def dehyphenate(self, match):
        '''
        Substitution callback: return the joined or the hyphenated word.

        ``match`` must provide the named groups ``firstpart`` and
        ``secondpart``. ``self.html`` (set by ``__call__``) is the text that
        is searched for the stemmed word.
        '''
        firsthalf = match.group('firstpart')
        secondhalf = match.group('secondpart')
        # Concatenate directly instead of going through str(): on Python 2
        # str() raises UnicodeEncodeError for non-ASCII words.
        hyphenated = firsthalf + "-" + secondhalf
        dehyphenated = firsthalf + secondhalf
        lookupword = self.removesuffixes.sub('', dehyphenated)
        if self.prefixes.match(firsthalf) is None:
            lookupword = self.removeprefix.sub('', lookupword)
        # The word comes straight from the document and may contain regex
        # metacharacters such as '(' or '[', so it must be escaped.
        booklookup = re.compile(re.escape(lookupword), re.IGNORECASE)
        if booklookup.search(self.html):
            return dehyphenated
        return hyphenated

    def __call__(self, html, format, length=1):
        '''
        Return ``html`` with line-break hyphenation resolved.

        ``format`` selects the markup pattern: ``'html'``, ``'pdf'`` or
        ``'individual_words'``. For ``'html'`` and ``'pdf'``, ``length`` is
        the number of characters that must precede the hyphenated word.
        Raises ValueError for any other ``format``.
        '''
        self.html = html
        # Backslashes are doubled (not raw strings) so the unicode literals
        # keep the same value on both Python 2 and Python 3.
        if format == 'html':
            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^“"\\s>]+)-\\s*(?=<)(</span>\\s*(</[iubp]>\\s*<[iubp][^>]*>\\s*)?<span[^>]*>|</[iubp]>\\s*<[iubp][^>]*>)?\\s*(?P<secondpart>[\\w\\d]+)' % length)
        elif format == 'pdf':
            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^“"\\s>]+)-\\s*(<p>|</[iub]>\\s*<p>\\s*<[iub]>)\\s*(?P<secondpart>[\\w\\d]+)' % length)
        elif format == 'individual_words':
            # for later, not called anywhere yet
            # The lookahead requires the next angle bracket to be '<', i.e.
            # the word sits in text rather than inside a tag, and keeps the
            # surrounding text out of the replaced span.
            intextmatch = re.compile(u'\\b(?P<firstpart>[^"\\s<>]+)-(?P<secondpart>\\w+)\\b(?=[^<>]*<)')
        else:
            raise ValueError('Unknown dehyphenation format: %r' % (format,))
        return intextmatch.sub(self.dehyphenate, html)
class CSSPreProcessor(object): class CSSPreProcessor(object):
@ -328,11 +374,10 @@ class HTMLPreProcessor(object):
print 'Failed to parse remove_footer regexp' print 'Failed to parse remove_footer regexp'
traceback.print_exc() traceback.print_exc()
# unwrap hyphenation - moved here so it's executed after header/footer removal # unwrap em/en dashes, delete soft hyphens - moved here so it's executed after header/footer removal
if is_pdftohtml: if is_pdftohtml:
# unwrap visible dashes and hyphens - don't delete they are often hyphens for # unwrap em/en dashes
# for compound words, formatting, etc end_rules.append((re.compile(u'(?<=[–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
end_rules.append((re.compile(u'(?<=[-–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens # unwrap/delete soft hyphens
end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: '')) end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens with formatting # unwrap/delete soft hyphens with formatting
@ -350,7 +395,7 @@ class HTMLPreProcessor(object):
# print "The pdf line length returned is " + str(length) # print "The pdf line length returned is " + str(length)
end_rules.append( end_rules.append(
# Un wrap using punctuation # Un wrap using punctuation
(re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), (re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
) )
for rule in self.PREPROCESS + start_rules: for rule in self.PREPROCESS + start_rules:
@ -380,6 +425,11 @@ class HTMLPreProcessor(object):
for rule in rules + end_rules: for rule in rules + end_rules:
html = rule[0].sub(rule[1], html) html = rule[0].sub(rule[1], html)
if is_pdftohtml:
# Dehyphenate
dehyphenator = Dehyphenator()
html = dehyphenator(html,'pdf', length)
#dump(html, 'post-preprocess') #dump(html, 'post-preprocess')
# Handle broken XHTML w/ SVG (ugh) # Handle broken XHTML w/ SVG (ugh)

View File

@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re import re
from calibre.ebooks.conversion.preprocess import line_length from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
class PreProcessor(object): class PreProcessor(object):
@ -114,7 +114,7 @@ class PreProcessor(object):
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Get rid of empty span, bold, & italics tags # Get rid of empty span, bold, & italics tags
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<[ibu]>\s*(<[ibu]>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html) html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
# If more than 40% of the lines are empty paragraphs then delete them to clean up spacing # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
@ -132,7 +132,6 @@ class PreProcessor(object):
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
html = re.sub(r"\s*</p>", "</p>\n", html) html = re.sub(r"\s*</p>", "</p>\n", html)
html = re.sub(r"\s*<p>\s*", "\n<p>", html) html = re.sub(r"\s*<p>\s*", "\n<p>", html)
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
# detect chapters/sections to match xpath or splitting logic # detect chapters/sections to match xpath or splitting logic
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE) heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html)) self.html_preprocess_sections = len(heading.findall(html))
@ -140,16 +139,16 @@ class PreProcessor(object):
# #
# Start with most typical chapter headings, get more aggressive until one works # Start with most typical chapter headings, get more aggressive until one works
if self.html_preprocess_sections < 10: if self.html_preprocess_sections < 10:
chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE) chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
html = chapdetect.sub(self.chapter_head, html) html = chapdetect.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10: if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters") self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters")
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html) html = chapdetect2.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10: if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words") self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words")
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html) html = chapdetect2.sub(self.chapter_head, html)
###### Unwrap lines ###### ###### Unwrap lines ######
@ -174,10 +173,16 @@ class PreProcessor(object):
length = line_length(format, html, getattr(self.extra_opts, length = line_length(format, html, getattr(self.extra_opts,
'html_unwrap_factor', 0.4)) 'html_unwrap_factor', 0.4))
self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***") self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
max_length = length * 1.4
min_max = str("(?<=.{"+str(length)+"})(?<!.{"+str(max_length)+"})")
# #
# Unwrap and/or delete soft-hyphens, hyphens # Unwrap em/en dashes, delete soft-hyphens
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html) html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
html = re.sub(u'(?<=[-\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html) html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html)
# Dehyphenate
dehyphenator = Dehyphenator()
html = dehyphenator(html,'html', length)
# Unwrap lines using punctuation and line length # Unwrap lines using punctuation and line length
unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
@ -186,7 +191,7 @@ class PreProcessor(object):
# If still no sections after unwrapping mark split points on lines with no punctuation # If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < 10: if self.html_preprocess_sections < 10:
self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections)) self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections))
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html) html = chapdetect3.sub(self.chapter_break, html)
# search for places where a first or second level heading is immediately followed by another # search for places where a first or second level heading is immediately followed by another
# top level heading. demote the second heading to h3 to prevent splitting between chapter # top level heading. demote the second heading to h3 to prevent splitting between chapter

View File

@ -5,7 +5,6 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.constants import iswindows, isosx
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog
from calibre.gui2.actions import InterfaceAction from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.tweak_epub import TweakEpub from calibre.gui2.dialogs.tweak_epub import TweakEpub
@ -13,53 +12,45 @@ from calibre.gui2.dialogs.tweak_epub import TweakEpub
class TweakEpubAction(InterfaceAction): class TweakEpubAction(InterfaceAction):
name = 'Tweak ePub' name = 'Tweak ePub'
action_spec = (_('Tweak ePub'), 'tweak_epub.png', 'Edit ePub in situ', action_spec = (_('Tweak ePub'), 'trim.png',
_('T')) _('Make small changes to ePub format books'),
_('T'))
dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
action_type = 'current' action_type = 'current'
def genesis(self): def genesis(self):
self.qaction.triggered.connect(self._edit_epub_in_situ) self.qaction.triggered.connect(self.edit_epub_in_situ)
def _edit_epub_in_situ(self, *args): def edit_epub_in_situ(self, *args):
row = self.gui.library_view.currentIndex()
# Assure exactly one row selected if not row.isValid():
rows = self.gui.library_view.selectionModel().selectedRows() return error_dialog(self.gui, _('Cannot tweak ePub'),
if not rows or len(rows) == 0: _('No book selected'), show=True)
d = error_dialog(self.gui, _('Cannot tweak ePub'), _('No book selected'))
d.exec_()
return
if len(rows) > 1:
d = error_dialog(self.gui, _('Cannot tweak ePub'), _('Multiple books selected'))
d.exec_()
return
# Confirm 'EPUB' in formats # Confirm 'EPUB' in formats
row = rows[0].row() book_id = self.gui.library_view.model().id(row)
formats = self.gui.library_view.model().db.formats(row).upper().split(',') try:
if not 'EPUB' in formats: path_to_epub = self.gui.library_view.model().db.format_abspath(
d = error_dialog(self.gui, _('Cannot tweak ePub'), _('No EPUB available')) book_id, 'EPUB', index_is_id=True)
d.exec_() except:
return path_to_epub = None
if not path_to_epub:
return error_dialog(self.gui, _('Cannot tweak ePub'),
_('No ePub available. First convert the book to ePub.'),
show=True)
path_to_epub = self.gui.library_view.model().db.format_abspath(row, 'EPUB')
id = self._get_selected_id()
# Launch a modal dialog waiting for user to complete or cancel # Launch a modal dialog waiting for user to complete or cancel
dlg = TweakEpub(self.gui, path_to_epub) dlg = TweakEpub(self.gui, path_to_epub)
if dlg.exec_() == dlg.Accepted: if dlg.exec_() == dlg.Accepted:
self._update_db(id, dlg._output) self.update_db(book_id, dlg._output)
dlg.cleanup() dlg.cleanup()
def _get_selected_id(self): def update_db(self, book_id, rebuilt):
rows = self.gui.library_view.selectionModel().selectedRows()
return map(self.gui.library_view.model().id, rows)[0]
def _update_db(self, id, rebuilt):
''' '''
Update the calibre db with the tweaked epub Update the calibre db with the tweaked epub
''' '''
print "gui2.actions.tweak_epub:TweakEpubAction._update_db()" self.gui.library_view.model().db.add_format(book_id, 'EPUB',
print " updating id %d from %s" % (id, rebuilt) open(rebuilt, 'rb'), index_is_id=True)
self.gui.library_view.model().db.add_format_with_hooks(id, 'EPUB', rebuilt, index_is_id=True)

View File

@ -800,7 +800,7 @@ class DeviceMixin(object): # {{{
# if set_books_in_library did not. # if set_books_in_library did not.
if not self.set_books_in_library(self.booklists(), reset=True): if not self.set_books_in_library(self.booklists(), reset=True):
self.upload_booklists() self.upload_booklists()
self.book_on_device(None, None, reset=True) self.book_on_device(None, reset=True)
# We need to reset the ondevice flags in the library. Use a big hammer, # We need to reset the ondevice flags in the library. Use a big hammer,
# so we don't need to worry about whether some succeeded or not. # so we don't need to worry about whether some succeeded or not.
self.refresh_ondevice_info(device_connected=True, reset_only=False) self.refresh_ondevice_info(device_connected=True, reset_only=False)
@ -1309,7 +1309,7 @@ class DeviceMixin(object): # {{{
for f in files: for f in files:
getattr(f, 'close', lambda : True)() getattr(f, 'close', lambda : True)()
def book_on_device(self, id, format=None, reset=False): def book_on_device(self, id, reset=False):
''' '''
Return an indication of whether the given book represented by its db id Return an indication of whether the given book represented by its db id
is on the currently connected device. It returns a 5 element list. The is on the currently connected device. It returns a 5 element list. The
@ -1338,8 +1338,6 @@ class DeviceMixin(object): # {{{
self.book_db_id_cache.append(set()) self.book_db_id_cache.append(set())
for book in l: for book in l:
db_id = getattr(book, 'application_id', None) db_id = getattr(book, 'application_id', None)
if db_id is None:
db_id = book.db_id
if db_id is not None: if db_id is not None:
# increment the count of books on the device with this # increment the count of books on the device with this
# db_id. # db_id.

View File

@ -300,6 +300,24 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.cpixmap = pix self.cpixmap = pix
self.cover_data = cdata self.cover_data = cdata
def trim_cover(self, *args):
    '''
    Remove the border (if any) from the current cover.

    Does nothing when ``self.cover_data`` is empty. Otherwise the cover
    data is loaded into an ``Image``, ``trim(10)`` is applied, the result
    is exported in 'jpg' format, shown in the cover widget and stored as
    the new cover data, with ``cover_changed`` set.
    '''
    from calibre.utils.magick import Image
    original = self.cover_data
    if original:
        image = Image()
        image.load(original)
        image.trim(10)
        trimmed = image.export('jpg')
        pixmap = QPixmap()
        pixmap.loadFromData(trimmed)
        # Show the trimmed cover, then record it as the pending change.
        self.cover.setPixmap(pixmap)
        self.cover_changed = True
        self.cpixmap = pixmap
        self.cover_data = trimmed
def sync_formats(self): def sync_formats(self):
old_extensions, new_extensions, paths = set(), set(), {} old_extensions, new_extensions, paths = set(), set(), {}
for row in range(self.formats.count()): for row in range(self.formats.count()):
@ -380,6 +398,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.remove_unused_series) self.remove_unused_series)
QObject.connect(self.auto_author_sort, SIGNAL('clicked()'), QObject.connect(self.auto_author_sort, SIGNAL('clicked()'),
self.deduce_author_sort) self.deduce_author_sort)
self.trim_cover_button.clicked.connect(self.trim_cover)
self.connect(self.author_sort, SIGNAL('textChanged(const QString&)'), self.connect(self.author_sort, SIGNAL('textChanged(const QString&)'),
self.author_sort_box_changed) self.author_sort_box_changed)
self.connect(self.authors, SIGNAL('editTextChanged(const QString&)'), self.connect(self.authors, SIGNAL('editTextChanged(const QString&)'),

View File

@ -625,6 +625,17 @@ Using this button to create author sort will change author sort from red to gree
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QToolButton" name="trim_cover_button">
<property name="toolTip">
<string>Remove border (if any) from cover</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/trim.png</normaloff>:/images/trim.png</iconset>
</property>
</widget>
</item>
<item> <item>
<widget class="QToolButton" name="reset_cover"> <widget class="QToolButton" name="reset_cover">
<property name="toolTip"> <property name="toolTip">

View File

@ -6,15 +6,12 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, shutil, subprocess, sys import os, shutil
from contextlib import closing from contextlib import closing
from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED
from PyQt4 import QtGui from PyQt4.Qt import QDialog
from PyQt4.Qt import QDialog, SIGNAL
from calibre import prints
from calibre.constants import iswindows, isosx, DEBUG
from calibre.gui2 import open_local_file from calibre.gui2 import open_local_file
from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog
from calibre.libunzip import extract as zipextract from calibre.libunzip import extract as zipextract
@ -26,7 +23,6 @@ class TweakEpub(QDialog, Ui_Dialog):
To do: To do:
- need way to kill file browser proc in cleanup() - need way to kill file browser proc in cleanup()
- linux file browser launch
''' '''
def __init__(self, parent, epub): def __init__(self, parent, epub):
@ -40,36 +36,17 @@ class TweakEpub(QDialog, Ui_Dialog):
# Run the dialog setup generated from tweak_epub.ui # Run the dialog setup generated from tweak_epub.ui
self.setupUi(self) self.setupUi(self)
self.connect(self.cancel_button, self.cancel_button.clicked.connect(self.reject)
SIGNAL("clicked()"), self.explode_button.clicked.connect(self.explode)
self.cancel) self.rebuild_button.clicked.connect(self.rebuild)
self.connect(self.explode_button,
SIGNAL("clicked()"),
self.explode)
self.connect(self.rebuild_button,
SIGNAL("clicked()"),
self.rebuild)
# Position update dialog overlaying top left of app window # Position update dialog overlaying top left of app window
parent_loc = parent.pos() parent_loc = parent.pos()
self.move(parent_loc.x(),parent_loc.y()) self.move(parent_loc.x(),parent_loc.y())
def cancel(self):
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.cancel()")
return QDialog.reject(self)
def cleanup(self): def cleanup(self):
'''
Kill the file browser
'''
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.cleanup()")
# Delete directory containing exploded ePub # Delete directory containing exploded ePub
if self._exploded is not None: if self._exploded is not None:
if DEBUG:
prints(" removing exploded dir\n %s" % self._exploded)
shutil.rmtree(self._exploded, ignore_errors=True) shutil.rmtree(self._exploded, ignore_errors=True)
@ -78,37 +55,17 @@ class TweakEpub(QDialog, Ui_Dialog):
Generic subprocess launch of native file browser Generic subprocess launch of native file browser
User can use right-click to 'Open with ...' User can use right-click to 'Open with ...'
''' '''
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.display_exploded()")
'''
if isosx:
cmd = 'open %s' % self._exploded
elif iswindows:
cmd = 'start explorer.exe /e,/root,%s' % self._exploded
else:
# *** Kovid - need proper linux invocation here ***
cmd = '<linux command to open native file browser>'
# *** Kovid - need a way of launching this process than can be killed in cleanup() ***
self._file_browser_proc = subprocess.Popen(cmd, shell=True)
'''
open_local_file(self._exploded) open_local_file(self._exploded)
def explode(self): def explode(self, *args):
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.explode()")
if self._exploded is None: if self._exploded is None:
if DEBUG:
prints(" exploding %s" % self._epub)
self._exploded = PersistentTemporaryDirectory("_exploded", prefix='') self._exploded = PersistentTemporaryDirectory("_exploded", prefix='')
zipextract(self._epub, self._exploded) zipextract(self._epub, self._exploded)
self.display_exploded() self.display_exploded()
self.rebuild_button.setEnabled(True) self.rebuild_button.setEnabled(True)
self.explode_button.setEnabled(False) self.explode_button.setEnabled(False)
def rebuild(self): def rebuild(self, *args):
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.rebuild()")
self._output = os.path.join(self._exploded, 'rebuilt.epub') self._output = os.path.join(self._exploded, 'rebuilt.epub')
with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as zf: with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as zf:
# Write mimetype # Write mimetype
@ -120,7 +77,8 @@ class TweakEpub(QDialog, Ui_Dialog):
if fn in exclude_files: if fn in exclude_files:
continue continue
absfn = os.path.join(root, fn) absfn = os.path.join(root, fn)
zfn = absfn[len(self._exploded) + len(os.sep):] zfn = os.path.relpath(absfn,
self._exploded).replace(os.sep, '/')
zf.write(absfn, zfn) zf.write(absfn, zfn)
return QDialog.accept(self) return QDialog.accept(self)

View File

@ -9,8 +9,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>161</width> <width>382</width>
<height>132</height> <height>242</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -22,65 +22,66 @@
<property name="modal"> <property name="modal">
<bool>false</bool> <bool>false</bool>
</property> </property>
<widget class="QWidget" name="verticalLayoutWidget"> <layout class="QGridLayout" name="gridLayout">
<property name="geometry"> <item row="1" column="0">
<rect> <widget class="QPushButton" name="explode_button">
<x>10</x> <property name="statusTip">
<y>10</y> <string>Display contents of exploded ePub</string>
<width>141</width> </property>
<height>110</height> <property name="text">
</rect> <string>&amp;Explode ePub</string>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout"> <property name="icon">
<item> <iconset resource="../../../../resources/images.qrc">
<widget class="QPushButton" name="explode_button"> <normaloff>:/images/wizard.png</normaloff>:/images/wizard.png</iconset>
<property name="statusTip"> </property>
<string>Display contents of exploded ePub</string> </widget>
</property> </item>
<property name="text"> <item row="2" column="0">
<string>Explode ePub</string> <widget class="QPushButton" name="rebuild_button">
</property> <property name="enabled">
</widget> <bool>false</bool>
</item> </property>
<item> <property name="statusTip">
<widget class="QPushButton" name="rebuild_button"> <string>Rebuild ePub from exploded contents</string>
<property name="enabled"> </property>
<bool>false</bool> <property name="text">
</property> <string>&amp;Rebuild ePub</string>
<property name="statusTip"> </property>
<string>Rebuild ePub from exploded contents</string> <property name="icon">
</property> <iconset resource="../../../../resources/images.qrc">
<property name="text"> <normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset>
<string>Rebuild ePub</string> </property>
</property> </widget>
</widget> </item>
</item> <item row="3" column="0">
<item> <widget class="QPushButton" name="cancel_button">
<widget class="QPushButton" name="cancel_button"> <property name="statusTip">
<property name="statusTip"> <string>Discard changes</string>
<string>Discard changes</string> </property>
</property> <property name="text">
<property name="text"> <string>&amp;Cancel</string>
<string>Cancel</string> </property>
</property> <property name="icon">
</widget> <iconset resource="../../../../resources/images.qrc">
</item> <normaloff>:/images/window-close.png</normaloff>:/images/window-close.png</iconset>
<item> </property>
<spacer name="verticalSpacer"> </widget>
<property name="orientation"> </item>
<enum>Qt::Vertical</enum> <item row="0" column="0">
</property> <widget class="QLabel" name="label">
<property name="sizeHint" stdset="0"> <property name="text">
<size> <string>First, explode the epub. Then edit its contents by right clicking on the individual files and selecting the editor of your choice. When you are done, click rebuild epub and the epub in your calibre library will be updated with the changes you have made.</string>
<width>20</width> </property>
<height>40</height> <property name="wordWrap">
</size> <bool>true</bool>
</property> </property>
</spacer> </widget>
</item> </item>
</layout> </layout>
</widget>
</widget> </widget>
<resources/> <resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections/> <connections/>
</ui> </ui>

View File

@ -217,6 +217,10 @@ def fetch_scheduled_recipe(arg):
if 'output_profile' in ps: if 'output_profile' in ps:
recs.append(('output_profile', ps['output_profile'], recs.append(('output_profile', ps['output_profile'],
OptionRecommendation.HIGH)) OptionRecommendation.HIGH))
if ps['output_profile'] == 'kindle':
recs.append(('no_inline_toc', True,
OptionRecommendation.HIGH))
lf = load_defaults('look_and_feel') lf = load_defaults('look_and_feel')
if lf.get('base_font_size', 0.0) != 0.0: if lf.get('base_font_size', 0.0) != 0.0:
recs.append(('base_font_size', lf['base_font_size'], recs.append(('base_font_size', lf['base_font_size'],

View File

@ -184,7 +184,7 @@ class ContentServer(object):
if path and os.path.exists(path): if path and os.path.exists(path):
updated = fromtimestamp(os.stat(path).st_mtime) updated = fromtimestamp(os.stat(path).st_mtime)
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
return fmt.read() return fmt
# }}} # }}}