KG tweaks to tweak-epub

This commit is contained in:
GRiker 2010-09-21 04:34:51 -07:00
commit ea3b8fa177
20 changed files with 1049 additions and 178 deletions

688
imgsrc/trim.svg Normal file
View File

@ -0,0 +1,688 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="128"
height="128"
id="svg1307"
sodipodi:version="0.32"
inkscape:version="0.46+devel"
version="1.0"
sodipodi:docname="transform-crop.svgz"
inkscape:export-filename="/home/pinheiro/pics/oxygen-icons/scalable/actions/transform-crop.png"
inkscape:export-xdpi="90"
inkscape:export-ydpi="90"
inkscape:output_extension="org.inkscape.output.svgz.inkscape">
<defs
id="defs1309">
<linearGradient
inkscape:collect="always"
id="linearGradient2594">
<stop
style="stop-color:#fafafa;stop-opacity:1;"
offset="0"
id="stop2596" />
<stop
style="stop-color:#fafafa;stop-opacity:0;"
offset="1"
id="stop2598" />
</linearGradient>
<linearGradient
inkscape:collect="always"
id="linearGradient3969">
<stop
style="stop-color:#000000;stop-opacity:1;"
offset="0"
id="stop3971" />
<stop
style="stop-color:#000000;stop-opacity:0;"
offset="1"
id="stop3973" />
</linearGradient>
<linearGradient
id="linearGradient2783">
<stop
style="stop-color:#323232;stop-opacity:1;"
offset="0"
id="stop2785" />
<stop
id="stop2787"
offset="0.07692308"
style="stop-color:#dfe1e1;stop-opacity:1;" />
<stop
style="stop-color:#b6b1b1;stop-opacity:1;"
offset="0.26289096"
id="stop2799" />
<stop
id="stop2789"
offset="0.5"
style="stop-color:#8d8282;stop-opacity:1;" />
<stop
style="stop-color:#ffffff;stop-opacity:1;"
offset="0.78201604"
id="stop2791" />
<stop
style="stop-color:#dfd9df;stop-opacity:1;"
offset="0.9005897"
id="stop2793" />
<stop
style="stop-color:#3a3a3a;stop-opacity:1;"
offset="1"
id="stop2795" />
</linearGradient>
<linearGradient
id="linearGradient2222"
inkscape:collect="always">
<stop
id="stop2224"
offset="0"
style="stop-color:#0066ff;stop-opacity:1" />
<stop
id="stop2226"
offset="1"
style="stop-color:#80b3ff;stop-opacity:1" />
</linearGradient>
<linearGradient
id="linearGradient3314"
inkscape:collect="always">
<stop
id="stop3316"
offset="0"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop3318"
offset="1"
style="stop-color:#ffffff;stop-opacity:0;" />
</linearGradient>
<linearGradient
id="linearGradient2431">
<stop
style="stop-color:#ffffff;stop-opacity:1;"
offset="0"
id="stop2433" />
<stop
id="stop2435"
offset="0.42597079"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop2437"
offset="0.5892781"
style="stop-color:#f1f1f1;stop-opacity:1;" />
<stop
style="stop-color:#eaeaea;stop-opacity:1;"
offset="0.80219781"
id="stop2439" />
<stop
style="stop-color:#dfdfdf;stop-opacity:1;"
offset="1"
id="stop2441" />
</linearGradient>
<linearGradient
id="linearGradient7422">
<stop
style="stop-color:#b4b4b6;stop-opacity:1;"
offset="0"
id="stop7424" />
<stop
id="stop5348"
offset="0.5"
style="stop-color:#9c9ca1;stop-opacity:1;" />
<stop
id="stop7426"
offset="1"
style="stop-color:#cdcdd1;stop-opacity:1;" />
</linearGradient>
<linearGradient
id="linearGradient3310"
inkscape:collect="always">
<stop
id="stop3312"
offset="0"
style="stop-color:#ffffff;stop-opacity:1;" />
<stop
id="stop3314"
offset="1"
style="stop-color:#ffffff;stop-opacity:0;" />
</linearGradient>
<filter
inkscape:collect="always"
x="-0.21138181"
width="1.4227636"
y="-0.21047288"
height="1.4209458"
id="filter9723">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="1.4336041"
id="feGaussianBlur9725" />
</filter>
<clipPath
clipPathUnits="userSpaceOnUse"
id="clipPath10698">
<path
style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.80000001;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1"
d="M -128.2008,-3.392377 L -104.45558,6.3360672 L -102.43766,6.1757677 L -103.81912,-4.5678172 L -105.75454,-5.8316609 L -124.96922,-4.4459394 L -128.2008,-3.392377 z "
id="path10700"
sodipodi:nodetypes="ccccccc" />
</clipPath>
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient2783"
id="radialGradient3418"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.9728905,-8.15107,-18.526373,-2.211261,1957.2342,725.31677)"
cx="53.235302"
cy="106.0573"
fx="53.235302"
fy="106.0573"
r="9.1025209" />
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient2594"
id="radialGradient3420"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.5808473,-2.8009276,-6.4965168,-1.3472267,701.00301,348.75795)"
cx="53.347126"
cy="104.68401"
fx="53.347126"
fy="104.68401"
r="9.1025209" />
<radialGradient
inkscape:collect="always"
xlink:href="#linearGradient3314"
id="radialGradient3422"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(-2.9339535,-1.0170467,-1.1904108,3.4340702,323.071,-252.78281)"
cx="49.110855"
cy="105.43803"
fx="49.110855"
fy="105.43803"
r="10.20672" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2783"
id="linearGradient3425"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(2.2608955,0,0,1.9345479,-550.58555,-317.90247)"
x1="190.03462"
y1="90.22673"
x2="208.7153"
y2="90.22673" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3969"
id="linearGradient3430"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(2.2608955,0,0,1.9345479,-497.11778,-432.24104)"
x1="98.411324"
y1="185.68851"
x2="166.32983"
y2="155.59846" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient7422"
id="linearGradient3525"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(6.0715756e-2,0,0,9.7589526e-2,24.201706,-45.627655)"
x1="399.77466"
y1="1164.6696"
x2="399.77466"
y2="549.06134" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2431"
id="linearGradient3527"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.5415355,0,0,0.7222225,23.477667,-8.2222193)"
x1="119.57646"
y1="23.792561"
x2="15.999996"
y2="109.6508" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient3310"
id="linearGradient3529"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0,-1.5975038,-2,0,96,199.26848)"
x1="102.31124"
y1="-5.8302126"
x2="74.330322"
y2="32" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2222"
id="linearGradient3538"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.7476489,0,0,0.7476489,0,-19.999999)"
x1="8.2386189"
y1="-13.864992"
x2="8.2386189"
y2="-1.4047648" />
<filter
inkscape:collect="always"
id="filter4420">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="3.0486726"
id="feGaussianBlur4422" />
</filter>
<mask
maskUnits="userSpaceOnUse"
id="mask3562">
<rect
ry="1.4444447"
rx="1.1997639"
y="8"
x="-4.0000005"
height="116.00001"
width="124"
id="rect3564"
style="fill:#ffffff;fill-opacity:1;stroke:none;filter:url(#filter4420)"
transform="matrix(1.1453342,0,0,1.1453342,15.087799,-38.432604)" />
</mask>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="2.2136483"
inkscape:cx="77.317692"
inkscape:cy="55.850409"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:document-units="px"
inkscape:grid-bbox="true"
guidetolerance="4"
showguides="true"
inkscape:guide-bbox="true"
inkscape:window-width="1440"
inkscape:window-height="840"
inkscape:window-x="223"
inkscape:window-y="37"
objecttolerance="4"
gridtolerance="4">
<sodipodi:guide
orientation="horizontal"
position="-32.073749"
id="guide2204" />
<inkscape:grid
id="GridFromPre046Settings"
type="xygrid"
originx="0px"
originy="0px"
spacingx="4px"
spacingy="4px"
color="#0000ff"
empcolor="#0000ff"
opacity="0.2"
empopacity="0.4"
empspacing="4"
visible="true"
enabled="true" />
</sodipodi:namedview>
<metadata
id="metadata1312">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<cc:license
rdf:resource="http://creativecommons.org/licenses/GPL/2.0/" />
<dc:contributor>
<cc:Agent>
<dc:title>Oxygen team</dc:title>
</cc:Agent>
</dc:contributor>
<dc:title></dc:title>
</cc:Work>
<cc:License
rdf:about="http://creativecommons.org/licenses/LGPL/2.1/">
<cc:permits
rdf:resource="http://web.resource.org/cc/Reproduction" />
<cc:permits
rdf:resource="http://web.resource.org/cc/Distribution" />
<cc:requires
rdf:resource="http://web.resource.org/cc/Notice" />
<cc:permits
rdf:resource="http://web.resource.org/cc/DerivativeWorks" />
<cc:requires
rdf:resource="http://web.resource.org/cc/ShareAlike" />
<cc:requires
rdf:resource="http://web.resource.org/cc/SourceCode" />
</cc:License>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Layer 1"
inkscape:groupmode="layer">
<rect
ry="0.1870501"
rx="0.1537565"
y="28.129654"
x="8"
height="92"
width="92"
id="rect3226"
style="fill:#618fd2;fill-opacity:0.09195401;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1" />
<g
id="g3520"
transform="translate(32,-0.1296539)">
<rect
inkscape:export-ydpi="90"
inkscape:export-xdpi="90"
inkscape:export-filename="/home/pinheiro/Desktop/mock2.png"
style="opacity:0.75;fill:url(#linearGradient3525);fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect3281"
width="92"
height="92"
x="28.129654"
y="-24"
inkscape:r_cx="true"
inkscape:r_cy="true"
ry="3.9616783"
rx="3.9616783"
transform="matrix(0,1,1,0,0,0)" />
<rect
ry="1.4444447"
rx="1.1997639"
y="-20"
x="32.129654"
height="84"
width="84"
id="rect3283"
style="fill:url(#linearGradient3527);fill-opacity:1;fill-rule:evenodd;stroke:none"
transform="matrix(0,1,1,0,0,0)" />
<path
id="path3285"
d="M 64,53.096891 C 45.143834,70.163928 24.748768,86.162699 -2.0000002e-07,96.129654 L -2.0000002e-07,52.647595 C 23.693959,50.212248 45.09831,42.609775 64,32.129654 L 64,53.096891 z"
style="fill:url(#linearGradient3529);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
</g>
<g
transform="translate(-16,20.129654)"
style="fill:#7193c6;fill-opacity:1"
id="g2250">
<rect
ry="1.3512546"
rx="0.077153668"
y="-116"
x="16"
height="4"
width="4"
id="rect3210"
style="opacity:1;fill:#7193c6;fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
transform="matrix(0,1,-1,0,0,0)"
inkscape:tile-w="8"
inkscape:tile-h="8"
inkscape:tile-cx="124"
inkscape:tile-cy="28" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,8)"
id="use2236"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,16)"
id="use2240"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,24)"
id="use2244"
width="128"
height="128" />
<use
style="fill:#7193c6;fill-opacity:1"
x="0"
y="0"
inkscape:tiled-clone-of="#rect3210"
xlink:href="#rect3210"
transform="translate(0,32)"
id="use2248"
width="128"
height="128" />
<use
height="88"
width="88"
transform="translate(0,24)"
id="use3220"
xlink:href="#use2240"
y="0"
x="0" />
<use
height="88"
width="88"
transform="translate(0,24)"
id="use3222"
xlink:href="#use2244"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2230"
xlink:href="#use2244"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2232"
xlink:href="#use2248"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(0,32)"
id="use2234"
xlink:href="#use3220"
y="0"
x="0" />
</g>
<use
height="128"
width="128"
transform="matrix(8.5712909e-8,-0.9999999,0.9999999,8.5712909e-8,-20.129659,128.12964)"
id="use2258"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(-88,0)"
id="use2314"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="matrix(8.5712909e-8,-0.9999999,0.9999999,8.5712909e-8,-20.129651,216.12964)"
id="use2316"
xlink:href="#g2250"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(96,0.1296547)"
id="use3300"
xlink:href="#rect3222"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(7.4990672e-6,96.129662)"
id="use3302"
xlink:href="#rect3222"
y="0"
x="0" />
<use
height="128"
width="128"
transform="translate(96,96.129652)"
id="use3304"
xlink:href="#rect3222"
y="0"
x="0" />
<rect
ry="0.18696606"
rx="0.15479258"
y="-32"
x="0"
height="12"
width="12"
id="rect3222"
style="fill:url(#linearGradient3538);fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
transform="scale(1,-1)" />
<rect
transform="scale(1,-1)"
style="fill:#bfd9ff;fill-opacity:1;stroke:none;stroke-width:0.86699998;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1"
id="rect2225"
width="4"
height="4"
x="4"
y="-28"
rx="0.15479258"
ry="0.18696606" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(96,0.1296539)"
id="use3226"
xlink:href="#rect2225"
y="0"
x="0" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(7.5e-6,96.129661)"
id="use3228"
xlink:href="#rect2225"
y="0"
x="0" />
<use
style="fill:#a4c0e4"
height="88"
width="88"
transform="translate(96,96.129654)"
id="use3230"
xlink:href="#rect2225"
y="0"
x="0" />
<rect
style="opacity:0.57786889;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3.63199997;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:4;stroke-opacity:1"
id="rect1327"
width="1"
height="0"
x="15.057414"
y="-308.20486" />
<g
id="g3407"
transform="matrix(0.8731076,0,0,0.8731076,-13.173272,33.555799)"
mask="url(#mask3562)">
<path
sodipodi:nodetypes="ccccccc"
id="path3836"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
style="fill:url(#radialGradient3418);fill-opacity:1;fill-rule:nonzero;stroke:none" />
<path
style="fill:#555753;fill-opacity:1;fill-rule:nonzero;stroke:none"
d="m 107.32508,50.938663 -74.427424,35.613119 -3.008197,6.986785 76.368201,-35.710168 3.7845,-5.046004 -2.71708,-1.843732 z"
id="path8241"
sodipodi:nodetypes="cccccc" />
<path
style="opacity:0.10688836;fill:url(#radialGradient3420);fill-opacity:1;fill-rule:nonzero;stroke:none"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
id="path11683"
sodipodi:nodetypes="ccccccc" />
<path
sodipodi:nodetypes="ccccccc"
id="path17921"
d="m 29.733826,93.557578 76.565594,-35.724313 3.74271,-5.050163 -27.964957,-18.69067 -6.907623,1.950856 -41.307066,47.80066 -4.128658,9.71363 z"
style="fill:none;stroke:url(#radialGradient3422);stroke-width:0.86455041;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:4" />
<rect
style="fill:#2e3436;fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect8239"
width="39.714981"
height="37.454777"
x="27.310663"
y="81.415123"
transform="matrix(0.6571695,-0.7537428,0.7537428,0.6571695,0,0)"
rx="3.8771732"
ry="3.8771732" />
<rect
transform="matrix(-0.7651682,-0.6438304,-0.6438304,0.7651682,0,0)"
style="fill:url(#linearGradient3425);fill-opacity:1;fill-rule:nonzero;stroke:none"
id="rect2803"
width="40.499767"
height="122.13765"
x="-120.93575"
y="-157.97318"
rx="0"
ry="0" />
<rect
transform="matrix(-0.7651682,-0.6438304,-0.6438304,0.7651682,0,0)"
y="-161.84383"
x="-119.89533"
height="126.00658"
width="39.223213"
id="rect3967"
style="fill:url(#linearGradient3430);fill-opacity:1;fill-rule:nonzero;stroke:none" />
<rect
transform="matrix(-0.6438304,0.7651682,0.7651682,0.6438304,0,0)"
y="80.243172"
x="-155.77248"
height="40.591759"
width="100.57008"
id="rect1851"
style="opacity:0.52459011;fill:#e0e0e0;fill-opacity:1;fill-rule:nonzero;stroke:none" />
<rect
ry="1.2485937"
rx="1.2485937"
transform="matrix(2.0406638,-2.3405465,2.3405465,2.0406638,304.62828,-199.57966)"
y="-5.487061"
x="-104.11894"
height="12.061829"
width="12.789698"
id="rect8248"
style="fill:#2e3436;fill-opacity:1;fill-rule:nonzero;stroke:none;filter:url(#filter9723)"
clip-path="url(#clipPath10698)" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 21 KiB

BIN
resources/images/trim.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.2 KiB

View File

@ -0,0 +1,43 @@
__author__ = ' (lrfurtado@yahoo.com.br)'
from calibre.web.feeds.news import BasicNewsRecipe
class LeJournalDeMontrealRecipe(BasicNewsRecipe):
    """News recipe for Le Journal de Montreal, built on BasicNewsRecipe.

    The class is purely declarative: it only sets the attributes that the
    BasicNewsRecipe base class reads (metadata, limits, tags to strip and
    the RSS feeds to fetch).
    """

    # --- Metadata -------------------------------------------------------
    title = u'Le Journal de Montreal'
    description = u'Le Journal de Montreal'
    __author__ = 'Luciano Furtado'
    language = 'fr'

    # --- Fetch limits / content handling ---------------------------------
    oldest_article = 7
    max_articles_per_feed = 15
    use_embedded_content = 0

    # --- Page elements removed from every article -------------------------
    # Judging by their ids/classes these are navigation bars, promo boxes,
    # ads and page chrome rather than article text.
    remove_tags = [
        dict(name='ul', attrs={'id': 'mainNav'}),
        dict(name='div', attrs={'id': 'boxPolitique'}),
        dict(name='div', attrs={'id': 'boxScoop'}),
        dict(name='div', attrs={'id': 'DossierSpec'}),
        dict(name='div', attrs={'id': 'channelBoxes'}),
        dict(name='div', attrs={'id': 'sectionBoxes'}),
        dict(name='div', attrs={'id': 'header'}),
        dict(name='div', attrs={'id': 'footer'}),
        dict(name='div', attrs={'id': 'navbarCanoe_container'}),
        dict(name='div', attrs={'id': 'popularCanoe'}),
        dict(name='div', attrs={'id': 'textAds'}),
        dict(name='div', attrs={'id': '24heures'}),
        dict(name='div', attrs={'class': 'bottomBox clear'}),
        dict(name='div', attrs={'class': 'articleControls thin'}),
    ]

    # --- (section title, RSS URL) pairs -----------------------------------
    feeds = [
        (u'Actualites',
         u'http://www.canoe.com/rss/feed/nouvelles/ljm_actualites.xml'),
        (u'Arts et spectacle',
         u'http://www.canoe.com/rss/feed/nouvelles/ljm_arts.xml'),
        (u'Sports',
         u'http://www.canoe.com/rss/feed/nouvelles/ljm_sports.xml'),
        (u'Chroniques',
         u'http://www.canoe.com/rss/feed/nouvelles/ljm_chroniques.xml'),
    ]

View File

@ -0,0 +1,45 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1284927619(BasicNewsRecipe):
    """News recipe for the Swiss daily Tagesanzeiger (tagesanzeiger.ch).

    Declares metadata, the page elements to strip and the section feeds,
    and maps every article URL to its ``/print.html`` variant.
    """

    # --- Metadata -------------------------------------------------------
    title = u'Tagesanzeiger'
    publisher = u'Tamedia AG'
    __author__ = 'noxxx'
    description = 'tagesanzeiger.ch: Nichts verpassen'
    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
    language = 'de'

    # --- Fetch limits ----------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100

    # The metadata above is forwarded unchanged as conversion options.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Page elements removed from every article -------------------------
    # All images are dropped, together with the ad/navigation/comment
    # containers listed by class, id, inline style or script type.
    remove_tags = [
        dict(name='img'),
        dict(name='div', attrs={'class': ['swissquote ad', 'boxNews', 'centerAD', 'contentTabs2', 'sbsLabel']}),
        dict(name='div', attrs={'id': [
            'colRightAd', 'singleRight', 'singleSmallRight', 'MailInfo',
            'metaLine', 'sidebarSky', 'contentFooter', 'commentInfo',
            'commentInfo2', 'commentInfo3', 'footerBottom', 'clear',
            'boxExclusiv', 'singleLogo', 'navSearch', 'headerLogin',
            'headerBottomRight', 'horizontalNavigation', 'subnavigation',
            'googleAdSense', 'footerAd', 'contentbox', 'articleGalleryNav',
        ]}),
        dict(name='form', attrs={'id': ['articleMailForm', 'commentform']}),
        dict(name='div', attrs={'style': ['position:absolute']}),
        dict(name='script', attrs={'type': ['text/javascript']}),
        dict(name='p', attrs={'class': ['schreiben', 'smallPrint', 'charCounter', 'caption']}),
    ]

    # --- (section title, RSS URL) pairs -----------------------------------
    feeds = [
        (u'Front', u'http://www.tagesanzeiger.ch/rss.html'),
        (u'Zürich', u'http://www.tagesanzeiger.ch/zuerich/rss.html'),
        (u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html'),
        (u'Ausland', u'http://www.tagesanzeiger.ch/ausland/rss.html'),
        (u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html'),
        (u'Wissen', u'http://www.tagesanzeiger.ch/wissen/rss.html'),
        (u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html'),
        (u'Wirtschaft', u'http://www.tagesanzeiger.ch/wirtschaft/rss.html'),
        (u'Sport', u'http://www.tagesanzeiger.ch/sport/rss.html'),
        (u'Kultur', u'http://www.tagesanzeiger.ch/kultur/rss.html'),
        (u'Leben', u'http://www.tagesanzeiger.ch/leben/rss.html'),
        (u'Auto', u'http://www.tagesanzeiger.ch/auto/rss.html'),
    ]

    def print_version(self, url):
        """Return the print-view URL: the article URL plus ``/print.html``."""
        print_suffix = '/print.html'
        return url + print_suffix

View File

@ -0,0 +1,52 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    """News recipe for TheMarker (Hebrew financial news).

    Declares metadata, right-to-left styling and the section feeds, and
    rewrites article URLs to their printer-friendly equivalents.
    """

    # --- Metadata -------------------------------------------------------
    description = 'TheMarker Financial News in Hebrew'
    __author__ = 'TonyTheBookworm, Marbs'
    cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
    title = u'TheMarker'
    language = 'he'

    # --- Fetch behaviour --------------------------------------------------
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='tr', attrs={'bgcolor': ['#738A94']})]
    max_articles_per_feed = 10

    # Hebrew is written right-to-left; force that direction on the body and
    # on the generated feed/article description elements.
    extra_css = 'body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'

    # --- (section title, RSS URL) pairs -----------------------------------
    feeds = [
        (u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
        (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
        (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
        (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
        (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
        (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
        (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
        (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
        (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
        (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
        (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        Two URL families are handled:

        * URLs containing ``it.themarker.com`` (matched case-insensitively):
          everything after ``article/`` is appended to the ``PrintArticle``
          endpoint of that host.
        * every other URL: the text between the first and second ``=`` (or
          up to the end of the URL) is treated as the story id and inserted
          into the ``printFriendly.jhtml`` URL.

        Raises IndexError when the URL lacks the expected ``article/`` or
        ``=`` separator.

        The previous implementation repeated this same computation once per
        character of *url* (it iterated over the URL string itself) and left
        the result unbound for an empty URL; the work is now done once.
        """
        # A plain substring search for the host name is all that is needed;
        # no leading filler pattern is required with re.search().
        if re.search(r'it\.themarker\.com', url, re.IGNORECASE):
            article_path = url.split("article/")[1]
            return 'http://it.themarker.com/tmit/PrintArticle/' + article_path

        story_id = url.split("=")[1]
        return ('http://www.themarker.com/ibo/misc/printFriendly.jhtml'
                '?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F'
                + story_id + '.xml')

View File

@ -70,13 +70,16 @@ class WallStreetJournal(BasicNewsRecipe):
def wsj_add_feed(self,feeds,title,url):
self.log('Found section:', title)
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
try:
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
except:
articles = []
if articles:
feeds.append((title, articles))
return feeds
return feeds
def parse_index(self):
soup = self.wsj_get_index()
@ -99,7 +102,7 @@ class WallStreetJournal(BasicNewsRecipe):
url = 'http://online.wsj.com' + a['href']
feeds = self.wsj_add_feed(feeds,title,url)
title = 'What''s News'
url = url.replace('pageone','whatsnews')
url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url)
else:
title = self.tag_to_string(a)
@ -141,7 +144,7 @@ class WallStreetJournal(BasicNewsRecipe):
articles = []
flavorarea = soup.find('div', attrs={'class':lambda x: x and 'ahed' in x})
if flavorarea is not None:
if flavorarea is not None:
flavorstory = flavorarea.find('a', href=lambda x: x and x.startswith('/article'))
if flavorstory is not None:
flavorstory['class'] = 'mjLinkItem'

View File

@ -54,10 +54,13 @@ class WallStreetJournal(BasicNewsRecipe):
def wsj_add_feed(self,feeds,title,url):
self.log('Found section:', title)
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
try:
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
except:
articles = []
if articles:
feeds.append((title, articles))
return feeds

View File

@ -443,9 +443,9 @@ class KOBO(USBMS):
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)

View File

@ -241,7 +241,7 @@ OptionRecommendation(name='toc_filter',
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words '

View File

@ -106,6 +106,52 @@ def line_length(format, raw, percent):
return lengths[index]
class Dehyphenator(object):
    '''
    Analyzes words to determine whether hyphens should be retained/removed. Uses the document
    itself as a dictionary. This method handles all languages along with uncommon, made-up, and
    scientific words. The primary disadvantage is that words appearing only once in the document
    retain hyphens.

    Usage: call the instance as ``dehyphenator(html, format, length)``; it returns the
    document with every hyphenated line-break candidate either joined or kept hyphenated.
    '''

    # First half of a broken word: anything up to the hyphen that is not whitespace, a double
    # quote (straight or U+201C) or '>'.  The non-ASCII quote is written as an escape so the
    # pattern does not depend on the encoding of this source file.
    _FIRSTPART = u'(?P<firstpart>[^\u201c"' + r'\s>]+)'
    # Second half of a broken word.
    _SECONDPART = r'(?P<secondpart>[\w\d]+)'

    def __init__(self):
        # Add common suffixes to the regex below to increase the likelihood of a match -
        # don't add suffixes which are also complete words, such as 'able' or 'sex'
        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
        # remove prefixes if the prefix was not already the point of hyphenation
        self.prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE)
        self.removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE)

    def dehyphenate(self, match):
        '''
        Replacement callback for ``re.sub``.

        ``match`` must provide the named groups ``firstpart`` and ``secondpart``.  The two
        halves are joined, common suffixes/prefixes are stripped, and the remaining stem is
        searched for (case-insensitively) in ``self.html``.  If the stem occurs in the
        document the joined word is used, otherwise the hyphenated form is kept.

        Returns the matched text with the span from the start of ``firstpart`` to the end of
        ``secondpart`` replaced by the chosen word; any text matched outside that span is
        preserved.
        '''
        firsthalf = match.group('firstpart')
        secondhalf = match.group('secondpart')
        # Concatenate directly instead of going through str(): under Python 2, str() raises
        # UnicodeEncodeError as soon as a word contains a non-ASCII character.
        hyphenated = firsthalf + "-" + secondhalf
        dehyphenated = firsthalf + secondhalf
        lookupword = self.removesuffixes.sub('', dehyphenated)
        if self.prefixes.match(firsthalf) is None:
            lookupword = self.removeprefix.sub('', lookupword)
        # The stem is literal text, not a pattern: escape it so characters such as '(' or
        # '[' in the word cannot break compilation or change the meaning of the search.
        booklookup = re.compile(re.escape(lookupword), re.IGNORECASE)
        if booklookup.search(self.html):
            replacement = dehyphenated
        else:
            replacement = hyphenated
        # For the 'html' and 'pdf' patterns the match starts at firstpart and ends at
        # secondpart, so this is just `replacement`.  For 'individual_words' the match also
        # covers the surrounding text node, which must be kept.
        whole = match.group(0)
        offset = match.start()
        head = whole[:match.start('firstpart') - offset]
        tail = whole[match.end('secondpart') - offset:]
        return head + replacement + tail

    def __call__(self, html, format, length=1):
        '''
        Dehyphenate ``html`` and return the result.

        :param html: the document text; also used as the dictionary for lookups.
        :param format: ``'html'``, ``'pdf'`` or ``'individual_words'``; selects the pattern
                       used to find hyphenated words.
        :param length: for ``'html'`` and ``'pdf'``, the number of characters that must
                       precede the word on the same line for it to be considered.
        :raises ValueError: if ``format`` is not one of the supported values.
        '''
        self.html = html
        preceded_by = r'(?<=.{%i})' % length
        if format == 'html':
            pattern = (preceded_by + self._FIRSTPART +
                       r'-\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*' +
                       self._SECONDPART)
        elif format == 'pdf':
            pattern = (preceded_by + self._FIRSTPART +
                       r'-\s*(<p>|</[iub]>\s*<p>\s*<[iub]>)\s*' +
                       self._SECONDPART)
        elif format == 'individual_words':
            # for later, not called anywhere yet
            # NOTE(review): the greedy [^<]* means at most one word per text node is handled.
            pattern = r'>[^<]*\b(?P<firstpart>[^"\s>]+)-(?P<secondpart>\w+)\b[^<]*<'
        else:
            raise ValueError('Unknown dehyphenation format: %r' % (format,))
        intextmatch = re.compile(pattern)
        return intextmatch.sub(self.dehyphenate, html)
class CSSPreProcessor(object):
@ -328,11 +374,10 @@ class HTMLPreProcessor(object):
print 'Failed to parse remove_footer regexp'
traceback.print_exc()
# unwrap hyphenation - moved here so it's executed after header/footer removal
# unwrap em/en dashes, delete soft hyphens - moved here so it's executed after header/footer removal
if is_pdftohtml:
# unwrap visible dashes and hyphens - don't delete they are often hyphens for
# for compound words, formatting, etc
end_rules.append((re.compile(u'(?<=[-–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap em/en dashes
end_rules.append((re.compile(u'(?<=[–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens
end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens with formatting
@ -350,7 +395,7 @@ class HTMLPreProcessor(object):
# print "The pdf line length returned is " + str(length)
end_rules.append(
# Un wrap using punctuation
(re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
(re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + start_rules:
@ -380,6 +425,11 @@ class HTMLPreProcessor(object):
for rule in rules + end_rules:
html = rule[0].sub(rule[1], html)
if is_pdftohtml:
# Dehyphenate
dehyphenator = Dehyphenator()
html = dehyphenator(html,'pdf', length)
#dump(html, 'post-preprocess')
# Handle broken XHTML w/ SVG (ugh)

View File

@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.ebooks.conversion.preprocess import line_length
from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator
from calibre.utils.logging import default_log
class PreProcessor(object):
@ -114,7 +114,7 @@ class PreProcessor(object):
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Get rid of empty span, bold, & italics tags
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<[ibu]>\s*(<[ibu]>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
# If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
@ -132,7 +132,6 @@ class PreProcessor(object):
# Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
html = re.sub(r"\s*</p>", "</p>\n", html)
html = re.sub(r"\s*<p>\s*", "\n<p>", html)
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
# detect chapters/sections to match xpath or splitting logic
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
@ -140,16 +139,16 @@ class PreProcessor(object):
#
# Start with most typical chapter headings, get more aggressive until one works
if self.html_preprocess_sections < 10:
chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
html = chapdetect.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters")
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words")
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html)
###### Unwrap lines ######
@ -174,10 +173,16 @@ class PreProcessor(object):
length = line_length(format, html, getattr(self.extra_opts,
'html_unwrap_factor', 0.4))
self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
max_length = length * 1.4
min_max = str("(?<=.{"+str(length)+"})(?<!.{"+str(max_length)+"})")
#
# Unwrap and/or delete soft-hyphens, hyphens
# Unwrap em/en dashes, delete soft-hyphens
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
html = re.sub(u'(?<=[-\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html)
html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html)
# Dehyphenate
dehyphenator = Dehyphenator()
html = dehyphenator(html,'html', length)
# Unwrap lines using punctuation and line length
unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
@ -186,7 +191,7 @@ class PreProcessor(object):
# If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < 10:
self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections))
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html)
# search for places where a first or second level heading is immediately followed by another
# top level heading. demote the second heading to h3 to prevent splitting between chapter

View File

@ -5,7 +5,6 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.constants import iswindows, isosx
from calibre.gui2 import error_dialog
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.tweak_epub import TweakEpub
@ -13,53 +12,45 @@ from calibre.gui2.dialogs.tweak_epub import TweakEpub
class TweakEpubAction(InterfaceAction):
name = 'Tweak ePub'
action_spec = (_('Tweak ePub'), 'tweak_epub.png', 'Edit ePub in situ',
_('T'))
action_spec = (_('Tweak ePub'), 'trim.png',
_('Make small changes to ePub format books'),
_('T'))
dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
action_type = 'current'
def genesis(self):
self.qaction.triggered.connect(self._edit_epub_in_situ)
self.qaction.triggered.connect(self.edit_epub_in_situ)
def _edit_epub_in_situ(self, *args):
# Assure exactly one row selected
rows = self.gui.library_view.selectionModel().selectedRows()
if not rows or len(rows) == 0:
d = error_dialog(self.gui, _('Cannot tweak ePub'), _('No book selected'))
d.exec_()
return
if len(rows) > 1:
d = error_dialog(self.gui, _('Cannot tweak ePub'), _('Multiple books selected'))
d.exec_()
return
def edit_epub_in_situ(self, *args):
row = self.gui.library_view.currentIndex()
if not row.isValid():
return error_dialog(self.gui, _('Cannot tweak ePub'),
_('No book selected'), show=True)
# Confirm 'EPUB' in formats
row = rows[0].row()
formats = self.gui.library_view.model().db.formats(row).upper().split(',')
if not 'EPUB' in formats:
d = error_dialog(self.gui, _('Cannot tweak ePub'), _('No EPUB available'))
d.exec_()
return
book_id = self.gui.library_view.model().id(row)
try:
path_to_epub = self.gui.library_view.model().db.format_abspath(
book_id, 'EPUB', index_is_id=True)
except:
path_to_epub = None
if not path_to_epub:
return error_dialog(self.gui, _('Cannot tweak ePub'),
_('No ePub available. First convert the book to ePub.'),
show=True)
path_to_epub = self.gui.library_view.model().db.format_abspath(row, 'EPUB')
id = self._get_selected_id()
# Launch a modal dialog waiting for user to complete or cancel
dlg = TweakEpub(self.gui, path_to_epub)
if dlg.exec_() == dlg.Accepted:
self._update_db(id, dlg._output)
self.update_db(book_id, dlg._output)
dlg.cleanup()
def _get_selected_id(self):
rows = self.gui.library_view.selectionModel().selectedRows()
return map(self.gui.library_view.model().id, rows)[0]
def _update_db(self, id, rebuilt):
def update_db(self, book_id, rebuilt):
'''
Update the calibre db with the tweaked epub
'''
print "gui2.actions.tweak_epub:TweakEpubAction._update_db()"
print " updating id %d from %s" % (id, rebuilt)
self.gui.library_view.model().db.add_format_with_hooks(id, 'EPUB', rebuilt, index_is_id=True)
self.gui.library_view.model().db.add_format(book_id, 'EPUB',
open(rebuilt, 'rb'), index_is_id=True)

View File

@ -800,7 +800,7 @@ class DeviceMixin(object): # {{{
# if set_books_in_library did not.
if not self.set_books_in_library(self.booklists(), reset=True):
self.upload_booklists()
self.book_on_device(None, None, reset=True)
self.book_on_device(None, reset=True)
# We need to reset the ondevice flags in the library. Use a big hammer,
# so we don't need to worry about whether some succeeded or not.
self.refresh_ondevice_info(device_connected=True, reset_only=False)
@ -1309,7 +1309,7 @@ class DeviceMixin(object): # {{{
for f in files:
getattr(f, 'close', lambda : True)()
def book_on_device(self, id, format=None, reset=False):
def book_on_device(self, id, reset=False):
'''
Return an indication of whether the given book represented by its db id
is on the currently connected device. It returns a 5 element list. The
@ -1338,8 +1338,6 @@ class DeviceMixin(object): # {{{
self.book_db_id_cache.append(set())
for book in l:
db_id = getattr(book, 'application_id', None)
if db_id is None:
db_id = book.db_id
if db_id is not None:
# increment the count of books on the device with this
# db_id.

View File

@ -300,6 +300,24 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.cpixmap = pix
self.cover_data = cdata
def trim_cover(self, *args):
    '''
    Trim the current cover image and show the result in the dialog.

    Does nothing when no cover data is set. Otherwise the cover data is
    loaded into a magick Image, trimmed with an argument of 10,
    re-exported as JPEG and stored back on the dialog, which is flagged
    as having a changed cover. Extra positional arguments (e.g. whatever
    the button's clicked signal passes) are ignored.
    '''
    from calibre.utils.magick import Image
    original = self.cover_data
    if original:
        image = Image()
        image.load(original)
        image.trim(10)
        trimmed = image.export('jpg')
        # Rebuild the pixmap from the trimmed bytes so the preview and the
        # stored cover data stay in sync.
        pixmap = QPixmap()
        pixmap.loadFromData(trimmed)
        self.cover.setPixmap(pixmap)
        self.cover_changed = True
        self.cpixmap = pixmap
        self.cover_data = trimmed
def sync_formats(self):
old_extensions, new_extensions, paths = set(), set(), {}
for row in range(self.formats.count()):
@ -380,6 +398,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.remove_unused_series)
QObject.connect(self.auto_author_sort, SIGNAL('clicked()'),
self.deduce_author_sort)
self.trim_cover_button.clicked.connect(self.trim_cover)
self.connect(self.author_sort, SIGNAL('textChanged(const QString&)'),
self.author_sort_box_changed)
self.connect(self.authors, SIGNAL('editTextChanged(const QString&)'),

View File

@ -625,6 +625,17 @@ Using this button to create author sort will change author sort from red to gree
</property>
</widget>
</item>
<item>
<widget class="QToolButton" name="trim_cover_button">
<property name="toolTip">
<string>Remove border (if any) from cover</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/trim.png</normaloff>:/images/trim.png</iconset>
</property>
</widget>
</item>
<item>
<widget class="QToolButton" name="reset_cover">
<property name="toolTip">

View File

@ -6,15 +6,12 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, shutil, subprocess, sys
import os, shutil
from contextlib import closing
from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED
from PyQt4 import QtGui
from PyQt4.Qt import QDialog, SIGNAL
from PyQt4.Qt import QDialog
from calibre import prints
from calibre.constants import iswindows, isosx, DEBUG
from calibre.gui2 import open_local_file
from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog
from calibre.libunzip import extract as zipextract
@ -26,7 +23,6 @@ class TweakEpub(QDialog, Ui_Dialog):
To do:
- need way to kill file browser proc in cleanup()
- linux file browser launch
'''
def __init__(self, parent, epub):
@ -40,36 +36,17 @@ class TweakEpub(QDialog, Ui_Dialog):
# Run the dialog setup generated from tweak_epub.ui
self.setupUi(self)
self.connect(self.cancel_button,
SIGNAL("clicked()"),
self.cancel)
self.connect(self.explode_button,
SIGNAL("clicked()"),
self.explode)
self.connect(self.rebuild_button,
SIGNAL("clicked()"),
self.rebuild)
self.cancel_button.clicked.connect(self.reject)
self.explode_button.clicked.connect(self.explode)
self.rebuild_button.clicked.connect(self.rebuild)
# Position update dialog overlaying top left of app window
parent_loc = parent.pos()
self.move(parent_loc.x(),parent_loc.y())
def cancel(self):
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.cancel()")
return QDialog.reject(self)
def cleanup(self):
'''
Kill the file browser
'''
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.cleanup()")
# Delete directory containing exploded ePub
if self._exploded is not None:
if DEBUG:
prints(" removing exploded dir\n %s" % self._exploded)
shutil.rmtree(self._exploded, ignore_errors=True)
@ -78,37 +55,17 @@ class TweakEpub(QDialog, Ui_Dialog):
Generic subprocess launch of native file browser
User can use right-click to 'Open with ...'
'''
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.display_exploded()")
'''
if isosx:
cmd = 'open %s' % self._exploded
elif iswindows:
cmd = 'start explorer.exe /e,/root,%s' % self._exploded
else:
# *** Kovid - need proper linux invocation here ***
cmd = '<linux command to open native file browser>'
# *** Kovid - need a way of launching this process than can be killed in cleanup() ***
self._file_browser_proc = subprocess.Popen(cmd, shell=True)
'''
open_local_file(self._exploded)
def explode(self):
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.explode()")
def explode(self, *args):
if self._exploded is None:
if DEBUG:
prints(" exploding %s" % self._epub)
self._exploded = PersistentTemporaryDirectory("_exploded", prefix='')
zipextract(self._epub, self._exploded)
self.display_exploded()
self.rebuild_button.setEnabled(True)
self.explode_button.setEnabled(False)
def rebuild(self):
if DEBUG:
prints("gui2.dialogs.tweak_epub:TweakEpub.rebuild()")
def rebuild(self, *args):
self._output = os.path.join(self._exploded, 'rebuilt.epub')
with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as zf:
# Write mimetype
@ -120,7 +77,8 @@ class TweakEpub(QDialog, Ui_Dialog):
if fn in exclude_files:
continue
absfn = os.path.join(root, fn)
zfn = absfn[len(self._exploded) + len(os.sep):]
zfn = os.path.relpath(absfn,
self._exploded).replace(os.sep, '/')
zf.write(absfn, zfn)
return QDialog.accept(self)

View File

@ -9,8 +9,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>161</width>
<height>132</height>
<width>382</width>
<height>242</height>
</rect>
</property>
<property name="windowTitle">
@ -22,65 +22,66 @@
<property name="modal">
<bool>false</bool>
</property>
<widget class="QWidget" name="verticalLayoutWidget">
<property name="geometry">
<rect>
<x>10</x>
<y>10</y>
<width>141</width>
<height>110</height>
</rect>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QPushButton" name="explode_button">
<property name="statusTip">
<string>Display contents of exploded ePub</string>
</property>
<property name="text">
<string>Explode ePub</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="rebuild_button">
<property name="enabled">
<bool>false</bool>
</property>
<property name="statusTip">
<string>Rebuild ePub from exploded contents</string>
</property>
<property name="text">
<string>Rebuild ePub</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="cancel_button">
<property name="statusTip">
<string>Discard changes</string>
</property>
<property name="text">
<string>Cancel</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="0">
<widget class="QPushButton" name="explode_button">
<property name="statusTip">
<string>Display contents of exploded ePub</string>
</property>
<property name="text">
<string>&amp;Explode ePub</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/wizard.png</normaloff>:/images/wizard.png</iconset>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QPushButton" name="rebuild_button">
<property name="enabled">
<bool>false</bool>
</property>
<property name="statusTip">
<string>Rebuild ePub from exploded contents</string>
</property>
<property name="text">
<string>&amp;Rebuild ePub</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QPushButton" name="cancel_button">
<property name="statusTip">
<string>Discard changes</string>
</property>
<property name="text">
<string>&amp;Cancel</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/window-close.png</normaloff>:/images/window-close.png</iconset>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>First, explode the epub. Then edit its contents by right clicking on the individual files and selecting the editor of your choice. When you are done, click rebuild epub and the epub in your calibre library will be updated with the changes you have made.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
<resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections/>
</ui>

View File

@ -217,6 +217,10 @@ def fetch_scheduled_recipe(arg):
if 'output_profile' in ps:
recs.append(('output_profile', ps['output_profile'],
OptionRecommendation.HIGH))
if ps['output_profile'] == 'kindle':
recs.append(('no_inline_toc', True,
OptionRecommendation.HIGH))
lf = load_defaults('look_and_feel')
if lf.get('base_font_size', 0.0) != 0.0:
recs.append(('base_font_size', lf['base_font_size'],

View File

@ -184,7 +184,7 @@ class ContentServer(object):
if path and os.path.exists(path):
updated = fromtimestamp(os.stat(path).st_mtime)
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
return fmt.read()
return fmt
# }}}