merge from trunk
46
.bzrignore
@ -35,3 +35,49 @@ nbproject/
|
||||
.settings/
|
||||
*.DS_Store
|
||||
calibre_plugins/
|
||||
recipes/.git
|
||||
recipes/.gitignore
|
||||
recipes/README
|
||||
recipes/katalog_egazeciarz.recipe
|
||||
recipes/tv_axnscifi.recipe
|
||||
recipes/tv_comedycentral.recipe
|
||||
recipes/tv_discoveryscience.recipe
|
||||
recipes/tv_foxlife.recipe
|
||||
recipes/tv_fox.recipe
|
||||
recipes/tv_hbo.recipe
|
||||
recipes/tv_kinopolska.recipe
|
||||
recipes/tv_nationalgeographic.recipe
|
||||
recipes/tv_polsat2.recipe
|
||||
recipes/tv_polsat.recipe
|
||||
recipes/tv_tv4.recipe
|
||||
recipes/tv_tvn7.recipe
|
||||
recipes/tv_tvn.recipe
|
||||
recipes/tv_tvp1.recipe
|
||||
recipes/tv_tvp2.recipe
|
||||
recipes/tv_tvphd.recipe
|
||||
recipes/tv_tvphistoria.recipe
|
||||
recipes/tv_tvpkultura.recipe
|
||||
recipes/tv_tvppolonia.recipe
|
||||
recipes/tv_tvpuls.recipe
|
||||
recipes/tv_viasathistory.recipe
|
||||
recipes/icons/tv_axnscifi.png
|
||||
recipes/icons/tv_comedycentral.png
|
||||
recipes/icons/tv_discoveryscience.png
|
||||
recipes/icons/tv_foxlife.png
|
||||
recipes/icons/tv_fox.png
|
||||
recipes/icons/tv_hbo.png
|
||||
recipes/icons/tv_kinopolska.png
|
||||
recipes/icons/tv_nationalgeographic.png
|
||||
recipes/icons/tv_polsat2.png
|
||||
recipes/icons/tv_polsat.png
|
||||
recipes/icons/tv_tv4.png
|
||||
recipes/icons/tv_tvn7.png
|
||||
recipes/icons/tv_tvn.png
|
||||
recipes/icons/tv_tvp1.png
|
||||
recipes/icons/tv_tvp2.png
|
||||
recipes/icons/tv_tvphd.png
|
||||
recipes/icons/tv_tvphistoria.png
|
||||
recipes/icons/tv_tvpkultura.png
|
||||
recipes/icons/tv_tvppolonia.png
|
||||
recipes/icons/tv_tvpuls.png
|
||||
recipes/icons/tv_viasathistory.png
|
||||
|
17
COPYRIGHT
@ -28,6 +28,12 @@ License: LGPL-2.1+
|
||||
The full text of the LGPL is distributed as in
|
||||
/usr/share/common-licenses/LGPL-2.1 on Debian systems.
|
||||
|
||||
Files: src/calibre/utils/fonts/woff/*
|
||||
Copyright: Jonathan Kew?
|
||||
License: LGPL-2.1
|
||||
The full text of the LGPL is distributed as in
|
||||
/usr/share/common-licenses/LGPL-2.1 on Debian systems.
|
||||
|
||||
Files: src/calibre/ebooks/hyphenate.py
|
||||
Copyright: Copyright (C) 1990, 2004, 2005 Gerard D.C. Kuiken.
|
||||
License: other
|
||||
@ -41,6 +47,12 @@ License: Apache 2.0
|
||||
The full text of the Apache 2.0 license is available at:
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Files: resources/viewer/mathjax/*
|
||||
Copyright: Unknown
|
||||
License: Apache 2.0
|
||||
The full text of the Apache 2.0 license is available at:
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Files: /src/cherrypy/*
|
||||
Copyright: Copyright (c) 2004-2007, CherryPy Team (team@cherrypy.org)
|
||||
Copyright: Copyright (C) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>
|
||||
@ -396,8 +408,9 @@ License: other
|
||||
|
||||
Liberation Fonts
|
||||
-----------------
|
||||
calibre includes a copy of the liberation fonts, available from
|
||||
https://calibre-ebook.com/downloads/liberation-fonts
|
||||
calibre includes a copy of the liberation fonts in TTF format, licensed under
|
||||
the SIL Open Font License, Version 1.1, and available from
|
||||
https://fedorahosted.org/liberation-fonts/
|
||||
|
||||
BSD License (for all the BSD licensed code indicated above)
|
||||
-----------------------------------------------------------
|
||||
|
1618
Changelog.old.yaml
2861
Changelog.yaml
@ -9,7 +9,7 @@ the file are big-endian.
|
||||
Layout
|
||||
------
|
||||
|
||||
bytes content comments
|
||||
bytes content comments
|
||||
|
||||
4 00010001 Format identifier. Value of 65537 little-endian.
|
||||
4 start of next The offset after ending location of the first header.
|
||||
@ -25,7 +25,7 @@ Starts next sequence
|
||||
2 unknown Always 32
|
||||
N second header String containing the page mapping header
|
||||
4*N padding The first number given in the page mapping header indicates the number of 0 bytes.
|
||||
4*N page list
|
||||
4*N page list
|
||||
|
||||
|
||||
Content Header
|
||||
@ -44,6 +44,14 @@ Example:
|
||||
{"contentGuid":"d8c14b0","asin":"B000JML5VM","cdeType":"EBOK","fileRevisionId":"1296874359405"}
|
||||
|
||||
|
||||
In devices with KF8 support, we're seeing an extended content header (which seems to be required by some FW versions for KF8 files, like FW 3.4):
|
||||
|
||||
format Mobi version. MOBI_8 for KF8, MOBI_7 for legacy mobi files.
|
||||
acr Palm DB name
|
||||
|
||||
Example:
|
||||
{"contentGuid":"f2fc7597","asin":"B003M68YKM","cdeType":"EBOK","format":"MOBI_8","fileRevisionId":"1342776186889","acr":"CR!1F5WDHWWVN4Y78MA87Z13H9K6RKE"}
|
||||
|
||||
Page Mapping Header
|
||||
-------------------
|
||||
|
||||
|
BIN
icons/install.ico
Normal file
After Width: | Height: | Size: 25 KiB |
2862
imgsrc/font.svg
Normal file
After Width: | Height: | Size: 119 KiB |
237
imgsrc/mimetypes/azw2.svg
Normal file
@ -0,0 +1,237 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
version="1.0"
|
||||
width="128"
|
||||
height="128"
|
||||
id="svg2606"
|
||||
inkscape:version="0.48.3.1 r9886"
|
||||
sodipodi:docname="azw2.svg"
|
||||
inkscape:export-filename="/home/niluje/Patchland/calibre/imgsrc/mimetypes/azw2.png"
|
||||
inkscape:export-xdpi="90"
|
||||
inkscape:export-ydpi="90">
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1025"
|
||||
id="namedview45"
|
||||
showgrid="false"
|
||||
inkscape:zoom="1.84375"
|
||||
inkscape:cx="-11.118644"
|
||||
inkscape:cy="42.305085"
|
||||
inkscape:window-x="-2"
|
||||
inkscape:window-y="-3"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="svg2606" />
|
||||
<defs
|
||||
id="defs2608">
|
||||
<linearGradient
|
||||
id="linearGradient10207">
|
||||
<stop
|
||||
id="stop10209"
|
||||
style="stop-color:#a2a2a2;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop10211"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="1" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
x1="96"
|
||||
y1="104"
|
||||
x2="88.000198"
|
||||
y2="96.000198"
|
||||
id="XMLID_12_"
|
||||
gradientUnits="userSpaceOnUse">
|
||||
<stop
|
||||
id="stop83"
|
||||
style="stop-color:#888a85;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop85"
|
||||
style="stop-color:#8c8e89;stop-opacity:1"
|
||||
offset="0.0072" />
|
||||
<stop
|
||||
id="stop87"
|
||||
style="stop-color:#abaca9;stop-opacity:1"
|
||||
offset="0.0673" />
|
||||
<stop
|
||||
id="stop89"
|
||||
style="stop-color:#c5c6c4;stop-opacity:1"
|
||||
offset="0.1347" />
|
||||
<stop
|
||||
id="stop91"
|
||||
style="stop-color:#dbdbda;stop-opacity:1"
|
||||
offset="0.2652576" />
|
||||
<stop
|
||||
id="stop93"
|
||||
style="stop-color:#ebebeb;stop-opacity:1"
|
||||
offset="0.37646064" />
|
||||
<stop
|
||||
id="stop95"
|
||||
style="stop-color:#f7f7f6;stop-opacity:1"
|
||||
offset="0.48740286" />
|
||||
<stop
|
||||
id="stop97"
|
||||
style="stop-color:#fdfdfd;stop-opacity:1"
|
||||
offset="0.6324091" />
|
||||
<stop
|
||||
id="stop99"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="1" />
|
||||
</linearGradient>
|
||||
<radialGradient
|
||||
cx="102"
|
||||
cy="112.3047"
|
||||
r="139.55859"
|
||||
id="XMLID_8_"
|
||||
gradientUnits="userSpaceOnUse">
|
||||
<stop
|
||||
id="stop41"
|
||||
style="stop-color:#b7b8b9;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop47"
|
||||
style="stop-color:#ececec;stop-opacity:1"
|
||||
offset="0.18851049" />
|
||||
<stop
|
||||
id="stop49"
|
||||
style="stop-color:#fafafa;stop-opacity:1"
|
||||
offset="0.25718147" />
|
||||
<stop
|
||||
id="stop51"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="0.30111277" />
|
||||
<stop
|
||||
id="stop53"
|
||||
style="stop-color:#fafafa;stop-opacity:1"
|
||||
offset="0.53130001" />
|
||||
<stop
|
||||
id="stop55"
|
||||
style="stop-color:#ebecec;stop-opacity:1"
|
||||
offset="0.84490001" />
|
||||
<stop
|
||||
id="stop57"
|
||||
style="stop-color:#e1e2e3;stop-opacity:1"
|
||||
offset="1" />
|
||||
</radialGradient>
|
||||
<filter
|
||||
x="-0.19200002"
|
||||
y="-0.19199999"
|
||||
width="1.3839999"
|
||||
height="1.3839999"
|
||||
color-interpolation-filters="sRGB"
|
||||
id="filter6697">
|
||||
<feGaussianBlur
|
||||
id="feGaussianBlur6699"
|
||||
stdDeviation="1.9447689" />
|
||||
</filter>
|
||||
<clipPath
|
||||
id="clipPath7084">
|
||||
<path
|
||||
d="m 72,88 -32,32 -8,0 0,-40 40,0 0,8 z"
|
||||
id="path7086"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none" />
|
||||
</clipPath>
|
||||
<radialGradient
|
||||
cx="102"
|
||||
cy="112.3047"
|
||||
r="139.55859"
|
||||
id="radialGradient9437"
|
||||
xlink:href="#XMLID_8_"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="matrix(1,0,0,0.9996653,2e-6,0.00301608)" />
|
||||
<linearGradient
|
||||
x1="98.617439"
|
||||
y1="106.41443"
|
||||
x2="91.228737"
|
||||
y2="99.254974"
|
||||
id="linearGradient10213"
|
||||
xlink:href="#linearGradient10207"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
<filter
|
||||
color-interpolation-filters="sRGB"
|
||||
id="filter2770">
|
||||
<feGaussianBlur
|
||||
id="feGaussianBlur2772"
|
||||
stdDeviation="2.0786429" />
|
||||
</filter>
|
||||
</defs>
|
||||
<metadata
|
||||
id="metadata2611">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
id="layer1">
|
||||
<path
|
||||
d="m 16,8 0,112 c 0,0 63.15625,0 63.15625,0 l 0.03125,0 c 3e-6,0 11.90625,-9.90625 17.40625,-15.40625 C 102.09375,99.09375 112,87.1875 112,87.1875 L 112,87.15625 112,8 16,8 z"
|
||||
transform="matrix(1.0416667,0,0,1.0267857,-2.6666667,-1.2142891)"
|
||||
id="path7865"
|
||||
style="opacity:0.5;fill:#000000;fill-opacity:1;filter:url(#filter2770)" />
|
||||
<path
|
||||
d="M 16.000001,8 16,120 c 0,0 63.146418,0 63.146418,0 L 112,87.14642 112,8 16.000001,8 z"
|
||||
id="path34"
|
||||
style="fill:#ffffff;fill-opacity:1" />
|
||||
<path
|
||||
d="m 18.000002,9.0000034 c -0.551,0 -1,0.44885 -1,0.999665 l 0,107.9638516 c 0,0.55181 0.449,0.99966 1,0.99966 l 59.171997,0 c 0.263,0 2.76268,0.11813 2.948681,-0.0688 L 110.707,88.094202 C 110.894,87.907264 111,85.40942 111,85.146508 l 0,-75.1468396 c 0,-0.550815 -0.448,-0.999665 -1,-0.999665 l -91.999998,0 z"
|
||||
id="path59"
|
||||
style="fill:url(#radialGradient9437);fill-opacity:1" />
|
||||
<path
|
||||
d="m 41.879531,115.98249 c 0,0 24.309609,-24.309614 24.309609,-24.309614 0,0 -9.35314,2.913124 -19.60314,2.913124 0,10.25 -4.706469,21.39649 -4.706469,21.39649 z"
|
||||
transform="translate(40,0)"
|
||||
clip-path="url(#clipPath7084)"
|
||||
id="path5540"
|
||||
style="opacity:0.4;fill:#000000;fill-opacity:1;filter:url(#filter6697)" />
|
||||
<path
|
||||
d="m 79.172,120 c 0,0 11.914,-9.914 17.414,-15.414 5.5,-5.5 15.414,-17.414 15.414,-17.414 0,0 -13.75,8.828 -24,8.828 0,10.25 -8.828,24 -8.828,24 z"
|
||||
id="path14523"
|
||||
style="fill:url(#linearGradient10213);fill-opacity:1" />
|
||||
<text
|
||||
x="63.980469"
|
||||
y="32.160156"
|
||||
id="text3772"
|
||||
xml:space="preserve"
|
||||
style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:DejaVu Sans;-inkscape-font-specification:DejaVu Sans"
|
||||
sodipodi:linespacing="125%"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan3021"
|
||||
x="63.980469"
|
||||
y="32.160156">Kindlet</tspan></text>
|
||||
<path
|
||||
d="m 68.227,60.477999 c 0,2.157 0.052,3.954 -1.035,5.874 -0.88,1.561 -2.279,2.517 -3.833,2.517 -2.121,0 -3.366,-1.62 -3.366,-4.015 C 59.993,60.14 64.225,59.283 68.226,59.283 v 1.194999 z m 5.579,13.496 c -0.365,0.332 -0.896,0.352 -1.307,0.132 -1.838,-1.528 -2.167,-2.231 -3.174,-3.69 -3.035,3.094 -5.188,4.023 -9.123,4.023 -4.663,0 -8.284,-2.876 -8.284,-8.629 0,-4.49 2.433,-7.543999 5.899,-9.044999 3.005,-1.317 7.202,-1.556 10.41,-1.914 v -0.723 c 0,-1.313 0.104,-2.875 -0.671,-4.012 -0.674,-1.021 -1.968,-1.437 -3.106,-1.437 -2.111,0 -3.99,1.078 -4.45,3.321 -0.097,0.498 -0.46,0.991 -0.962,1.017 l -5.364,-0.581 c -0.456,-0.102 -0.958,-0.463 -0.828,-1.155 1.233,-6.511 7.109,-8.475 12.378,-8.475 2.693,0 6.215,0.719 8.335,2.757 2.692997,2.515 2.431997,5.869 2.431997,9.524 v 8.622999 c 0,2.596 1.081,3.732 2.091,5.128 0.354,0.503 0.434,1.103 -0.018,1.473 -1.131,0.949 -3.138997,2.693 -4.243997,3.676 l -0.014,-0.013 z"
|
||||
id="path4047"
|
||||
style="fill-rule:evenodd" />
|
||||
<path
|
||||
d="m 99.325111,79.885297 c -8.716894,6.432161 -21.357191,9.853809 -32.243032,9.853809 -15.251419,0 -28.989193,-5.636741 -39.38419,-15.021522 -0.815557,-0.738364 -0.08726,-1.746902 0.89191,-1.173831 C 39.807066,80.071565 53.675732,84 68,84 c 9.664184,0 20.284886,-2.004491 30.058986,-6.151079 1.474215,-0.623415 2.709284,0.97246 1.266125,2.036376 z"
|
||||
id="path3858"
|
||||
style="fill:#ff9201;fill-rule:evenodd" />
|
||||
<path
|
||||
d="m 104,76 c -1.11342,-1.426386 -7.371903,-0.676274 -10.179364,-0.337298 -0.853315,0.09817 -0.984206,-0.641874 -0.217315,-1.184739 4.990671,-3.505554 13.168059,-2.491141 14.119539,-1.318987 0.95736,1.187256 -0.25087,9.384779 -4.92858,13.293915 -0.71907,0.604117 -1.40373,0.286117 -1.08573,-0.510142 C 102.75988,83.311486 105.11761,77.427225 104,76 z"
|
||||
id="path3860"
|
||||
style="fill:#ff9201;fill-rule:evenodd" />
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 9.1 KiB |
237
imgsrc/mimetypes/azw3.svg
Normal file
@ -0,0 +1,237 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
version="1.0"
|
||||
width="128"
|
||||
height="128"
|
||||
id="svg2606"
|
||||
inkscape:version="0.48.3.1 r9886"
|
||||
sodipodi:docname="azw3.svg"
|
||||
inkscape:export-filename="/home/niluje/Patchland/calibre/imgsrc/mimetypes/azw3.png"
|
||||
inkscape:export-xdpi="90"
|
||||
inkscape:export-ydpi="90">
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1025"
|
||||
id="namedview45"
|
||||
showgrid="false"
|
||||
inkscape:zoom="1.84375"
|
||||
inkscape:cx="-11.118644"
|
||||
inkscape:cy="42.305085"
|
||||
inkscape:window-x="-2"
|
||||
inkscape:window-y="-3"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="svg2606" />
|
||||
<defs
|
||||
id="defs2608">
|
||||
<linearGradient
|
||||
id="linearGradient10207">
|
||||
<stop
|
||||
id="stop10209"
|
||||
style="stop-color:#a2a2a2;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop10211"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="1" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
x1="96"
|
||||
y1="104"
|
||||
x2="88.000198"
|
||||
y2="96.000198"
|
||||
id="XMLID_12_"
|
||||
gradientUnits="userSpaceOnUse">
|
||||
<stop
|
||||
id="stop83"
|
||||
style="stop-color:#888a85;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop85"
|
||||
style="stop-color:#8c8e89;stop-opacity:1"
|
||||
offset="0.0072" />
|
||||
<stop
|
||||
id="stop87"
|
||||
style="stop-color:#abaca9;stop-opacity:1"
|
||||
offset="0.0673" />
|
||||
<stop
|
||||
id="stop89"
|
||||
style="stop-color:#c5c6c4;stop-opacity:1"
|
||||
offset="0.1347" />
|
||||
<stop
|
||||
id="stop91"
|
||||
style="stop-color:#dbdbda;stop-opacity:1"
|
||||
offset="0.2652576" />
|
||||
<stop
|
||||
id="stop93"
|
||||
style="stop-color:#ebebeb;stop-opacity:1"
|
||||
offset="0.37646064" />
|
||||
<stop
|
||||
id="stop95"
|
||||
style="stop-color:#f7f7f6;stop-opacity:1"
|
||||
offset="0.48740286" />
|
||||
<stop
|
||||
id="stop97"
|
||||
style="stop-color:#fdfdfd;stop-opacity:1"
|
||||
offset="0.6324091" />
|
||||
<stop
|
||||
id="stop99"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="1" />
|
||||
</linearGradient>
|
||||
<radialGradient
|
||||
cx="102"
|
||||
cy="112.3047"
|
||||
r="139.55859"
|
||||
id="XMLID_8_"
|
||||
gradientUnits="userSpaceOnUse">
|
||||
<stop
|
||||
id="stop41"
|
||||
style="stop-color:#b7b8b9;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop47"
|
||||
style="stop-color:#ececec;stop-opacity:1"
|
||||
offset="0.18851049" />
|
||||
<stop
|
||||
id="stop49"
|
||||
style="stop-color:#fafafa;stop-opacity:1"
|
||||
offset="0.25718147" />
|
||||
<stop
|
||||
id="stop51"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="0.30111277" />
|
||||
<stop
|
||||
id="stop53"
|
||||
style="stop-color:#fafafa;stop-opacity:1"
|
||||
offset="0.53130001" />
|
||||
<stop
|
||||
id="stop55"
|
||||
style="stop-color:#ebecec;stop-opacity:1"
|
||||
offset="0.84490001" />
|
||||
<stop
|
||||
id="stop57"
|
||||
style="stop-color:#e1e2e3;stop-opacity:1"
|
||||
offset="1" />
|
||||
</radialGradient>
|
||||
<filter
|
||||
x="-0.19200002"
|
||||
y="-0.19199999"
|
||||
width="1.3839999"
|
||||
height="1.3839999"
|
||||
color-interpolation-filters="sRGB"
|
||||
id="filter6697">
|
||||
<feGaussianBlur
|
||||
id="feGaussianBlur6699"
|
||||
stdDeviation="1.9447689" />
|
||||
</filter>
|
||||
<clipPath
|
||||
id="clipPath7084">
|
||||
<path
|
||||
d="m 72,88 -32,32 -8,0 0,-40 40,0 0,8 z"
|
||||
id="path7086"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none" />
|
||||
</clipPath>
|
||||
<radialGradient
|
||||
cx="102"
|
||||
cy="112.3047"
|
||||
r="139.55859"
|
||||
id="radialGradient9437"
|
||||
xlink:href="#XMLID_8_"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="matrix(1,0,0,0.9996653,2e-6,0.00301608)" />
|
||||
<linearGradient
|
||||
x1="98.617439"
|
||||
y1="106.41443"
|
||||
x2="91.228737"
|
||||
y2="99.254974"
|
||||
id="linearGradient10213"
|
||||
xlink:href="#linearGradient10207"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
<filter
|
||||
color-interpolation-filters="sRGB"
|
||||
id="filter2770">
|
||||
<feGaussianBlur
|
||||
id="feGaussianBlur2772"
|
||||
stdDeviation="2.0786429" />
|
||||
</filter>
|
||||
</defs>
|
||||
<metadata
|
||||
id="metadata2611">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title />
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
id="layer1">
|
||||
<path
|
||||
d="m 16,8 0,112 c 0,0 63.15625,0 63.15625,0 l 0.03125,0 c 3e-6,0 11.90625,-9.90625 17.40625,-15.40625 C 102.09375,99.09375 112,87.1875 112,87.1875 L 112,87.15625 112,8 16,8 z"
|
||||
transform="matrix(1.0416667,0,0,1.0267857,-2.6666667,-1.2142891)"
|
||||
id="path7865"
|
||||
style="opacity:0.5;fill:#000000;fill-opacity:1;filter:url(#filter2770)" />
|
||||
<path
|
||||
d="M 16.000001,8 16,120 c 0,0 63.146418,0 63.146418,0 L 112,87.14642 112,8 16.000001,8 z"
|
||||
id="path34"
|
||||
style="fill:#ffffff;fill-opacity:1" />
|
||||
<path
|
||||
d="m 18.000002,9.0000034 c -0.551,0 -1,0.44885 -1,0.999665 l 0,107.9638516 c 0,0.55181 0.449,0.99966 1,0.99966 l 59.171997,0 c 0.263,0 2.76268,0.11813 2.948681,-0.0688 L 110.707,88.094202 C 110.894,87.907264 111,85.40942 111,85.146508 l 0,-75.1468396 c 0,-0.550815 -0.448,-0.999665 -1,-0.999665 l -91.999998,0 z"
|
||||
id="path59"
|
||||
style="fill:url(#radialGradient9437);fill-opacity:1" />
|
||||
<path
|
||||
d="m 41.879531,115.98249 c 0,0 24.309609,-24.309614 24.309609,-24.309614 0,0 -9.35314,2.913124 -19.60314,2.913124 0,10.25 -4.706469,21.39649 -4.706469,21.39649 z"
|
||||
transform="translate(40,0)"
|
||||
clip-path="url(#clipPath7084)"
|
||||
id="path5540"
|
||||
style="opacity:0.4;fill:#000000;fill-opacity:1;filter:url(#filter6697)" />
|
||||
<path
|
||||
d="m 79.172,120 c 0,0 11.914,-9.914 17.414,-15.414 5.5,-5.5 15.414,-17.414 15.414,-17.414 0,0 -13.75,8.828 -24,8.828 0,10.25 -8.828,24 -8.828,24 z"
|
||||
id="path14523"
|
||||
style="fill:url(#linearGradient10213);fill-opacity:1" />
|
||||
<text
|
||||
x="64.392578"
|
||||
y="32.103516"
|
||||
id="text3772"
|
||||
xml:space="preserve"
|
||||
style="font-size:28px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:DejaVu Sans;-inkscape-font-specification:DejaVu Sans"
|
||||
sodipodi:linespacing="125%"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan3026"
|
||||
x="64.392578"
|
||||
y="32.103516">KF8</tspan></text>
|
||||
<path
|
||||
d="m 68.227,60.477999 c 0,2.157 0.052,3.954 -1.035,5.874 -0.88,1.561 -2.279,2.517 -3.833,2.517 -2.121,0 -3.366,-1.62 -3.366,-4.015 C 59.993,60.14 64.225,59.283 68.226,59.283 v 1.194999 z m 5.579,13.496 c -0.365,0.332 -0.896,0.352 -1.307,0.132 -1.838,-1.528 -2.167,-2.231 -3.174,-3.69 -3.035,3.094 -5.188,4.023 -9.123,4.023 -4.663,0 -8.284,-2.876 -8.284,-8.629 0,-4.49 2.433,-7.543999 5.899,-9.044999 3.005,-1.317 7.202,-1.556 10.41,-1.914 v -0.723 c 0,-1.313 0.104,-2.875 -0.671,-4.012 -0.674,-1.021 -1.968,-1.437 -3.106,-1.437 -2.111,0 -3.99,1.078 -4.45,3.321 -0.097,0.498 -0.46,0.991 -0.962,1.017 l -5.364,-0.581 c -0.456,-0.102 -0.958,-0.463 -0.828,-1.155 1.233,-6.511 7.109,-8.475 12.378,-8.475 2.693,0 6.215,0.719 8.335,2.757 2.692997,2.515 2.431997,5.869 2.431997,9.524 v 8.622999 c 0,2.596 1.081,3.732 2.091,5.128 0.354,0.503 0.434,1.103 -0.018,1.473 -1.131,0.949 -3.138997,2.693 -4.243997,3.676 l -0.014,-0.013 z"
|
||||
id="path4047"
|
||||
style="fill-rule:evenodd" />
|
||||
<path
|
||||
d="m 99.325111,79.885297 c -8.716894,6.432161 -21.357191,9.853809 -32.243032,9.853809 -15.251419,0 -28.989193,-5.636741 -39.38419,-15.021522 -0.815557,-0.738364 -0.08726,-1.746902 0.89191,-1.173831 C 39.807066,80.071565 53.675732,84 68,84 c 9.664184,0 20.284886,-2.004491 30.058986,-6.151079 1.474215,-0.623415 2.709284,0.97246 1.266125,2.036376 z"
|
||||
id="path3858"
|
||||
style="fill:#ff9201;fill-rule:evenodd" />
|
||||
<path
|
||||
d="m 104,76 c -1.11342,-1.426386 -7.371903,-0.676274 -10.179364,-0.337298 -0.853315,0.09817 -0.984206,-0.641874 -0.217315,-1.184739 4.990671,-3.505554 13.168059,-2.491141 14.119539,-1.318987 0.95736,1.187256 -0.25087,9.384779 -4.92858,13.293915 -0.71907,0.604117 -1.40373,0.286117 -1.08573,-0.510142 C 102.75988,83.311486 105.11761,77.427225 104,76 z"
|
||||
id="path3860"
|
||||
style="fill:#ff9201;fill-rule:evenodd" />
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 9.1 KiB |
@ -8,10 +8,37 @@
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
version="1.0"
|
||||
width="128"
|
||||
height="128"
|
||||
id="svg2606">
|
||||
id="svg2606"
|
||||
inkscape:version="0.48.3.1 r9886"
|
||||
sodipodi:docname="mobi.svg"
|
||||
inkscape:export-filename="/home/niluje/Patchland/calibre/imgsrc/mimetypes/mobi.png"
|
||||
inkscape:export-xdpi="90"
|
||||
inkscape:export-ydpi="90">
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1025"
|
||||
id="namedview45"
|
||||
showgrid="false"
|
||||
inkscape:zoom="1.84375"
|
||||
inkscape:cx="64"
|
||||
inkscape:cy="64"
|
||||
inkscape:window-x="-2"
|
||||
inkscape:window-y="-3"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="svg2606" />
|
||||
<defs
|
||||
id="defs2608">
|
||||
<linearGradient
|
||||
@ -184,15 +211,16 @@
|
||||
id="path14523"
|
||||
style="fill:url(#linearGradient10213);fill-opacity:1" />
|
||||
<text
|
||||
x="32"
|
||||
y="32"
|
||||
x="64.902344"
|
||||
y="32.103516"
|
||||
id="text3772"
|
||||
xml:space="preserve"
|
||||
style="font-size:28px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#000000;fill-opacity:1;stroke:none;font-family:FreeSans;-inkscape-font-specification:FreeSans"><tspan
|
||||
x="32"
|
||||
y="32"
|
||||
style="font-size:28px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:DejaVu Sans;-inkscape-font-specification:DejaVu Sans"
|
||||
sodipodi:linespacing="125%"><tspan
|
||||
x="64.902344"
|
||||
y="32.103516"
|
||||
id="tspan3774"
|
||||
style="font-size:28px;fill:#000000;fill-opacity:1">mobi</tspan></text>
|
||||
style="font-size:28px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;font-family:DejaVu Sans;-inkscape-font-specification:DejaVu Sans">mobi</tspan></text>
|
||||
<path
|
||||
d="m 68.227,60.477999 c 0,2.157 0.052,3.954 -1.035,5.874 -0.88,1.561 -2.279,2.517 -3.833,2.517 -2.121,0 -3.366,-1.62 -3.366,-4.015 C 59.993,60.14 64.225,59.283 68.226,59.283 v 1.194999 z m 5.579,13.496 c -0.365,0.332 -0.896,0.352 -1.307,0.132 -1.838,-1.528 -2.167,-2.231 -3.174,-3.69 -3.035,3.094 -5.188,4.023 -9.123,4.023 -4.663,0 -8.284,-2.876 -8.284,-8.629 0,-4.49 2.433,-7.543999 5.899,-9.044999 3.005,-1.317 7.202,-1.556 10.41,-1.914 v -0.723 c 0,-1.313 0.104,-2.875 -0.671,-4.012 -0.674,-1.021 -1.968,-1.437 -3.106,-1.437 -2.111,0 -3.99,1.078 -4.45,3.321 -0.097,0.498 -0.46,0.991 -0.962,1.017 l -5.364,-0.581 c -0.456,-0.102 -0.958,-0.463 -0.828,-1.155 1.233,-6.511 7.109,-8.475 12.378,-8.475 2.693,0 6.215,0.719 8.335,2.757 2.692997,2.515 2.431997,5.869 2.431997,9.524 v 8.622999 c 0,2.596 1.081,3.732 2.091,5.128 0.354,0.503 0.434,1.103 -0.018,1.473 -1.131,0.949 -3.138997,2.693 -4.243997,3.676 l -0.014,-0.013 z"
|
||||
id="path4047"
|
||||
|
Before Width: | Height: | Size: 8.1 KiB After Width: | Height: | Size: 9.3 KiB |
237
imgsrc/mimetypes/tpz.svg
Normal file
@ -0,0 +1,237 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
version="1.0"
|
||||
width="128"
|
||||
height="128"
|
||||
id="svg2606"
|
||||
inkscape:version="0.48.3.1 r9886"
|
||||
sodipodi:docname="tpz.svg"
|
||||
inkscape:export-filename="/home/niluje/Patchland/calibre/imgsrc/mimetypes/tpz.png"
|
||||
inkscape:export-xdpi="90"
|
||||
inkscape:export-ydpi="90">
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1025"
|
||||
id="namedview45"
|
||||
showgrid="false"
|
||||
inkscape:zoom="1.84375"
|
||||
inkscape:cx="-11.118644"
|
||||
inkscape:cy="42.305085"
|
||||
inkscape:window-x="-2"
|
||||
inkscape:window-y="-3"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="svg2606" />
|
||||
<defs
|
||||
id="defs2608">
|
||||
<linearGradient
|
||||
id="linearGradient10207">
|
||||
<stop
|
||||
id="stop10209"
|
||||
style="stop-color:#a2a2a2;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop10211"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="1" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
x1="96"
|
||||
y1="104"
|
||||
x2="88.000198"
|
||||
y2="96.000198"
|
||||
id="XMLID_12_"
|
||||
gradientUnits="userSpaceOnUse">
|
||||
<stop
|
||||
id="stop83"
|
||||
style="stop-color:#888a85;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop85"
|
||||
style="stop-color:#8c8e89;stop-opacity:1"
|
||||
offset="0.0072" />
|
||||
<stop
|
||||
id="stop87"
|
||||
style="stop-color:#abaca9;stop-opacity:1"
|
||||
offset="0.0673" />
|
||||
<stop
|
||||
id="stop89"
|
||||
style="stop-color:#c5c6c4;stop-opacity:1"
|
||||
offset="0.1347" />
|
||||
<stop
|
||||
id="stop91"
|
||||
style="stop-color:#dbdbda;stop-opacity:1"
|
||||
offset="0.2652576" />
|
||||
<stop
|
||||
id="stop93"
|
||||
style="stop-color:#ebebeb;stop-opacity:1"
|
||||
offset="0.37646064" />
|
||||
<stop
|
||||
id="stop95"
|
||||
style="stop-color:#f7f7f6;stop-opacity:1"
|
||||
offset="0.48740286" />
|
||||
<stop
|
||||
id="stop97"
|
||||
style="stop-color:#fdfdfd;stop-opacity:1"
|
||||
offset="0.6324091" />
|
||||
<stop
|
||||
id="stop99"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="1" />
|
||||
</linearGradient>
|
||||
<radialGradient
|
||||
cx="102"
|
||||
cy="112.3047"
|
||||
r="139.55859"
|
||||
id="XMLID_8_"
|
||||
gradientUnits="userSpaceOnUse">
|
||||
<stop
|
||||
id="stop41"
|
||||
style="stop-color:#b7b8b9;stop-opacity:1"
|
||||
offset="0" />
|
||||
<stop
|
||||
id="stop47"
|
||||
style="stop-color:#ececec;stop-opacity:1"
|
||||
offset="0.18851049" />
|
||||
<stop
|
||||
id="stop49"
|
||||
style="stop-color:#fafafa;stop-opacity:1"
|
||||
offset="0.25718147" />
|
||||
<stop
|
||||
id="stop51"
|
||||
style="stop-color:#ffffff;stop-opacity:1"
|
||||
offset="0.30111277" />
|
||||
<stop
|
||||
id="stop53"
|
||||
style="stop-color:#fafafa;stop-opacity:1"
|
||||
offset="0.53130001" />
|
||||
<stop
|
||||
id="stop55"
|
||||
style="stop-color:#ebecec;stop-opacity:1"
|
||||
offset="0.84490001" />
|
||||
<stop
|
||||
id="stop57"
|
||||
style="stop-color:#e1e2e3;stop-opacity:1"
|
||||
offset="1" />
|
||||
</radialGradient>
|
||||
<filter
|
||||
x="-0.19200002"
|
||||
y="-0.19199999"
|
||||
width="1.3839999"
|
||||
height="1.3839999"
|
||||
color-interpolation-filters="sRGB"
|
||||
id="filter6697">
|
||||
<feGaussianBlur
|
||||
id="feGaussianBlur6699"
|
||||
stdDeviation="1.9447689" />
|
||||
</filter>
|
||||
<clipPath
|
||||
id="clipPath7084">
|
||||
<path
|
||||
d="m 72,88 -32,32 -8,0 0,-40 40,0 0,8 z"
|
||||
id="path7086"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none" />
|
||||
</clipPath>
|
||||
<radialGradient
|
||||
cx="102"
|
||||
cy="112.3047"
|
||||
r="139.55859"
|
||||
id="radialGradient9437"
|
||||
xlink:href="#XMLID_8_"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="matrix(1,0,0,0.9996653,2e-6,0.00301608)" />
|
||||
<linearGradient
|
||||
x1="98.617439"
|
||||
y1="106.41443"
|
||||
x2="91.228737"
|
||||
y2="99.254974"
|
||||
id="linearGradient10213"
|
||||
xlink:href="#linearGradient10207"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
<filter
|
||||
color-interpolation-filters="sRGB"
|
||||
id="filter2770">
|
||||
<feGaussianBlur
|
||||
id="feGaussianBlur2772"
|
||||
stdDeviation="2.0786429" />
|
||||
</filter>
|
||||
</defs>
|
||||
<metadata
|
||||
id="metadata2611">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
id="layer1">
|
||||
<path
|
||||
d="m 16,8 0,112 c 0,0 63.15625,0 63.15625,0 l 0.03125,0 c 3e-6,0 11.90625,-9.90625 17.40625,-15.40625 C 102.09375,99.09375 112,87.1875 112,87.1875 L 112,87.15625 112,8 16,8 z"
|
||||
transform="matrix(1.0416667,0,0,1.0267857,-2.6666667,-1.2142891)"
|
||||
id="path7865"
|
||||
style="opacity:0.5;fill:#000000;fill-opacity:1;filter:url(#filter2770)" />
|
||||
<path
|
||||
d="M 16.000001,8 16,120 c 0,0 63.146418,0 63.146418,0 L 112,87.14642 112,8 16.000001,8 z"
|
||||
id="path34"
|
||||
style="fill:#ffffff;fill-opacity:1" />
|
||||
<path
|
||||
d="m 18.000002,9.0000034 c -0.551,0 -1,0.44885 -1,0.999665 l 0,107.9638516 c 0,0.55181 0.449,0.99966 1,0.99966 l 59.171997,0 c 0.263,0 2.76268,0.11813 2.948681,-0.0688 L 110.707,88.094202 C 110.894,87.907264 111,85.40942 111,85.146508 l 0,-75.1468396 c 0,-0.550815 -0.448,-0.999665 -1,-0.999665 l -91.999998,0 z"
|
||||
id="path59"
|
||||
style="fill:url(#radialGradient9437);fill-opacity:1" />
|
||||
<path
|
||||
d="m 41.879531,115.98249 c 0,0 24.309609,-24.309614 24.309609,-24.309614 0,0 -9.35314,2.913124 -19.60314,2.913124 0,10.25 -4.706469,21.39649 -4.706469,21.39649 z"
|
||||
transform="translate(40,0)"
|
||||
clip-path="url(#clipPath7084)"
|
||||
id="path5540"
|
||||
style="opacity:0.4;fill:#000000;fill-opacity:1;filter:url(#filter6697)" />
|
||||
<path
|
||||
d="m 79.172,120 c 0,0 11.914,-9.914 17.414,-15.414 5.5,-5.5 15.414,-17.414 15.414,-17.414 0,0 -13.75,8.828 -24,8.828 0,10.25 -8.828,24 -8.828,24 z"
|
||||
id="path14523"
|
||||
style="fill:url(#linearGradient10213);fill-opacity:1" />
|
||||
<text
|
||||
x="64.703125"
|
||||
y="32.175781"
|
||||
id="text3772"
|
||||
xml:space="preserve"
|
||||
style="font-size:28px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:DejaVu Sans;-inkscape-font-specification:DejaVu Sans"
|
||||
sodipodi:linespacing="125%"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan3021"
|
||||
x="64.703125"
|
||||
y="32.175781">Topaz</tspan></text>
|
||||
<path
|
||||
d="m 68.227,60.477999 c 0,2.157 0.052,3.954 -1.035,5.874 -0.88,1.561 -2.279,2.517 -3.833,2.517 -2.121,0 -3.366,-1.62 -3.366,-4.015 C 59.993,60.14 64.225,59.283 68.226,59.283 v 1.194999 z m 5.579,13.496 c -0.365,0.332 -0.896,0.352 -1.307,0.132 -1.838,-1.528 -2.167,-2.231 -3.174,-3.69 -3.035,3.094 -5.188,4.023 -9.123,4.023 -4.663,0 -8.284,-2.876 -8.284,-8.629 0,-4.49 2.433,-7.543999 5.899,-9.044999 3.005,-1.317 7.202,-1.556 10.41,-1.914 v -0.723 c 0,-1.313 0.104,-2.875 -0.671,-4.012 -0.674,-1.021 -1.968,-1.437 -3.106,-1.437 -2.111,0 -3.99,1.078 -4.45,3.321 -0.097,0.498 -0.46,0.991 -0.962,1.017 l -5.364,-0.581 c -0.456,-0.102 -0.958,-0.463 -0.828,-1.155 1.233,-6.511 7.109,-8.475 12.378,-8.475 2.693,0 6.215,0.719 8.335,2.757 2.692997,2.515 2.431997,5.869 2.431997,9.524 v 8.622999 c 0,2.596 1.081,3.732 2.091,5.128 0.354,0.503 0.434,1.103 -0.018,1.473 -1.131,0.949 -3.138997,2.693 -4.243997,3.676 l -0.014,-0.013 z"
|
||||
id="path4047"
|
||||
style="fill-rule:evenodd" />
|
||||
<path
|
||||
d="m 99.325111,79.885297 c -8.716894,6.432161 -21.357191,9.853809 -32.243032,9.853809 -15.251419,0 -28.989193,-5.636741 -39.38419,-15.021522 -0.815557,-0.738364 -0.08726,-1.746902 0.89191,-1.173831 C 39.807066,80.071565 53.675732,84 68,84 c 9.664184,0 20.284886,-2.004491 30.058986,-6.151079 1.474215,-0.623415 2.709284,0.97246 1.266125,2.036376 z"
|
||||
id="path3858"
|
||||
style="fill:#ff9201;fill-rule:evenodd" />
|
||||
<path
|
||||
d="m 104,76 c -1.11342,-1.426386 -7.371903,-0.676274 -10.179364,-0.337298 -0.853315,0.09817 -0.984206,-0.641874 -0.217315,-1.184739 4.990671,-3.505554 13.168059,-2.491141 14.119539,-1.318987 0.95736,1.187256 -0.25087,9.384779 -4.92858,13.293915 -0.71907,0.604117 -1.40373,0.286117 -1.08573,-0.510142 C 102.75988,83.311486 105.11761,77.427225 104,76 z"
|
||||
id="path3860"
|
||||
style="fill:#ff9201;fill-rule:evenodd" />
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 9.1 KiB |
@ -60,7 +60,7 @@ htmlhelp:
|
||||
|
||||
latex:
|
||||
mkdir -p .build/latex .build/doctrees
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) .build/latex
|
||||
$(SPHINXBUILD) -b mylatex $(ALLSPHINXOPTS) .build/latex
|
||||
@echo
|
||||
@echo "Build finished; the LaTeX files are in .build/latex."
|
||||
@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
|
||||
|
140
manual/catalogs.rst
Normal file
@ -0,0 +1,140 @@
|
||||
|
||||
.. include:: global.rst
|
||||
|
||||
.. _catalog_tut:
|
||||
|
||||
Creating AZW3 • EPUB • MOBI Catalogs
|
||||
=====================================
|
||||
|
||||
|app|'s Create catalog feature enables you to create a catalog of your library in a variety of formats. This help file describes cataloging options when generating a catalog in AZW3, EPUB and MOBI formats.
|
||||
|
||||
.. contents::
|
||||
:depth: 1
|
||||
:local:
|
||||
|
||||
Selecting books to catalog
|
||||
-------------------------------
|
||||
|
||||
If you want *all* of your library cataloged, remove any search or filtering criteria in the main window. With a single book selected, all books in your library will be candidates for inclusion in the generated catalog. Individual books may be excluded by various criteria; see the :ref:`excluded_genres` section below for more information.
|
||||
|
||||
If you want only *some* of your library cataloged, you have two options:
|
||||
|
||||
* Create a multiple selection of the books you want cataloged. With more than one book selected in |app|'s main window, only the selected books will be cataloged.
|
||||
* Use the Search field or the Tag Browser to filter the displayed books. Only the displayed books will be cataloged.
|
||||
|
||||
To begin catalog generation, select the menu item :guilabel:`Convert books > Create a catalog of the books in your calibre library`. You may also add a :guilabel:`Create Catalog` button to a toolbar in :guilabel:`Preferences > Interface > Toolbars` for easier access to the Generate catalog dialog.
|
||||
|
||||
.. image:: images/catalog_options.png
|
||||
:alt: Catalog options
|
||||
:align: center
|
||||
|
||||
In :guilabel:`Catalog options`, select **AZW3, EPUB or MOBI** as the Catalog format. In the :guilabel:`Catalog title` field, provide a name that will be used for the generated catalog. If a catalog of the same name and format already exists, it will be replaced with the newly-generated catalog.
|
||||
|
||||
.. image:: images/catalog_send_to_device.png
|
||||
:alt: Catalog send to device
|
||||
:align: center
|
||||
|
||||
Enabling :guilabel:`Send catalog to device automatically` will download the generated catalog to a connected device upon completion.
|
||||
|
||||
Included sections
|
||||
-------------------
|
||||
|
||||
.. image:: images/included_sections.png
|
||||
:alt: Included sections
|
||||
:align: center
|
||||
|
||||
Sections enabled by a checkmark will be included in the generated catalog:
|
||||
|
||||
* :guilabel:`Authors` - all books, sorted by author, presented in a list format. Non-series books are listed before series books.
|
||||
* :guilabel:`Titles` - all books, sorted by title, presented in a list format.
|
||||
* :guilabel:`Series` - all books that are part of a series, sorted by series, presented in a list format.
|
||||
* :guilabel:`Genres` - individual genres presented in a list, sorted by Author and Series.
|
||||
* :guilabel:`Recently Added` - all books, sorted in reverse chronological order. List includes books added in the last 30 days, then a month-by-month listing of added books.
|
||||
* :guilabel:`Descriptions` - detailed description page for each book, including a cover thumbnail and comments. Sorted by author, with non-series books listed before series books.
|
||||
|
||||
Prefixes
|
||||
---------
|
||||
|
||||
.. image:: images/prefix_rules.png
|
||||
:alt: Prefix rules
|
||||
:align: center
|
||||
|
||||
Prefix rules allow you to add a prefix to book listings when certain criteria are met. For example, you might want to mark books you've read with a checkmark, or books on your wishlist with an X.
|
||||
|
||||
The checkbox in the first column enables the rule. :guilabel:`Name` is a rule name that you provide. :guilabel:`Field` is either :guilabel:`Tags` or a custom column from your library. :guilabel:`Value` is the content of :guilabel:`Field` to match. When a prefix rule is satisfied, the book will be marked with the selected :guilabel:`Prefix`.
|
||||
|
||||
Three prefix rules have been specified in the example above:
|
||||
|
||||
1. :guilabel:`Read book` specifies that a book with any date in a custom column named :guilabel:`Last read` will be prefixed with a checkmark symbol.
|
||||
2. :guilabel:`Wishlist item` specifies that any book with a :guilabel:`Wishlist` tag will be prefixed with an X symbol.
|
||||
3. :guilabel:`Library books` specifies that any book with a value of True (or Yes) in a custom column :guilabel:`Available in Library` will be prefixed with a double arrow symbol.
|
||||
|
||||
The first matching rule supplies the prefix. Disabled or incomplete rules are ignored.
|
||||
|
||||
Excluded books
|
||||
-----------------
|
||||
|
||||
.. image:: images/excluded_books.png
|
||||
:alt: Excluded books
|
||||
:align: center
|
||||
|
||||
Exclusion rules allow you to specify books that will not be cataloged.
|
||||
|
||||
The checkbox in the first column enables the rule. :guilabel:`Name` is a rule name that you provide. :guilabel:`Field` is either :guilabel:`Tags` or a custom column in your library. :guilabel:`Value` is the content of :guilabel:`Field` to match. When an exclusion rule is satisfied, the book will be excluded from the generated catalog.
|
||||
|
||||
Two exclusion rules have been specified in the example above:
|
||||
|
||||
1. The :guilabel:`Catalogs` rule specifies that any book with a :guilabel:`Catalog` tag will be excluded from the generated catalog.
|
||||
2. The :guilabel:`Archived Books` rule specifies that any book with a value of :guilabel:`Archived` in the custom column :guilabel:`Status` will be excluded from the generated catalog.
|
||||
|
||||
All rules are evaluated for every book. Disabled or incomplete rules are ignored.
|
||||
|
||||
.. _excluded_genres:
|
||||
|
||||
Excluded genres
|
||||
---------------
|
||||
|
||||
.. image:: images/excluded_genres.png
|
||||
:alt: Excluded genres
|
||||
:align: center
|
||||
|
||||
When the catalog is generated, tags in your database are used as genres. For example, you may use the tags ``Fiction`` and ``Nonfiction``. These tags become genres in the generated catalog, with books listed under their respective genre lists based on their assigned tags. A book will be listed in every genre section for which it has a corresponding tag.
|
||||
|
||||
You may be using certain tags for other purposes, perhaps a + to indicate a read book, or a bracketed tag like ``[Amazon Freebie]`` to indicate a book's source. The :guilabel:`Excluded genres` regex allows you to specify tags that you don't want used as genres in the generated catalog. The default exclusion regex pattern ``\[.+\]\+`` excludes any tags of the form ``[tag]``, as well as excluding ``+``, the default tag for read books, from being used as genres in the generated catalog.
|
||||
|
||||
You can also use an exact tag name in a regex. For example, ``[Amazon Freebie]`` or ``[Project Gutenberg]``. If you want to list multiple exact tags for exclusion, put a pipe (vertical bar) character between them: ``[Amazon Freebie]|[Project Gutenberg]``.
|
||||
|
||||
:guilabel:`Results of regex` shows you which tags will be excluded when the catalog is built, based on the tags in your database and the regex pattern you enter. The results are updated as you modify the regex pattern.
|
||||
|
||||
Other options
|
||||
--------------
|
||||
|
||||
.. image:: images/other_options.png
|
||||
:alt: Other options
|
||||
:align: center
|
||||
|
||||
:guilabel:`Catalog cover` specifies whether to generate a new cover or use an existing cover. It is possible to create a custom cover for your catalogs - see :ref:`Custom catalog covers` for more information. If you have created a custom cover that you want to reuse, select :guilabel:`Use existing cover`. Otherwise, select :guilabel:`Generate new cover`.
|
||||
|
||||
:guilabel:`Extra Description note` specifies a custom column's contents to be inserted into the Description page, next to the cover thumbnail. For example, you might want to display the date you last read a book using a :guilabel:`Last Read` custom column. For advanced use of the Description note feature, see `this post in the calibre forum <http://www.mobileread.com/forums/showpost.php?p=1335767&postcount=395>`_.
|
||||
|
||||
:guilabel:`Thumb width` specifies a width preference for cover thumbnails included with Descriptions pages. Thumbnails are cached to improve performance. To experiment with different widths, try generating a catalog with just a few books until you've determined your preferred width, then generate your full catalog. The first time a catalog is generated with a new thumbnail width, performance will be slower, but subsequent builds of the catalog will take advantage of the thumbnail cache.
|
||||
|
||||
:guilabel:`Merge with Comments` specifies a custom column whose content will be non-destructively merged with the Comments metadata during catalog generation. For example, you might have a custom column :guilabel:`Author Bio` that you'd like to append to the Comments metadata. You can choose to insert the custom column contents *before or after* the Comments section, and optionally separate the appended content with a horizontal rule separator. Eligible custom column types include ``text, comments, and composite``.
|
||||
|
||||
.. _Custom catalog covers:
|
||||
|
||||
Custom catalog covers
|
||||
-----------------------
|
||||
|
||||
.. |cc| image:: images/custom_cover.png
|
||||
|
||||
|cc| With the `Generate Cover plugin <http://www.mobileread.com/forums/showthread.php?t=124219>`_ installed, you can create custom covers for your catalog.
|
||||
To install the plugin, go to :guilabel:`Preferences > Advanced > Plugins > Get new plugins`.
|
||||
|
||||
Additional help resources
|
||||
---------------------------
|
||||
|
||||
For more information on |app|'s Catalog feature, see the MobileRead forum sticky `Creating Catalogs - Start here <http://www.mobileread.com/forums/showthread.php?t=118556>`_, where you can find information on how to customize the catalog templates, and how to submit a bug report.
|
||||
|
||||
To ask questions or discuss calibre's Catalog feature with other users, visit the MobileRead forum `Calibre Catalogs <http://www.mobileread.com/forums/forumdisplay.php?f=238>`_.
|
||||
|
@ -14,10 +14,10 @@
|
||||
import sys, os
|
||||
|
||||
# If your extensions are in another directory, add it here.
|
||||
sys.path.append(os.path.abspath('../src'))
|
||||
sys.path.append(os.path.abspath('.'))
|
||||
__appname__ = os.environ.get('__appname__', 'calibre')
|
||||
__version__ = os.environ.get('__version__', '0.0.0')
|
||||
import init_calibre
|
||||
init_calibre
|
||||
from calibre.constants import __appname__, __version__
|
||||
import custom
|
||||
custom
|
||||
# General configuration
|
||||
@ -154,7 +154,8 @@ latex_font_size = '10pt'
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title, author, document class [howto/manual]).
|
||||
#latex_documents = []
|
||||
latex_documents = [('index', 'calibre.tex', 'calibre User Manual',
|
||||
'Kovid Goyal', 'manual', False)]
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#latex_preamble = ''
|
||||
@ -164,3 +165,11 @@ latex_font_size = '10pt'
|
||||
|
||||
# If false, no module index is generated.
|
||||
#latex_use_modindex = True
|
||||
|
||||
latex_logo = 'resources/logo.png'
|
||||
latex_show_pagerefs = True
|
||||
latex_show_urls = 'footnote'
|
||||
latex_elements = {
|
||||
'papersize':'letterpaper',
|
||||
'fontenc':r'\usepackage[T2A,T1]{fontenc}'
|
||||
}
|
||||
|
@ -710,3 +710,35 @@ EPUB from the ZIP file are::
|
||||
|
||||
Note that because this file explores the potential of EPUB, most of the advanced formatting is not going to work on readers less capable than |app|'s built-in EPUB viewer.
|
||||
|
||||
|
||||
Convert ODT documents
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|app| can directly convert ODT (OpenDocument Text) files. You should use styles to format your document and minimize the use of direct formatting.
|
||||
When inserting images into your document you need to anchor them to the paragraph; images anchored to a page will all end up at the front of the conversion.
|
||||
|
||||
To enable automatic detection of chapters, you need to mark them with the built-in styles called 'Heading 1', 'Heading 2', ..., 'Heading 6' ('Heading 1' equates to the HTML tag <h1>, 'Heading 2' to <h2> etc). When you convert in |app| you can enter which style you used into the 'Detect chapters at' box. Example:
|
||||
|
||||
* If you mark Chapters with style 'Heading 2', you have to set the 'Detect chapters at' box to ``//h:h2``
|
||||
* For a nested TOC with Sections marked with 'Heading 2' and the Chapters marked with 'Heading 3' you need to enter ``//h:h2|//h:h3``. On the Convert - TOC page set the 'Level 1 TOC' box to ``//h:h2`` and the 'Level 2 TOC' box to ``//h:h3``.
|
||||
|
||||
Well-known document properties (Title, Keywords, Description, Creator) are recognized and |app| will use the first image (not too small, and with a good aspect ratio) as the cover image.
|
||||
|
||||
There is also an advanced property conversion mode, which is activated by setting the custom property ``opf.metadata`` ('Yes or No' type) to Yes in your ODT document (File->Properties->Custom Properties).
|
||||
If this property is detected by |app|, the following custom properties are recognized (``opf.authors`` overrides document creator)::
|
||||
|
||||
opf.titlesort
|
||||
opf.authors
|
||||
opf.authorsort
|
||||
opf.publisher
|
||||
opf.pubdate
|
||||
opf.isbn
|
||||
opf.language
|
||||
opf.series
|
||||
opf.seriesindex
|
||||
|
||||
In addition to this, you can specify the picture to use as the cover by naming it ``opf.cover`` (right click, Picture->Options->Name) in the ODT. If no picture with this name is found, the 'smart' method is used.
|
||||
As the cover detection might result in double covers in certain output formats, the process will remove the paragraph (only if the only content is the cover!) from the document. But this works only with the named picture!
|
||||
|
||||
To disable cover detection you can set the custom property ``opf.nocover`` ('Yes or No' type) to Yes in advanced mode.
|
||||
|
||||
|
@ -15,7 +15,7 @@ Here, we will teach you how to create your own plugins to add new features to |a
|
||||
:depth: 2
|
||||
:local:
|
||||
|
||||
.. note:: This only applies to calibre releases >= 0.7.53
|
||||
.. note:: This only applies to calibre releases >= 0.8.60
|
||||
|
||||
Anatomy of a |app| plugin
|
||||
---------------------------
|
||||
@ -32,11 +32,15 @@ and enter the following Python code into it:
|
||||
.. literalinclude:: plugin_examples/helloworld/__init__.py
|
||||
:lines: 10-
|
||||
|
||||
That's all. To add this code to |app| as a plugin, simply create a zip file with::
|
||||
That's all. To add this code to |app| as a plugin, simply run the following in
|
||||
the directory in which you created :file:`__init__.py`::
|
||||
|
||||
zip plugin.zip __init__.py
|
||||
calibre-customize -b .
|
||||
|
||||
Add this plugin to |app| via :guilabel:`Preferences->Plugins`.
|
||||
.. note::
|
||||
On OS X you have to first install the |app| command line tools, by
|
||||
going to :guilabel:`Preferences->Miscellaneous` and clicking the
|
||||
:guilabel:`Install command line tools` button.
|
||||
|
||||
You can download the Hello World plugin from
|
||||
`helloworld_plugin.zip <http://calibre-ebook.com/downloads/helloworld_plugin.zip>`_.
|
||||
@ -178,6 +182,10 @@ The plugin API
|
||||
As you may have noticed above, a plugin in |app| is a class. There are different classes for the different types of plugins in |app|.
|
||||
Details on each class, including the base class of all plugins can be found in :ref:`plugins`.
|
||||
|
||||
Your plugin is almost certainly going to use code from |app|. To learn
|
||||
how to find various bits of functionality in the
|
||||
|app| code base, read the section on the |app| :ref:`code_layout`.
|
||||
|
||||
Debugging plugins
|
||||
-------------------
|
||||
|
||||
@ -191,14 +199,12 @@ When running from the command line, debug output will be printed to the console,
|
||||
|
||||
You can insert print statements anywhere in your plugin code, they will be output in debug mode. Remember, this is python, you really shouldn't need anything more than print statements to debug ;) I developed all of |app| using just this debugging technique.
|
||||
|
||||
It can get tiresome to keep re-adding a plugin to calibre to test small changes. The plugin zip files are stored in the calibre config directory in plugins/ (goto Preferences->Misc and click open config directory to see the config directory).
|
||||
You can quickly test changes to your plugin by using the following command
|
||||
line::
|
||||
|
||||
Once you've located the zip file of your plugin you can then directly update it with your changes instead of re-adding it each time. To do so from the command line, in the directory that contains your plugin source code, use::
|
||||
calibre-debug -s; calibre-customize -b /path/to/your/plugin/directory; calibre
|
||||
|
||||
calibre -s; sleep 4s; zip -R /path/to/plugin/zip/file.zip *; calibre
|
||||
|
||||
This will shutdown a running calibre. Wait for the shutdown to complete, then update your plugin files and relaunch calibre.
|
||||
It relies on the freely available zip command line tool.
|
||||
This will shut down a running calibre, wait for the shutdown to complete, then update your plugin in |app| and relaunch |app|.
|
||||
|
||||
More plugin examples
|
||||
----------------------
|
||||
|
@ -14,6 +14,7 @@ from sphinx.util.console import bold
|
||||
sys.path.append(os.path.abspath('../../../'))
|
||||
from calibre.linux import entry_points
|
||||
from epub import EPUBHelpBuilder
|
||||
from latex import LaTeXHelpBuilder
|
||||
|
||||
def substitute(app, doctree):
|
||||
pass
|
||||
@ -251,6 +252,7 @@ def template_docs(app):
|
||||
def setup(app):
|
||||
app.add_config_value('kovid_epub_cover', None, False)
|
||||
app.add_builder(EPUBHelpBuilder)
|
||||
app.add_builder(LaTeXHelpBuilder)
|
||||
app.connect('doctree-read', substitute)
|
||||
app.connect('builder-inited', generate_docs)
|
||||
app.connect('build-finished', finished)
|
||||
|
@ -30,6 +30,7 @@ Environment variables
|
||||
* ``CALIBRE_OVERRIDE_DATABASE_PATH`` - allows you to specify the full path to metadata.db. Using this variable you can have metadata.db be in a location other than the library folder. Useful if your library folder is on a networked drive that does not support file locking.
|
||||
* ``CALIBRE_DEVELOP_FROM`` - Used to run from a calibre development environment. See :ref:`develop`.
|
||||
* ``CALIBRE_OVERRIDE_LANG`` - Used to force the language used by the interface (ISO 639 language code)
|
||||
* ``CALIBRE_NO_NATIVE_FILEDIALOGS`` - Causes calibre to not use native file dialogs for selecting files/directories.
|
||||
* ``SYSFS_PATH`` - Use if sysfs is mounted somewhere other than /sys
|
||||
* ``http_proxy`` - Used on linux to specify an HTTP proxy
|
||||
|
||||
|
81
manual/develop.rst
Executable file → Normal file
@ -6,9 +6,9 @@ Setting up a |app| development environment
|
||||
===========================================
|
||||
|
||||
|app| is completely open source, licensed under the `GNU GPL v3 <http://www.gnu.org/copyleft/gpl.html>`_.
|
||||
This means that you are free to download and modify the program to your heart's content. In this section,
|
||||
you will learn how to get a |app| development environment set up on the operating system of your choice.
|
||||
|app| is written primarily in `Python <http://www.python.org>`_ with some C/C++ code for speed and system interfacing.
|
||||
This means that you are free to download and modify the program to your heart's content. In this section,
|
||||
you will learn how to get a |app| development environment set up on the operating system of your choice.
|
||||
|app| is written primarily in `Python <http://www.python.org>`_ with some C/C++ code for speed and system interfacing.
|
||||
Note that |app| is not compatible with Python 3 and requires at least Python 2.7.
|
||||
|
||||
.. contents:: Contents
|
||||
@ -20,16 +20,18 @@ Design philosophy
|
||||
|
||||
|app| has its roots in the Unix world, which means that its design is highly modular.
|
||||
The modules interact with each other via well defined interfaces. This makes adding new features and fixing
|
||||
bugs in |app| very easy, resulting in a frenetic pace of development. Because of its roots, |app| has a
|
||||
bugs in |app| very easy, resulting in a frenetic pace of development. Because of its roots, |app| has a
|
||||
comprehensive command line interface for all its functions, documented in :ref:`cli`.
|
||||
|
||||
The modular design of |app| is expressed via ``Plugins``. There is a :ref:`tutorial <customize>` on writing |app| plugins.
|
||||
For example, adding support for a new device to |app| typically involves writing fewer than 100 lines of code in the form of
|
||||
a device driver plugin. You can browse the
|
||||
`built-in drivers <http://bazaar.launchpad.net/%7Ekovid/calibre/trunk/files/head%3A/src/calibre/devices/>`_. Similarly, adding support
|
||||
for new conversion formats involves writing input/output format plugins. Another example of the modular design is the :ref:`recipe system <news>` for
|
||||
a device driver plugin. You can browse the
|
||||
`built-in drivers <http://bazaar.launchpad.net/%7Ekovid/calibre/trunk/files/head%3A/src/calibre/devices/>`_. Similarly, adding support
|
||||
for new conversion formats involves writing input/output format plugins. Another example of the modular design is the :ref:`recipe system <news>` for
|
||||
fetching news. For more examples of plugins designed to add features to |app|, see the `plugin index <http://www.mobileread.com/forums/showthread.php?p=1362767#post1362767>`_.
|
||||
|
||||
.. _code_layout:
|
||||
|
||||
Code layout
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
@ -70,13 +72,21 @@ After installing Bazaar, you can get the |app| source code with the command::
|
||||
|
||||
bzr branch lp:calibre
|
||||
|
||||
On Windows you will need the complete path name, that will be something like :file:`C:\\Program Files\\Bazaar\\bzr.exe`. To update a branch
|
||||
to the latest code, use the command::
|
||||
On Windows you will need the complete path name, that will be something like :file:`C:\\Program Files\\Bazaar\\bzr.exe`.
|
||||
|
||||
To update a branch to the latest code, use the command::
|
||||
|
||||
bzr merge
|
||||
|
||||
The calibre repository is huge so the branch operation above takes along time (about an hour). If you want to get the code faster, the sourcecode for the latest release is always available as an
|
||||
`archive <http://status.calibre-ebook.com/dist/src>`_.
|
||||
|app| is a very large project with a very long source control history, so the
|
||||
above can take a while (10mins to an hour depending on your internet speed).
|
||||
|
||||
If you want to get the code faster, the source code for the latest release is
|
||||
always available as an `archive <http://status.calibre-ebook.com/dist/src>`_.
|
||||
You can also use bzr to just download the source code, without the history,
|
||||
using::
|
||||
|
||||
bzr branch --stacked lp:calibre
|
||||
|
||||
Submitting your changes to be included
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -91,15 +101,15 @@ this, make your changes, then run::
|
||||
This will create a :file:`my-changes` file in the current directory,
|
||||
simply attach that to a ticket on the |app| `bug tracker <https://bugs.launchpad.net/calibre>`_.
|
||||
|
||||
If you plan to do a lot of development on |app|, then the best method is to create a
|
||||
If you plan to do a lot of development on |app|, then the best method is to create a
|
||||
`Launchpad <http://launchpad.net>`_ account. Once you have an account, you can use it to register
|
||||
your bzr branch created by the `bzr branch` command above. First run the
|
||||
following command to tell bzr about your launchpad account::
|
||||
|
||||
bzr launchpad-login your_launchpad_username
|
||||
|
||||
Now, you have to setup SSH access to Launchpad. First create an SSH public/private keypair. Then upload
|
||||
the public key to Launchpad by going to your Launchpad account page. Instructions for setting up the
|
||||
Now, you have to setup SSH access to Launchpad. First create an SSH public/private keypair. Then upload
|
||||
the public key to Launchpad by going to your Launchpad account page. Instructions for setting up the
|
||||
private key in bzr are at http://bazaar-vcs.org/Bzr_and_SSH. Now you can upload your branch to the |app|
|
||||
project in Launchpad by following the instructions at https://help.launchpad.net/Code/UploadingABranch.
|
||||
Whenever you commit changes to your branch with the command::
|
||||
@ -107,8 +117,8 @@ Whenever you commit changes to your branch with the command::
|
||||
bzr commit -m "Comment describing your change"
|
||||
|
||||
Kovid can merge it directly from your branch into the main |app| source tree. You should also keep an eye on the |app|
|
||||
`development forum <http://www.mobileread.com/forums/forumdisplay.php?f=240>`. Before making major changes, you should
|
||||
discuss them in the forum or contact Kovid directly (his email address is all over the source code).
|
||||
`development forum <http://www.mobileread.com/forums/forumdisplay.php?f=240>`_. Before making major changes, you should
|
||||
discuss them in the forum or contact Kovid directly (his email address is all over the source code).
|
||||
|
||||
Windows development environment
|
||||
---------------------------------
|
||||
@ -118,12 +128,12 @@ the previously checked out |app| code directory. For example::
|
||||
|
||||
cd C:\Users\kovid\work\calibre
|
||||
|
||||
calibre is the directory that contains the src and resources sub-directories.
|
||||
calibre is the directory that contains the src and resources sub-directories.
|
||||
|
||||
The next step is to set the environment variable ``CALIBRE_DEVELOP_FROM`` to the absolute path of the src directory.
|
||||
So, following the example above, it would be ``C:\Users\kovid\work\calibre\src``. `Here is a short
|
||||
guide <http://docs.python.org/using/windows.html#excursus-setting-environment-variables>`_ to setting environment
|
||||
variables on Windows.
|
||||
variables on Windows.
|
||||
|
||||
Once you have set the environment variable, open a new command prompt and check that it was correctly set by using
|
||||
the command::
|
||||
@ -134,7 +144,7 @@ Setting this environment variable means that |app| will now load all its Python
|
||||
|
||||
That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src\\calibre\\__init__.py`
|
||||
in your favorite editor and add the line::
|
||||
|
||||
|
||||
print ("Hello, world!")
|
||||
|
||||
near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``.
|
||||
@ -149,24 +159,25 @@ the previously checked out |app| code directory, for example::
|
||||
|
||||
calibre is the directory that contains the src and resources sub-directories. Ensure you have installed the |app| commandline tools via :guilabel:`Preferences->Advanced->Miscellaneous` in the |app| GUI.
|
||||
|
||||
The next step is to set the environment variable ``CALIBRE_DEVELOP_FROM`` to the absolute path of the src directory.
|
||||
So, following the example above, it would be ``/Users/kovid/work/calibre/src``. Apple
|
||||
`documentation <http://developer.apple.com/mac/library/documentation/MacOSX/Conceptual/BPRuntimeConfig/Articles/EnvironmentVars.html#//apple_ref/doc/uid/20002093-BCIJIJBH>`_
|
||||
on how to set environment variables.
|
||||
The next step is to create a bash script that will set the environment variable ``CALIBRE_DEVELOP_FROM`` to the absolute path of the src directory when running calibre in debug mode.
|
||||
|
||||
Once you have set the environment variable, open a new Terminal and check that it was correctly set by using
|
||||
the command::
|
||||
Create a plain text file::
|
||||
|
||||
echo $CALIBRE_DEVELOP_FROM
|
||||
#!/bin/sh
|
||||
export CALIBRE_DEVELOP_FROM="/Users/kovid/work/calibre/src"
|
||||
calibre-debug -g
|
||||
|
||||
Setting this environment variable means that |app| will now load all its Python code from the specified location.
|
||||
Save this file as ``/usr/bin/calibre-develop``, then set its permissions so that it can be executed::
|
||||
|
||||
That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src/calibre/__init__.py`
|
||||
in your favorite editor and add the line::
|
||||
|
||||
print ("Hello, world!")
|
||||
chmod +x /usr/bin/calibre-develop
|
||||
|
||||
near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``.
|
||||
Once you have done this, run::
|
||||
|
||||
calibre-develop
|
||||
|
||||
You should see some diagnostic information in the Terminal window as calibre
|
||||
starts up, and you should see an asterisk after the version number in the GUI
|
||||
window, indicating that you are running from source.
|
||||
|
||||
Linux development environment
|
||||
------------------------------
|
||||
@ -181,11 +192,11 @@ Install the |app| using the binary installer. Then open a terminal and change to
|
||||
|
||||
cd /home/kovid/work/calibre
|
||||
|
||||
calibre is the directory that contains the src and resources sub-directories.
|
||||
calibre is the directory that contains the src and resources sub-directories.
|
||||
|
||||
The next step is to set the environment variable ``CALIBRE_DEVELOP_FROM`` to the absolute path of the src directory.
|
||||
So, following the example above, it would be ``/home/kovid/work/calibre/src``. How to set environment variables depends on
|
||||
your Linux distribution and what shell you are using.
|
||||
your Linux distribution and what shell you are using.
|
||||
|
||||
Once you have set the environment variable, open a new terminal and check that it was correctly set by using
|
||||
the command::
|
||||
@ -196,7 +207,7 @@ Setting this environment variable means that |app| will now load all its Python
|
||||
|
||||
That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src/calibre/__init__.py`
|
||||
in your favorite editor and add the line::
|
||||
|
||||
|
||||
print ("Hello, world!")
|
||||
|
||||
near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``.
|
||||
|
138
manual/faq.rst
@ -69,8 +69,8 @@ If you have a hand edited TOC in the input document, you can use the TOC detecti
|
||||
|
||||
Finally, I encourage you to ditch the content TOC and only have a metadata TOC in your ebooks. Metadata TOCs will give the people reading your ebooks a much superior navigation experience (except on the Kindle, where they are essentially the same as a content TOC).
|
||||
|
||||
The covers for my MOBI files have stopped showing up in Kindle for PC/Kindle for Android/etc.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
The covers for my MOBI files have stopped showing up in Kindle for PC/Kindle for Android/iPad etc.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This is caused by a bug in the Amazon software. You can work around it by going
|
||||
to Preferences->Output Options->MOBI output and setting the "Enable sharing
|
||||
@ -126,9 +126,11 @@ Device Integration
|
||||
|
||||
What devices does |app| support?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
At the moment |app| has full support for the SONY PRS line, Barnes & Noble Nook line, Cybook Gen 3/Opus, Amazon Kindle line, Entourage Edge, Longshine ShineBook, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, PocketBook line, Italica, eClicto, Iriver Story, Airis dBook, Hanvon N515, Binatone Readme, Teclast K3 and clones, SpringDesign Alex, Kobo Reader, various Android phones and the iPhone/iPad. In addition, using the :guilabel:`Connect to folder` function you can use it with any ebook reader that exports itself as a USB disk.
|
||||
|
||||
There is also a special ``User Defined`` device plugin that can be used to connect to arbitrary devices that present their memory as disk drives. See the device plugin ``Preferences -> Plugins -> Device Plugins -> User Defined`` and ``Preferences -> Miscelleaneous -> Get information to setup the user defined device`` for more information.
|
||||
|app| can directly connect to all the major (and most of the minor) ebook reading devices,
|
||||
smartphones, tablets, etc.
|
||||
In addition, using the :guilabel:`Connect to folder` function you can use it with any ebook reader that exports itself as a USB disk.
|
||||
You can even connect to Apple devices (via iTunes), using the :guilabel:`Connect to iTunes`
|
||||
function.
|
||||
|
||||
.. _devsupport:
|
||||
|
||||
@ -159,8 +161,8 @@ Follow these steps to find the problem:
|
||||
* Make sure that you are connecting only a single device to your computer at a time. Do not have another |app| supported device like an iPhone/iPad etc. at the same time.
|
||||
* If you are connecting an Apple iDevice (iPad, iPod Touch, iPhone), use the 'Connect to iTunes' method in the 'Getting started' instructions in `Calibre + Apple iDevices: Start here <http://www.mobileread.com/forums/showthread.php?t=118559>`_.
|
||||
* Make sure you are running the latest version of |app|. The latest version can always be downloaded from `the calibre website <http://calibre-ebook.com/download>`_.
|
||||
* Ensure your operating system is seeing the device. That is, the device should be mounted as a disk, that you can access using Windows explorer or whatever the file management program on your computer is. On Windows your device **must have been assigned a drive letter**, like K:.
|
||||
* In calibre, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled, the plugin icon next to it should be green when it is enabled.
|
||||
* Ensure your operating system is seeing the device. That is, the device should show up in Windows Explorer (in Windows) or Finder (in OS X).
|
||||
* In |app|, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled, the plugin icon next to it should be green when it is enabled.
|
||||
* If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker <http://bugs.calibre-ebook.com>`_.
|
||||
|
||||
My device is non-standard or unusual. What can I do to connect to it?
|
||||
@ -282,36 +284,73 @@ Use the 'Connect to iTunes' method in the 'Getting started' instructions in `Cal
|
||||
|
||||
This method only works on Windows XP and higher, and OS X 10.5 and higher. Linux is not supported (iTunes is not available in linux) and OS X 10.4 is not supported.
|
||||
|
||||
How do I use |app| with my Android phone/tablet?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
How do I use |app| with my Android phone/tablet or Kindle Fire HD?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are two ways that you can connect your Android device to calibre. Using a USB cable-- or wirelessly, over the air.
|
||||
**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**. Some devices may have a setting to put them in "disk mode" or "USB Transfer mode" that is needed before they act as USB disks.
|
||||
There are two ways that you can connect your Android device to calibre. Using a USB cable -- or wirelessly, over the air.
|
||||
The first step to using an Android device is installing an ebook reading
|
||||
application on it. There are many free and paid ebook reading applications for
|
||||
Android: Some examples (in no particular order):
|
||||
`FBReader <https://play.google.com/store/apps/details?id=org.geometerplus.zlibrary.ui.android&hl=en>`_,
|
||||
`Moon+ <https://play.google.com/store/apps/details?id=com.flyersoft.moonreader&hl=en>`_,
|
||||
`Mantano <https://play.google.com/store/apps/details?id=com.mantano.reader.android.lite&hl=en>`_,
|
||||
`Aldiko <https://play.google.com/store/apps/details?id=com.aldiko.android&hl=en>`_,
|
||||
`Kindle <https://play.google.com/store/apps/details?id=com.amazon.kindle&feature=related_apps>`_.
|
||||
|
||||
Using a USB cable
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
First install either the WordPlayer or Aldiko ebook reading apps from the Android Marketplace onto your phone. Then simply plug your phone into the computer with a USB cable. |app| should automatically detect the phone and then you can transfer books to it by clicking the Send to Device button. |app| does not have support for every single android device out there, so if your device is not automatically detected, follow the instructions at :ref:`devsupport` to get your device supported in |app|.
|
||||
Simply plug your device into the computer with a USB cable. |app| should
|
||||
automatically detect the device and then you can transfer books to it by
|
||||
clicking the Send to Device button. |app| does not have support for every
|
||||
single android device out there, so if your device is not automatically
|
||||
detected, follow the instructions at :ref:`devsupport` to get your device
|
||||
supported in |app|.
|
||||
|
||||
.. note:: With newer Android devices, the USB connection is only supported on
|
||||
Windows Vista and newer and Linux. If you are on Windows XP or OS X,
|
||||
you should use one of the wireless connection methods.
|
||||
|
||||
Over the air
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
The easiest way to browse your |app| collection on your Android device is by using the calibre content server, which makes your collection available over the net. First perform the following steps in |app|
|
||||
The easiest way to transfer books wirelessly to your Android device is to use
|
||||
the `Calibre Companion <http://www.multipie.co.uk/calibre-companion/>`_
|
||||
Android app. This app is maintained by a core calibre developer and allows
|
||||
|app| to connect to your Android device wirelessly, just as though you
|
||||
plugged in the device with a USB cable. You can browse files on the device
|
||||
in |app| and use the :guilabel:`Send to device` button to transfer files to
|
||||
your device wirelessly.
|
||||
|
||||
* Set the Preferred Output Format in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
|
||||
* Set the output profile to Tablet (this will work for phones as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
|
||||
* Convert the books you want to read on your device to EPUB format by selecting them and clicking the Convert button.
|
||||
|app| also has a builtin web server, the :guilabel:`Content Server`.
|
||||
You can browse your |app| collection on your Android device by using the
|
||||
calibre content server, which makes your collection available over the net.
|
||||
First perform the following steps in |app|:
|
||||
|
||||
* Set the :guilabel:`Preferred Output Format` in |app| to EPUB for normal Android devices or MOBI for Kindles (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
|
||||
* Convert the books you want to read on your device to EPUB/MOBI format by selecting them and clicking the Convert button.
|
||||
* Turn on the Content Server in |app|'s preferences and leave |app| running.
|
||||
|
||||
Now on your Android device, open the browser and browse to
|
||||
|
||||
http://192.168.1.2:8080/
|
||||
|
||||
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If your local network supports the use of computer names, you can replace the IP address with the network name of the computer. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port.
|
||||
Replace ``192.168.1.2`` with the local IP address of the computer running
|
||||
|app|. If your local network supports the use of computer names, you can
|
||||
replace the IP address with the network name of the computer. If you have
|
||||
changed the port the |app| content server is running on, you will have to
|
||||
change ``8080`` as well to the new port.
|
||||
|
||||
The local IP address is the IP address you computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address. You can now browse your book collection and download books from |app| to your device to open with whatever ebook reading software you have on your android device.
|
||||
The local IP address is the IP address your computer is assigned on your home
|
||||
network. A quick Google search will tell you how to find out your local IP
|
||||
address. You can now browse your book collection and download books from |app|
|
||||
to your device to open with whatever ebook reading software you have on your
|
||||
android device.
|
||||
|
||||
Some reading programs support browsing the Calibre library directly. For example, in Aldiko, click My Catalogs, then + to add a catalog, then give the catalog a title such as "Calibre" and provide the URL listed above. You can now browse the Calibre library and download directly into the reading software.
|
||||
Some reading programs support browsing the Calibre library directly. For
|
||||
example, in Aldiko, click My Catalogs, then + to add a catalog, then give the
|
||||
catalog a title such as "Calibre" and provide the URL listed above. You can now
|
||||
browse the Calibre library and download directly into the reading software.
|
||||
|
||||
Can I access my |app| books using the web browser in my Kindle or other reading device?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -395,7 +434,7 @@ any |app| developers will ever feel motivated enough to support it. There is how
|
||||
that allows you to create collections on your Kindle from the |app| metadata. It is available
|
||||
`from here <http://www.mobileread.com/forums/showthread.php?t=118635>`_.
|
||||
|
||||
.. note:: Amazon have removed the ability to manipulate collections completely in their newer models, like the Kindle Touch and Kindle Fire, making even the above plugin useless. If you really want the ability to manage collections on your Kindle via a USB connection, we encourage you to complain to Amazon about it, or get a reader where this is supported, like the SONY Readers.
|
||||
.. note:: Amazon have removed the ability to manipulate collections completely in their newer models, like the Kindle Touch and Kindle Fire, making even the above plugin useless. If you really want the ability to manage collections on your Kindle via a USB connection, we encourage you to complain to Amazon about it, or get a reader where this is supported, like the SONY or Kobo Readers.
|
||||
|
||||
I am getting an error when I try to use |app| with my Kobo Touch?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -515,8 +554,29 @@ There can be two reasons why |app| is showing a empty list of books:
|
||||
|
||||
* Your |app| library folder changed its location. This can happen if it was on an external disk and the drive letter for that disk changed. Or if you accidentally moved the folder. In this case, |app| cannot find its library and so starts up with an empty library instead. To remedy this, do a right-click on the |app| icon in the |app| toolbar (it will say 0 books underneath it) and select Switch/create library. Click the little blue icon to select the new location of your |app| library and click OK.
|
||||
|
||||
* Your metadata.db file was deleted/corrupted. In this case, you can ask |app| to rebuild the metadata.db from its backups. Click-and-hold the |app| icon in the |app| toolbar (it will say 0 books underneath it) and select Library maintenance->Restore database. |app| will automatically rebuild metadata.db.
|
||||
* Your metadata.db file was deleted/corrupted. In this case, you can ask |app| to rebuild the metadata.db from its backups. Right click the |app| icon in the |app| toolbar (it will say 0 books underneath it) and select Library maintenance->Restore database. |app| will automatically rebuild metadata.db.
|
||||
|
||||
I am getting errors with my calibre library on a networked drive/NAS?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
**Do not put your calibre library on a networked drive**.
|
||||
|
||||
A filesystem is a complex beast. Most network filesystems lack various
|
||||
filesystem features that |app| uses. Some don't support file locking, some don't
|
||||
support hardlinking, some are just flaky. Additionally, |app| is a single user
|
||||
application, if you accidentally run two copies of |app| on the same networked
|
||||
library, bad things will happen. Finally, different OSes impose different
|
||||
limitations on filesystems, so if you share your networked drive across OSes,
|
||||
once again, bad things *will happen*.
|
||||
|
||||
Consider using the |app| Content Server to make your books available on other
|
||||
computers. Run |app| on a single computer and access it via the Content Server
|
||||
or a Remote Desktop solution.
|
||||
|
||||
If you must share the actual library, use a file syncing tool like
|
||||
DropBox or rsync or Microsoft SkyDrive instead of a networked drive. Even with
|
||||
these tools there is danger of data corruption/loss, so only do this if you are
|
||||
willing to live with that risk.
|
||||
|
||||
Content From The Web
|
||||
---------------------
|
||||
@ -589,17 +649,24 @@ If it still wont launch, start a command prompt (press the windows key and R; th
|
||||
|
||||
Post any output you see in a help message on the `Forum <http://www.mobileread.com/forums/forumdisplay.php?f=166>`_.
|
||||
|
||||
|app| freezes when I click on anything?
|
||||
|app| freezes/crashes occasionally?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are three possible things I know of, that can cause this:
|
||||
|
||||
* You recently connected an external monitor or TV to your computer. In this case, whenever |app| opens a new window like the edit metadata window or the conversion dialog, it appears on the second monitor where you dont notice it and so you think |app| has frozen. Disconnect your second monitor and restart calibre.
|
||||
* You recently connected an external monitor or TV to your computer. In
|
||||
this case, whenever |app| opens a new window like the edit metadata
|
||||
window or the conversion dialog, it appears on the second monitor where
|
||||
you don't notice it and so you think |app| has frozen. Disconnect your
|
||||
second monitor and restart calibre.
|
||||
|
||||
* You are using a Wacom branded mouse. There is an incompatibility between Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom mouse.
|
||||
|
||||
* Sometimes if some software has installed lots of new files in your fonts folder, |app| can crash until it finishes indexing them. Just start |app|, then leave it alone for about 20 minutes, without clicking on anything. After that you should be able to use |app| as normal.
|
||||
* You are using a Wacom branded mouse. There is an incompatibility between
|
||||
Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom
|
||||
mouse.
|
||||
|
||||
* If you use RoboForm, it is known to cause |app| to crash. Add |app| to
|
||||
the blacklist of programs inside RoboForm to fix this. Or uninstall
|
||||
RoboForm.
|
||||
|
||||
|app| is not starting on OS X?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -634,7 +701,20 @@ If you still cannot get the installer to work and you are on windows, you can us
|
||||
My antivirus program claims |app| is a virus/trojan?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Your antivirus program is wrong. Antivirus programs use heuristics, patterns of code that "looks suspicuous" to detect viruses. It's rather like racial profiling. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from. If the antivirus program is preventing you from downloading/installing |app|, disable it temporarily, install |app| and then re-enable it.
|
||||
The first thing to check is that you are downloading |app| from the official
|
||||
website: `<http://calibre-ebook.com/download>`_. |app| is a very popular program
|
||||
and unscrupulous people try to set up websites offering it for download to fool
|
||||
the unwary.
|
||||
|
||||
If you have the official download and your antivirus program is still claiming
|
||||
|app| is a virus, then, your antivirus program is wrong. Antivirus programs use
|
||||
heuristics, patterns of code that "look suspicious" to detect viruses. It's
|
||||
rather like racial profiling. |app| is a completely open source product. You
|
||||
can actually browse the source code yourself (or hire someone to do it for you)
|
||||
to verify that it is not a virus. Please report the false identification to
|
||||
whatever company you buy your antivirus software from. If the antivirus program
|
||||
is preventing you from downloading/installing |app|, disable it temporarily,
|
||||
install |app| and then re-enable it.
|
||||
|
||||
How do I backup |app|?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -645,8 +725,8 @@ You can switch |app| to using a backed up library folder by simply clicking the
|
||||
|
||||
If you want to backup the |app| configuration/plugins, you have to backup the config directory. You can find this config directory via :guilabel:`Preferences->Miscellaneous`. Note that restoring configuration directories is not officially supported, but should work in most cases. Just copy the contents of the backup directory into the current configuration directory to restore.
|
||||
|
||||
How do I use purchased EPUB books with |app|?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
How do I use purchased EPUB books with |app| (or what do I do with .acsm files)?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Most purchased EPUB books have `DRM <http://drmfree.calibre-ebook.com/about#drm>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your ebook reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" ebook. The ebook file will be stored in the folder "My Digital Editions", from where you can add it to |app|.
|
||||
|
||||
I am getting a "Permission Denied" error?
|
||||
|
@ -49,9 +49,9 @@ Add books
|
||||
|
||||
1. **Add books from a single directory**: Opens a file chooser dialog and allows you to specify which books in a directory should be added. This action is *context sensitive*, i.e. it depends on which :ref:`catalog <catalogs>` you have selected. If you have selected the :guilabel:`Library`, books will be added to the library. If you have selected the ebook reader device, the books will be uploaded to the device, and so on.
|
||||
|
||||
2. **Add books from directories, including sub-directories (One book per directory, assumes every ebook file is the same book in a different format)**: Allows you to choose a directory. The directory and all its sub-directories are scanned recursively, and any ebooks found are added to the library. |app| assumes that each directory contains a single book. All ebook files in a directory are assumed to be the same book in different formats. This action is the inverse of the :ref:`Save to disk <save_to_disk_multiple>` action, i.e. you can :guilabel:`Save to disk`, delete the books and re-add them with no lost information except for the date.
|
||||
2. **Add books from directories, including sub-directories (One book per directory, assumes every ebook file is the same book in a different format)**: Allows you to choose a directory. The directory and all its sub-directories are scanned recursively, and any ebooks found are added to the library. |app| assumes that each directory contains a single book. All ebook files in a directory are assumed to be the same book in different formats. This action is the inverse of the :ref:`Save to disk <save_to_disk_multiple>` action, i.e. you can :guilabel:`Save to disk`, delete the books and re-add them with no lost information except for the date (this assumes you have not changed any of the settings for the Save to disk action).
|
||||
|
||||
3. **Add books from directories, including sub-directories (Multiple books per directory, assumes every ebook file is a different book)**: Allows you to choose a directory. The directory and all its sub-directories are scanned recursively and any ebooks found are added to the library. |app| assumes that each directory contains many books. All ebook files with the same name in a directory are assumed to be the same book in different formats. Ebooks with different names are added as different books. This action is the inverse of the :ref:`Save to disk <save_to_disk_single>` action, i.e. you can :guilabel:`Save to disk`, delete the books and re-add them with no lost information except for the date.
|
||||
3. **Add books from directories, including sub-directories (Multiple books per directory, assumes every ebook file is a different book)**: Allows you to choose a directory. The directory and all its sub-directories are scanned recursively and any ebooks found are added to the library. |app| assumes that each directory contains many books. All ebook files with the same name in a directory are assumed to be the same book in different formats. Ebooks with different names are added as different books.
|
||||
|
||||
4. **Add empty book. (Book Entry with no formats)**: Allows you to create a blank book record. This can be used to then manually fill out the information about a book that you may not have yet in your collection.
|
||||
|
||||
@ -103,10 +103,9 @@ The :guilabel:`Convert books` action has three variations, accessed by doing a r
|
||||
3. **Create a catalog of the books in your calibre library**: Allows you to generate a complete listing of the books in your library, including all metadata,
|
||||
in several formats such as XML, CSV, BiBTeX, EPUB and MOBI. The catalog will contain all the books currently showing in the library view.
|
||||
This allows you to use the search features to limit the books to be catalogued. In addition, if you select multiple books using the mouse,
|
||||
only those books will be added to the catalog. If you generate the catalog in an ebook format such as EPUB or MOBI,
|
||||
only those books will be added to the catalog. If you generate the catalog in an ebook format such as EPUB, MOBI or AZW3,
|
||||
the next time you connect your ebook reader the catalog will be automatically sent to the device.
|
||||
For more information on how catalogs work, read the `catalog creation tutorial <http://www.mobileread.com/forums/showthread.php?p=755468#post755468>`_
|
||||
at MobileRead.
|
||||
For more information on how catalogs work, read the :ref:`catalog_tut`.
|
||||
|
||||
.. _view:
|
||||
|
||||
@ -548,7 +547,7 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
|
||||
- Toggle jobs list
|
||||
* - :kbd:`Alt+Shift+B`
|
||||
- Toggle Cover Browser
|
||||
* - :kbd:`Alt+Shift+B`
|
||||
* - :kbd:`Alt+Shift+D`
|
||||
- Toggle Book Details panel
|
||||
* - :kbd:`Alt+Shift+T`
|
||||
- Toggle Tag Browser
|
||||
|
BIN
manual/images/catalog_options.png
Normal file
After Width: | Height: | Size: 33 KiB |
BIN
manual/images/catalog_send_to_device.png
Normal file
After Width: | Height: | Size: 13 KiB |
BIN
manual/images/custom_cover.png
Normal file
After Width: | Height: | Size: 100 KiB |
BIN
manual/images/excluded_books.png
Normal file
After Width: | Height: | Size: 28 KiB |
BIN
manual/images/excluded_genres.png
Normal file
After Width: | Height: | Size: 25 KiB |
BIN
manual/images/included_sections.png
Normal file
After Width: | Height: | Size: 21 KiB |
BIN
manual/images/lorentz.png
Normal file
After Width: | Height: | Size: 7.1 KiB |
BIN
manual/images/other_options.png
Normal file
After Width: | Height: | Size: 40 KiB |
BIN
manual/images/prefix_rules.png
Normal file
After Width: | Height: | Size: 38 KiB |
Before Width: | Height: | Size: 85 KiB |
BIN
manual/images/sg_pref.png
Normal file
After Width: | Height: | Size: 70 KiB |
@ -17,7 +17,7 @@ To get started with more advanced usage, you should read about the :ref:`Graphic
|
||||
|
||||
.. only:: online
|
||||
|
||||
**An ebook version of this user manual is available in** `EPUB format <calibre.epub>`_ and `AZW3 (Kindle Fire) format <calibre.azw3>`_.
|
||||
**An ebook version of this user manual is available in** `EPUB format <calibre.epub>`_, `AZW3 (Kindle Fire) format <calibre.azw3>`_ and `PDF format <calibre.pdf>`_.
|
||||
|
||||
Sections
|
||||
------------
|
||||
|
25
manual/latex.py
Normal file
@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
|
||||
from sphinx.builders.latex import LaTeXBuilder
|
||||
|
||||
class LaTeXHelpBuilder(LaTeXBuilder):
    """LaTeX builder that post-processes the generated ``calibre.tex``.

    After the stock ``LaTeXBuilder`` has finished, the Cyrillic name that
    appears in the manual is wrapped in a T2A font-encoding group so that
    LaTeX can typeset it.
    """

    # Builder name under which this class is selected.
    name = 'mylatex'

    def finish(self):
        """Run the normal LaTeX finish step, then patch ``calibre.tex`` in place."""
        LaTeXBuilder.finish(self)
        self.info('Fixing Cyrillic characters...')
        tex = os.path.join(self.outdir, 'calibre.tex')
        # Encode explicitly instead of using a non-ASCII bytes literal: the
        # result is the same UTF-8 byte string on Python 2 (given the file's
        # coding declaration), and the source stays valid on Python 3, where
        # bytes literals may only contain ASCII characters.
        cyrillic = u'Михаил Горбачёв'.encode('utf-8')
        wrapped = br'{\fontencoding{T2A}\selectfont ' + cyrillic + b'}'
        with open(tex, 'r+b') as f:
            raw = f.read().replace(cyrillic, wrapped)
            f.seek(0)
            f.write(raw)
            # Drop any stale tail so the rewrite stays correct even if the
            # replacement ever becomes shorter than the original text.
            f.truncate()
|
102
manual/mathjax.html
Normal file
@ -0,0 +1,102 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<!-- Copyright (c) 2012 Design Science, Inc. -->
|
||||
<head>
|
||||
<title>Math Test Page</title>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
|
||||
|
||||
<!-- This script tag is needed to make calibre's ebook-viewer recognize that this file needs math typesetting -->
|
||||
<script type="text/x-mathjax-config">
|
||||
// This line adds numbers to all equations automatically, unless explicitly suppressed.
|
||||
MathJax.Hub.Config({ TeX: { equationNumbers: {autoNumber: "all"} } });
|
||||
</script>
|
||||
|
||||
<style>
|
||||
h1 {text-align:center}
|
||||
h2 {
|
||||
font-weight: bold;
|
||||
background-color: #DDDDDD;
|
||||
padding: .2em .5em;
|
||||
margin-top: 1.5em;
|
||||
border-top: 3px solid #666666;
|
||||
border-bottom: 2px solid #999999;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Sample Equations</h1>
|
||||
|
||||
<h2>The Lorenz Equations</h2>
|
||||
|
||||
<p>
|
||||
\begin{align}
|
||||
\dot{x} & = \sigma(y-x) \label{lorenz}\\
|
||||
\dot{y} & = \rho x - y - xz \\
|
||||
\dot{z} & = -\beta z + xy
|
||||
\end{align}
|
||||
</p>
|
||||
|
||||
<h2>The Cauchy-Schwarz Inequality</h2>
|
||||
|
||||
<p>\[
|
||||
\left( \sum_{k=1}^n a_k b_k \right)^{\!\!2} \leq
|
||||
\left( \sum_{k=1}^n a_k^2 \right) \left( \sum_{k=1}^n b_k^2 \right)
|
||||
\]</p>
|
||||
|
||||
<h2>A Cross Product Formula</h2>
|
||||
|
||||
<p>\[
|
||||
\mathbf{V}_1 \times \mathbf{V}_2 =
|
||||
\begin{vmatrix}
|
||||
\mathbf{i} & \mathbf{j} & \mathbf{k} \\
|
||||
\frac{\partial X}{\partial u} & \frac{\partial Y}{\partial u} & 0 \\
|
||||
\frac{\partial X}{\partial v} & \frac{\partial Y}{\partial v} & 0 \\
|
||||
\end{vmatrix}
|
||||
\]</p>
|
||||
|
||||
<h2>The probability of getting \(k\) heads when flipping \(n\) coins is:</h2>
|
||||
|
||||
<p>\[P(E) = {n \choose k} p^k (1-p)^{ n-k} \]</p>
|
||||
|
||||
<h2>An Identity of Ramanujan</h2>
|
||||
|
||||
<p>\[
|
||||
\frac{1}{(\sqrt{\phi \sqrt{5}}-\phi) e^{\frac25 \pi}} =
|
||||
1+\frac{e^{-2\pi}} {1+\frac{e^{-4\pi}} {1+\frac{e^{-6\pi}}
|
||||
{1+\frac{e^{-8\pi}} {1+\ldots} } } }
|
||||
\]</p>
|
||||
|
||||
<h2>A Rogers-Ramanujan Identity</h2>
|
||||
|
||||
<p>\[
|
||||
1 + \frac{q^2}{(1-q)}+\frac{q^6}{(1-q)(1-q^2)}+\cdots =
|
||||
\prod_{j=0}^{\infty}\frac{1}{(1-q^{5j+2})(1-q^{5j+3})},
|
||||
\quad\quad \text{for $|q|<1$}.
|
||||
\]</p>
|
||||
|
||||
<h2>Maxwell's Equations</h2>
|
||||
|
||||
<p>
|
||||
\begin{align}
|
||||
\nabla \times \vec{\mathbf{B}} -\, \frac1c\, \frac{\partial\vec{\mathbf{E}}}{\partial t} & = \frac{4\pi}{c}\vec{\mathbf{j}} \\
|
||||
\nabla \cdot \vec{\mathbf{E}} & = 4 \pi \rho \\
|
||||
\nabla \times \vec{\mathbf{E}}\, +\, \frac1c\, \frac{\partial\vec{\mathbf{B}}}{\partial t} & = \vec{\mathbf{0}} \\
|
||||
\nabla \cdot \vec{\mathbf{B}} & = 0
|
||||
\end{align}
|
||||
</p>
|
||||
|
||||
<h2>In-line Mathematics</h2>
|
||||
|
||||
<p>While display equations look good for a page of samples, the
|
||||
ability to mix math and text in a paragraph is also important. This
|
||||
expression \(\sqrt{3x-1}+(1+x)^2\) is an example of an inline equation. As
|
||||
you see, equations can be used this way as well, without unduly
|
||||
disturbing the spacing between lines.</p>
|
||||
|
||||
<h2>References to equations</h2>
|
||||
|
||||
<p>Here is a reference to the Lorenz Equations (\ref{lorenz}). Clicking on the equation number will take you back to the equation.</p>
|
||||
|
||||
</body>
|
||||
</html>
|
@ -30,7 +30,7 @@ Lets pick a couple of feeds that look interesting:
|
||||
#. Business Travel: http://feeds.portfolio.com/portfolio/businesstravel
|
||||
#. Tech Observer: http://feeds.portfolio.com/portfolio/thetechobserver
|
||||
|
||||
I got the URLs by clicking the little orange RSS icon next to each feed name. To make |app| download the feeds and convert them into an ebook, you should click the :guilabel:`Fetch news` button and then the :guilabel:`Add a custom news source` menu item. A dialog similar to that shown below should open up.
|
||||
I got the URLs by clicking the little orange RSS icon next to each feed name. To make |app| download the feeds and convert them into an ebook, you should right click the :guilabel:`Fetch news` button and then the :guilabel:`Add a custom news source` menu item. A dialog similar to that shown below should open up.
|
||||
|
||||
.. image:: images/custom_news.png
|
||||
:align: center
|
||||
|
BIN
manual/resources/mathjax.epub
Normal file
@ -65,7 +65,7 @@ You create the custom column in the usual way, using Preferences -> Add your own
|
||||
|
||||
Then after restarting |app|, you must tell |app| that the column is to be treated as a hierarchy. Go to Preferences -> Look and Feel -> Tag Browser and enter the lookup name "#genre" into the "Categories with hierarchical items" box. Press Apply, and you are done with setting up.
|
||||
|
||||
.. image:: images/sg_pref.jpg
|
||||
.. image:: images/sg_pref.png
|
||||
:align: center
|
||||
|
||||
At this point there are no genres in the column. We are left with the last step: how to apply a genre to a book. A genre does not exist in |app| until it appears on at least one book. To learn how to apply a genre for the first time, we must go into some detail about what a genre looks like in the metadata for a book.
|
||||
@ -108,10 +108,10 @@ After creating the saved search, you can use it as a restriction.
|
||||
Useful Template Functions
|
||||
-------------------------
|
||||
|
||||
You might want to use the genre information in a template, such as with save to disk or send to device. The question might then be "How do I get the outermost genre name or names?" An |app| template function, subitems, is provided to make doing this easier.
|
||||
You might want to use the genre information in a template, such as with save to disk or send to device. The question might then be "How do I get the outermost genre name or names?" A |app| template function, subitems, is provided to make doing this easier.
|
||||
|
||||
For example, assume you want to add the outermost genre level to the save-to-disk template to make genre folders, as in "History/The Gathering Storm - Churchill, Winston". To do this, you must extract the first level of the hierarchy and add it to the front along with a slash to indicate that it should make a folder. The template below accomplishes this::
|
||||
|
||||
{#genre:subitems(0,1)||/}{title} - {authors}
|
||||
|
||||
See :ref:`The |app| template language <templatelangcalibre>` for more information templates and the subitem function.
|
||||
See :ref:`The template language <templatelangcalibre>` for more information on templates and the :func:`subitems` function.
|
||||
|
@ -240,9 +240,11 @@ The following functions are available in addition to those described in single-f
|
||||
* ``and(value, value, ...)`` -- returns the string "1" if all values are not empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want.
|
||||
* ``add(x, y)`` -- returns x + y. Throws an exception if either x or y are not numbers.
|
||||
* ``assign(id, val)`` -- assigns val to id, then returns val. id must be an identifier, not an expression
|
||||
* ``approximate_formats()`` -- return a comma-separated list of formats that at one point were associated with the book. There is no guarantee that the list is correct, although it probably is. This function can be called in template program mode using the template ``{:'approximate_formats()'}``. Note that format names are always uppercase, as in EPUB.
|
||||
* ``booksize()`` -- returns the value of the |app| 'size' field. Returns '' if there are no formats.
|
||||
* ``cmp(x, y, lt, eq, gt)`` -- compares x and y after converting both to numbers. Returns ``lt`` if x < y. Returns ``eq`` if x == y. Otherwise returns ``gt``.
|
||||
* ``current_library_name()`` -- return the last name on the path to the current calibre library. This function can be called in template program mode using the template ``{:'current_library_name()'}``.
|
||||
* ``current_library_path()`` -- return the path to the current calibre library. This function can be called in template program mode using the template ``{:'current_library_path()'}``.
|
||||
* ``days_between(date1, date2)`` -- return the number of days between ``date1`` and ``date2``. The number is positive if ``date1`` is greater than ``date2``, otherwise negative. If either ``date1`` or ``date2`` are not dates, the function returns the empty string.
|
||||
* ``divide(x, y)`` -- returns x / y. Throws an exception if either x or y are not numbers.
|
||||
* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. Note also that prefixes and suffixes (the `|prefix|suffix` syntax) cannot be used in the argument to this function when using template program mode.
|
||||
@ -269,6 +271,8 @@ The following functions are available in addition to those described in single-f
|
||||
ap : use a 12-hour clock instead of a 24-hour clock, with 'ap' replaced by the localized string for am or pm.
|
||||
AP : use a 12-hour clock instead of a 24-hour clock, with 'AP' replaced by the localized string for AM or PM.
|
||||
iso : the date with time and timezone. Must be the only format present.
|
||||
|
||||
You might get unexpected results if the date you are formatting contains localized month names, which can happen if you changed the format tweaks to contain MMMM. In this case, instead of using something like ``{pubdate:format_date(yyyy)}``, write the template using template program mode as in ``{:'format_date(raw_field('pubdate'),'yyyy')'}``.
|
||||
|
||||
* ``finish_formatting(val, fmt, prefix, suffix)`` -- apply the format, prefix, and suffix to a value in the same way as done in a template like ``{series_index:05.2f| - |- }``. This function is provided to ease conversion of complex single-function- or template-program-mode templates to :ref:`general program mode <general_mode>` (see below) to take advantage of GPM template compilation. For example, the following program produces the same output as the above template::
|
||||
|
||||
@ -284,6 +288,7 @@ The following functions are available in addition to those described in single-f
|
||||
)
|
||||
|
||||
* ``formats_modtimes(date_format)`` -- return a comma-separated list of colon_separated items representing modification times for the formats of a book. The date_format parameter specifies how the date is to be formatted. See the ``format_date`` function for details. You can use the select function to get the mod time for a specific format. Note that format names are always uppercase, as in EPUB.
|
||||
* ``formats_paths()`` -- return a comma-separated list of colon_separated items representing full path to the formats of a book. You can use the select function to get the path for a specific format. Note that format names are always uppercase, as in EPUB.
|
||||
* ``formats_sizes()`` -- return a comma-separated list of colon_separated items representing sizes in bytes of the formats of a book. You can use the select function to get the size for a specific format. Note that format names are always uppercase, as in EPUB.
|
||||
* ``has_cover()`` -- return ``Yes`` if the book has a cover, otherwise return the empty string
|
||||
* ``not(value)`` -- returns the string "1" if the value is empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want.
|
||||
|
@ -59,10 +59,10 @@
|
||||
<a href="http://calibre-ebook.com"><img class="logo" src="{{ pathto('_static/' + logo, 1) }}" alt="Logo"/></a>
|
||||
</p>
|
||||
|
||||
<form action="https://www.paypal.com/cgi-bin/webscr" method="post">
|
||||
<form action="https://www.paypal.com/cgi-bin/webscr" method="post" title="Contribute to support calibre development">
|
||||
<input type="hidden" name="cmd" value="_s-xclick" />
|
||||
<input type="hidden" name="hosted_button_id" value="AF4H3B8QVDG6N" />
|
||||
<input type="image" src="http://manual.calibre-ebook.com/simple_donate_button.gif" border="0" name="submit" alt="Donate to support calibre development" style="border:0pt" />
|
||||
<input type="image" src="http://manual.calibre-ebook.com/simple_donate_button.gif" border="0" name="submit" alt="Contribute to support calibre development" style="border:0pt" />
|
||||
<img alt="" border="0" src="https://www.paypalobjects.com/en_GB/i/scr/pixel.gif" width="1" height="1" />
|
||||
</form>
|
||||
<hr/>
|
||||
|
@ -18,4 +18,6 @@ Here you will find tutorials to get you started using |app|'s more advanced feat
|
||||
regexp
|
||||
server
|
||||
creating_plugins
|
||||
typesetting_math
|
||||
catalogs
|
||||
|
||||
|
70
manual/typesetting_math.rst
Normal file
@ -0,0 +1,70 @@
|
||||
|
||||
.. include:: global.rst
|
||||
|
||||
.. _typesetting_math:
|
||||
|
||||
|
||||
Typesetting Math in ebooks
|
||||
============================
|
||||
|
||||
The |app| ebook viewer has the ability to display math embedded in ebooks (ePub
|
||||
and HTML files). You can typeset the math directly with TeX or MathML or
|
||||
AsciiMath. The |app| viewer uses the excellent `MathJax
|
||||
<http://www.mathjax.org>`_ library to do this. This is a brief tutorial on
|
||||
creating ebooks with math in them that work well with the |app| viewer.
|
||||
|
||||
.. note::
|
||||
This only applies to calibre version 0.8.66 and newer
|
||||
|
||||
A simple HTML file with mathematics
|
||||
-------------------------------------
|
||||
|
||||
You can write mathematics inline inside a simple HTML file and the |app| viewer
|
||||
will render it into properly typeset mathematics. In the example below, we use
|
||||
TeX notation for mathematics. You will see that you can use normal TeX
|
||||
commands, with the small caveat that ampersands and less than and greater than
|
||||
signs have to be written as &amp; &lt; and &gt; respectively.
|
||||
|
||||
The first step is to tell |app| that this file will contain maths. You do this by
|
||||
adding the following snippet of code to the <head> section of the HTML file::
|
||||
|
||||
<script type="text/x-mathjax-config"></script>
|
||||
|
||||
That's it, now you can type mathematics just as you would in a .tex file. For
|
||||
example, here are the Lorenz equations::
|
||||
|
||||
<h2>The Lorenz Equations</h2>
|
||||
|
||||
<p>
|
||||
\begin{align}
|
||||
\dot{x} &amp; = \sigma(y-x) \\
|
||||
\dot{y} &amp; = \rho x - y - xz \\
|
||||
\dot{z} &amp; = -\beta z + xy
|
||||
\end{align}
|
||||
</p>
|
||||
|
||||
This snippet looks like the following screen shot in the |app| viewer.
|
||||
|
||||
.. figure:: images/lorentz.png
|
||||
:align: center
|
||||
|
||||
:guilabel:`The Lorenz Equations`
|
||||
|
||||
The complete HTML file, with more equations and inline mathematics, is
|
||||
reproduced below. You can convert this HTML file to EPUB in |app| to end up
|
||||
with an ebook you can distribute easily to other people.
|
||||
|
||||
.. only:: online
|
||||
|
||||
Here is the generated EPUB file: `mathjax.epub <_static/mathjax.epub>`_.
|
||||
|
||||
.. literalinclude:: mathjax.html
|
||||
:language: html
|
||||
|
||||
More information
|
||||
-----------------
|
||||
|
||||
Since the |app| viewer uses the MathJax library to render mathematics, the best
|
||||
place to find out more about math in ebooks and get help is the `MathJax
|
||||
website <http://www.mathjax.org>`_.
|
||||
|
@ -1,6 +1,7 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
24sata.rs
|
||||
@ -21,26 +22,29 @@ class Ser24Sata(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
language = 'sr'
|
||||
publication_type = 'newspaper'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
publication_type = 'newsportal'
|
||||
extra_css = """
|
||||
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
||||
body{font-family: serif1, serif}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
feeds = [
|
||||
(u'Vesti' , u'http://www.24sata.rs/rss/vesti.xml' ),
|
||||
(u'Sport' , u'http://www.24sata.rs/rss/sport.xml' ),
|
||||
(u'Šou' , u'http://www.24sata.rs/rss/sou.xml' ),
|
||||
(u'Specijal', u'http://www.24sata.rs/rss/specijal.xml'),
|
||||
(u'Novi Sad', u'http://www.24sata.rs/rss/ns.xml' )
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
article = url.partition('#')[0]
|
||||
article_id = article.partition('id=')[2]
|
||||
return 'http://www.24sata.rs/_print.php?id=' + article_id
|
||||
|
||||
dpart, spart, apart = url.rpartition('/')
|
||||
return dpart + '/print/' + apart
|
||||
|
@ -2,41 +2,70 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
|
||||
title = u'Aachener Nachrichten'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 1
|
||||
__author__ = 'schuster' #AGE update 2012-11-28
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.an-online.de/einwaage/images/an_logo.png'
|
||||
masthead_url = 'http://www.an-online.de/einwaage/images/an_logo.png'
|
||||
extra_css = '''
|
||||
.fliesstext_detail:{margin-bottom:10%;}
|
||||
.headline_1:{margin-bottom:25%;}
|
||||
b{font-family:Arial,Helvetica,sans-serif; font-weight:200;font-size:large;}
|
||||
a{font-family:Arial,Helvetica,sans-serif; font-weight:400;font-size:large;}
|
||||
ll{font-family:Arial,Helvetica,sans-serif; font-weight:100;font-size:large;}
|
||||
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
|
||||
dd{font-family:Arial,Helvetica,sans-serif;font-size:large;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
language = 'de'
|
||||
|
||||
# cover_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
||||
masthead_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='span', attrs={'class':['fliesstext_detail', 'headline_1', 'autor_detail']}),
|
||||
dict(id=['header-logo'])
|
||||
]
|
||||
dict(name='article', attrs={'class':['single']})
|
||||
]
|
||||
|
||||
feeds = [(u'Euregio', u'http://www.an-online.de/an/rss/Euregio.xml'),
|
||||
(u'Aachen', u'http://www.an-online.de/an/rss/Aachen.xml'),
|
||||
(u'Nordkreis', u'http://www.an-online.de/an/rss/Nordkreis.xml'),
|
||||
(u'Düren', u'http://www.an-online.de/an/rss/Dueren.xml'),
|
||||
(u'Eiffel', u'http://www.an-online.de/an/rss/Eifel.xml'),
|
||||
(u'Eschweiler', u'http://www.an-online.de/an/rss/Eschweiler.xml'),
|
||||
(u'Geilenkirchen', u'http://www.an-online.de/an/rss/Geilenkirchen.xml'),
|
||||
(u'Heinsberg', u'http://www.an-online.de/an/rss/Heinsberg.xml'),
|
||||
(u'Jülich', u'http://www.an-online.de/an/rss/Juelich.xml'),
|
||||
(u'Stolberg', u'http://www.an-online.de/an/rss/Stolberg.xml'),
|
||||
(u'Ratgebenr', u'http://www.an-online.de/an/rss/Ratgeber.xml')]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':["clearfix navi-wrapper"]}),
|
||||
dict(name='div', attrs={'id':["article_actions"]}),
|
||||
dict(name='style', attrs={'type':["text/css"]}),
|
||||
dict(name='aside'),
|
||||
dict(name='a', attrs={'class':["btn btn-action"]})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Lokales - Euregio', u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'),
|
||||
(u'Lokales - Aachen', u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'),
|
||||
(u'Lokales - Nordkreis', u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'),
|
||||
(u'Lokales - Düren', u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'),
|
||||
(u'Lokales - Eiffel', u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'),
|
||||
(u'Lokales - Eschweiler', u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'),
|
||||
(u'Lokales - Geilenkirchen', u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'),
|
||||
(u'Lokales - Heinsberg', u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'),
|
||||
(u'Lokales - Jülich', u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'),
|
||||
(u'Lokales - Stolberg', u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'),
|
||||
(u'News - Politik', u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'),
|
||||
(u'News - Aus aller Welt', u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'),
|
||||
(u'News - Wirtschaft', u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'),
|
||||
(u'News - Kultur', u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'),
|
||||
(u'News - Kino', u'http://www.aachener-nachrichten.de/cmlink/kino-rss-1.365019'),
|
||||
(u'News - Digital', u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'),
|
||||
(u'News - Wissenschaft', u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'),
|
||||
(u'News - Hochschule', u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'),
|
||||
(u'News - Auto', u'http://www.aachener-nachrichten.de/cmlink/auto-rss-1.365023'),
|
||||
(u'News - Kurioses', u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'),
|
||||
(u'News - Musik', u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'),
|
||||
(u'News - Tagesthema', u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'),
|
||||
(u'News - Newsticker', u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'),
|
||||
(u'Sport - Aktuell', u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'),
|
||||
(u'Sport - Fußball', u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'),
|
||||
(u'Sport - Bundesliga', u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'),
|
||||
(u'Sport - Alemannia Aachen', u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'),
|
||||
(u'Sport - Volleyball', u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'),
|
||||
(u'Sport - Chio', u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'),
|
||||
(u'Dossier - Kinderuni', u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'),
|
||||
(u'Dossier - Karlspreis', u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'),
|
||||
(u'Dossier - Ritterorden', u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'),
|
||||
(u'Dossier - ZAB-Aachen', u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'),
|
||||
(u'Dossier - Karneval', u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'),
|
||||
(u'Ratgeber - Geld', u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'),
|
||||
(u'Ratgeber - Recht', u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'),
|
||||
(u'Ratgeber - Gesundheit', u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'),
|
||||
(u'Ratgeber - Familie', u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'),
|
||||
(u'Ratgeber - Livestyle', u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'),
|
||||
(u'Ratgeber - Reisen', u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'),
|
||||
(u'Ratgeber - Bauen und Wohnen', u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'),
|
||||
(u'Ratgeber - Bildung und Beruf', u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'),
|
||||
]
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
abc.com.py
|
||||
'''
|
||||
@ -7,7 +7,7 @@ abc.com.py
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ABC_py(BasicNewsRecipe):
|
||||
title = 'ABC digital'
|
||||
title = 'ABC Color'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias de Paraguay y el resto del mundo'
|
||||
publisher = 'ABC'
|
||||
@ -15,12 +15,16 @@ class ABC_py(BasicNewsRecipe):
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'es_PY'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.abc.com.py/plantillas/img/abc-logo.png'
|
||||
publication_type = 'newspaper'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
|
||||
extra_css = """
|
||||
body{font-family: UnitSlabProMedium,"Times New Roman",serif }
|
||||
img{margin-bottom: 0.4em; display: block;}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -29,21 +33,19 @@ class ABC_py(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags = [dict(name=['form','iframe','embed','object','link','base','table']),dict(attrs={'class':'toolbox'})]
|
||||
remove_tags_after = dict(attrs={'class':'date'})
|
||||
keep_only_tags = [dict(attrs={'class':'zcontent'})]
|
||||
remove_tags = [
|
||||
dict(name=['form','iframe','embed','object','link','base','table']),
|
||||
dict(attrs={'class':['es-carousel-wrapper']}),
|
||||
dict(attrs={'id':['tools','article-banner-1']})
|
||||
]
|
||||
keep_only_tags = [dict(attrs={'id':'article'})]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Ultimo momento' , u'http://www.abc.com.py/ultimo-momento.xml' )
|
||||
,(u'Nacionales' , u'http://www.abc.com.py/nacionales.xml' )
|
||||
,(u'Internacionales' , u'http://www.abc.com.py/internacionales.xml' )
|
||||
,(u'Deportes' , u'http://www.abc.com.py/deportes.xml' )
|
||||
,(u'Espectaculos' , u'http://www.abc.com.py/espectaculos.xml' )
|
||||
,(u'Ciencia y Tecnologia', u'http://www.abc.com.py/ciencia-y-tecnologia.xml')
|
||||
(u'Ultimo momento', u'http://www.abc.com.py/rss.xml' )
|
||||
,(u'Nacionales' , u'http://www.abc.com.py/nacionales/rss.xml' )
|
||||
,(u'Mundo' , u'http://www.abc.com.py/internacionales/rss.xml')
|
||||
,(u'Deportes' , u'http://www.abc.com.py/deportes/rss.xml' )
|
||||
,(u'Espectaculos' , u'http://www.abc.com.py/espectaculos/rss.xml' )
|
||||
,(u'TecnoCiencia' , u'http://www.abc.com.py/ciencia/rss.xml' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
@ -3,7 +3,7 @@ import re
|
||||
class Adventure_zone(BasicNewsRecipe):
|
||||
title = u'Adventure Zone'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Adventure zone - adventure games from A to Z'
|
||||
description = u'Adventure zone - adventure games from A to Z'
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
no_stylesheets = True
|
||||
@ -11,7 +11,9 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
index='http://www.adventure-zone.info/fusion/'
|
||||
use_embedded_content=False
|
||||
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
|
||||
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'\<table .*?\>'), lambda match: ''),
|
||||
(re.compile(r'\<tbody\>'), lambda match: '')]
|
||||
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
||||
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
|
||||
remove_tags_after= dict(id='comments')
|
||||
@ -52,6 +54,11 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
footer=soup.find(attrs={'class':'news-footer middle-border'})
|
||||
r = soup.find(name='td', attrs={'class':'capmain'})
|
||||
if r:
|
||||
r.name='h1'
|
||||
for item in soup.findAll(name=['tr', 'td']):
|
||||
item.name='div'
|
||||
if footer and len(footer('a'))>=2:
|
||||
footer('a')[1].extract()
|
||||
for item in soup.findAll(style=True):
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.adventuregamers.com
|
||||
'''
|
||||
@ -14,24 +14,24 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
publisher = 'Adventure Gamers'
|
||||
category = 'news, games, adventure, technology'
|
||||
oldest_article = 10
|
||||
delay = 10
|
||||
#delay = 10
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
INDEX = u'http://www.adventuregamers.com'
|
||||
extra_css = """
|
||||
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
|
||||
.pageheader_title{font-size: xx-large; color: #394128}
|
||||
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
|
||||
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
|
||||
.score_bg {display: inline; width: 100%; margin-bottom: 2em}
|
||||
.score_column_1{ padding-left: 10px; font-size: small; width: 50%}
|
||||
.score_column_2{ padding-left: 10px; font-size: small; width: 50%}
|
||||
.score_column_3{ padding-left: 10px; font-size: small; width: 50%}
|
||||
.score_header{font-size: large; color: #50544A}
|
||||
.bodytext{display: block}
|
||||
body{font-family: Helvetica,Arial,sans-serif}
|
||||
.score_header{font-size: large; color: #50544A}
|
||||
img{margin-bottom: 1em;}
|
||||
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
@ -41,35 +41,38 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'content_middle'})
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'cleft_inn'})]
|
||||
remove_tags = [
|
||||
dict(name=['object','link','embed','form'])
|
||||
,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']})
|
||||
dict(name=['object','link','embed','form','iframe','meta'])
|
||||
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/scoring'})
|
||||
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/policies'})
|
||||
]
|
||||
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'bodytext'})]
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')]
|
||||
feeds = [(u'Articles', u'http://www.adventuregamers.com/rss/')]
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
url = BasicNewsRecipe.get_article_url(self, article)
|
||||
if '/videos/' in url or '/hypeometer/' in url:
|
||||
return None
|
||||
return url
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('div',attrs={'class':'toolbar_fat_next'})
|
||||
pager = soup.find('div', attrs={'class':'pagination_big'})
|
||||
if pager:
|
||||
nexturl = self.INDEX + pager.a['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
texttag = soup2.find('div', attrs={'class':'bodytext'})
|
||||
for it in texttag.findAll(style=True):
|
||||
del it['style']
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
texttag.extract()
|
||||
appendtag.insert(position,texttag)
|
||||
nextpage = soup.find('a', attrs={'class':'next-page'})
|
||||
if nextpage:
|
||||
nexturl = nextpage['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
texttag = soup2.find('div', attrs={'class':'bodytext'})
|
||||
for it in texttag.findAll(style=True):
|
||||
del it['style']
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
texttag.extract()
|
||||
pager.extract()
|
||||
appendtag.insert(position,texttag)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
@ -78,7 +81,7 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
for item in soup.findAll('div', attrs={'class':'floatright'}):
|
||||
item.extract()
|
||||
self.append_page(soup, soup.body, 3)
|
||||
pager = soup.find('div',attrs={'class':'toolbar_fat'})
|
||||
pager = soup.find('div',attrs={'class':'pagination_big'})
|
||||
if pager:
|
||||
pager.extract()
|
||||
return self.adeify_images(soup)
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2010 - 2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.aif.ru
|
||||
'''
|
||||
@ -19,12 +19,19 @@ class AIF_ru(BasicNewsRecipe):
|
||||
encoding = 'cp1251'
|
||||
language = 'ru'
|
||||
publication_type = 'magazine'
|
||||
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif} '
|
||||
keep_only_tags = [dict(name='div',attrs={'id':'inner'})]
|
||||
masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png'
|
||||
extra_css = """
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif}
|
||||
img{display: block}
|
||||
"""
|
||||
keep_only_tags = [
|
||||
dict(name='div',attrs={'class':['content-header', 'zoom']})
|
||||
,dict(name='div',attrs={'id':'article-text'})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['iframe','object','link','base','input','img'])
|
||||
,dict(name='div',attrs={'class':'photo'})
|
||||
,dict(name='p',attrs={'class':'resizefont'})
|
||||
dict(name=['iframe','object','link','base','input','meta'])
|
||||
,dict(name='div',attrs={'class':'in-topic'})
|
||||
]
|
||||
|
||||
feeds = [(u'News', u'http://www.aif.ru/rss/all.php')]
|
||||
|
@ -17,18 +17,14 @@ class Aksiyon (BasicNewsRecipe):
|
||||
category = 'news, haberler,TR,gazete'
|
||||
language = 'tr'
|
||||
publication_type = 'magazine'
|
||||
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
#keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
|
||||
remove_tags = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ]
|
||||
|
||||
auto_cleanup = True
|
||||
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||
ignore_duplicate_articles = { 'title', 'url' }
|
||||
remove_empty_feeds= True
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
|
||||
( u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
|
||||
( u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
|
||||
( u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
|
||||
( u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
|
||||
( u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
|
||||
@ -37,17 +33,15 @@ class Aksiyon (BasicNewsRecipe):
|
||||
( u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
|
||||
( u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
|
||||
( u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
|
||||
( u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
|
||||
( u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
|
||||
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
|
||||
( u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
|
||||
( u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
|
||||
( u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
|
||||
( u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
||||
( u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
||||
|
||||
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||
#def print_version(self, url):
|
||||
#return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
akter.co.rs
|
||||
'''
|
||||
@ -8,7 +8,7 @@ import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Akter(BasicNewsRecipe):
|
||||
title = 'AKTER'
|
||||
title = 'AKTER - Nedeljnik'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'AKTER - nedeljni politicki magazin savremene Srbije'
|
||||
publisher = 'Akter Media Group d.o.o.'
|
||||
@ -18,61 +18,37 @@ class Akter(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png'
|
||||
masthead_url = 'http://www.akter.co.rs/gfx/logoneover.png'
|
||||
language = 'sr'
|
||||
publication_type = 'magazine'
|
||||
remove_empty_feeds = True
|
||||
PREFIX = 'http://www.akter.co.rs'
|
||||
extra_css = """
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
|
||||
.color-2{display:block; margin-bottom: 10px; padding: 5px, 10px;
|
||||
border-left: 1px solid #D00000; color: #D00000}
|
||||
img{margin-bottom: 0.8em} """
|
||||
body{font-family: Tahoma,Geneva,sans1,sans-serif}
|
||||
img{margin-bottom: 0.8em; display: block;}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
feeds = [
|
||||
(u'Politika' , u'http://www.akter.co.rs/index.php/politikaprint.html' )
|
||||
,(u'Ekonomija' , u'http://www.akter.co.rs/index.php/ekonomijaprint.html')
|
||||
,(u'Life&Style' , u'http://www.akter.co.rs/index.php/lsprint.html' )
|
||||
,(u'Sport' , u'http://www.akter.co.rs/index.php/sportprint.html' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return self.adeify_images(soup)
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'section_to_print'})]
|
||||
feeds = [(u'Nedeljnik', u'http://akter.co.rs/rss/nedeljnik')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?tmpl=component&print=1&page='
|
||||
|
||||
def parse_index(self):
|
||||
totalfeeds = []
|
||||
lfeeds = self.get_feeds()
|
||||
for feedobj in lfeeds:
|
||||
feedtitle, feedurl = feedobj
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||
articles = []
|
||||
soup = self.index_to_soup(feedurl)
|
||||
for item in soup.findAll(attrs={'class':['sectiontableentry1','sectiontableentry2']}):
|
||||
link = item.find('a')
|
||||
url = self.PREFIX + link['href']
|
||||
title = self.tag_to_string(link)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :''
|
||||
,'url' :url
|
||||
,'description':''
|
||||
})
|
||||
totalfeeds.append((feedtitle, articles))
|
||||
return totalfeeds
|
||||
dpart, spart, apart = url.rpartition('/')
|
||||
return dpart + '/print-' + apart
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.akter.co.rs/weekly.html')
|
||||
divt = soup.find('div', attrs={'class':'lastissue'})
|
||||
if divt:
|
||||
imgt = divt.find('img')
|
||||
if imgt:
|
||||
return 'http://www.akter.co.rs' + imgt['src']
|
||||
return None
|
||||
|
||||
|
44
recipes/akter_dnevnik.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
akter.co.rs
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Akter(BasicNewsRecipe):
|
||||
title = 'AKTER - Dnevnik'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'AKTER - Najnovije vesti iz Srbije'
|
||||
publisher = 'Akter Media Group d.o.o.'
|
||||
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'http://www.akter.co.rs/gfx/logodnover.png'
|
||||
language = 'sr'
|
||||
publication_type = 'magazine'
|
||||
remove_empty_feeds = True
|
||||
extra_css = """
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: Tahoma,Geneva,sans1,sans-serif}
|
||||
img{margin-bottom: 0.8em; display: block;}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'section_to_print'})]
|
||||
feeds = [(u'Vesti', u'http://akter.co.rs/rss/dnevni')]
|
||||
|
||||
def print_version(self, url):
|
||||
dpart, spart, apart = url.rpartition('/')
|
||||
return dpart + '/print-' + apart
|
69
recipes/aktualne.cz.recipe
Normal file
@ -0,0 +1,69 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class aktualneRecipe(BasicNewsRecipe):
|
||||
__author__ = 'bubak'
|
||||
title = u'aktualne.cz'
|
||||
publisher = u'Centrum holdings'
|
||||
description = 'aktuálně.cz'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 20
|
||||
|
||||
feeds = [
|
||||
(u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
|
||||
(u'Zprávy', u'http://aktualne.centrum.cz/feeds/rss/zpravy/?photo=0'),
|
||||
(u'Praha', u'http://aktualne.centrum.cz/feeds/rss/domaci/regiony/praha/?photo=0'),
|
||||
(u'Ekonomika', u'http://aktualne.centrum.cz/feeds/rss/ekonomika/?photo=0'),
|
||||
(u'Finance', u'http://aktualne.centrum.cz/feeds/rss/finance/?photo=0'),
|
||||
(u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php')
|
||||
]
|
||||
|
||||
|
||||
language = 'cs'
|
||||
cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
remove_attributes = []
|
||||
remove_tags_before = dict(name='h1', attrs={'class':['titulek-clanku']})
|
||||
filter_regexps = [r'img.aktualne.centrum.cz']
|
||||
remove_tags = [dict(name='div', attrs={'id':['social-bookmark']}),
|
||||
dict(name='div', attrs={'class':['box1', 'svazane-tagy']}),
|
||||
dict(name='div', attrs={'class':'itemcomment id0'}),
|
||||
dict(name='div', attrs={'class':'hlavicka'}),
|
||||
dict(name='div', attrs={'class':'hlavni-menu'}),
|
||||
dict(name='div', attrs={'class':'top-standard-brand-obal'}),
|
||||
dict(name='div', attrs={'class':'breadcrumb'}),
|
||||
dict(name='div', attrs={'id':'start-standard'}),
|
||||
dict(name='div', attrs={'id':'forum'}),
|
||||
dict(name='span', attrs={'class':'akce'}),
|
||||
dict(name='span', attrs={'class':'odrazka vetsi'}),
|
||||
dict(name='div', attrs={'class':'boxP'}),
|
||||
dict(name='div', attrs={'class':'box2'})]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
|
||||
(re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
||||
|
||||
keep_only_tags = []
|
||||
|
||||
visited_urls = {}
|
||||
def get_article_url(self, article):
|
||||
url = BasicNewsRecipe.get_article_url(self, article)
|
||||
if url in self.visited_urls:
|
||||
self.log.debug('Ignoring duplicate: ' + url)
|
||||
return None
|
||||
else:
|
||||
self.visited_urls[url] = True
|
||||
self.log.debug('Accepting: ' + url)
|
||||
return url
|
||||
|
||||
def encoding(self, source):
|
||||
if source.newurl.find('blog.aktualne') >= 0:
|
||||
enc = 'utf-8'
|
||||
else:
|
||||
enc = 'iso-8859-2'
|
||||
self.log.debug('Called encoding ' + enc + " " + str(source.newurl))
|
||||
return source.decode(enc, 'replace')
|
||||
|
@ -12,21 +12,26 @@ class anan(BasicNewsRecipe):
|
||||
|
||||
title = 'Anandtech'
|
||||
description = 'comprehensive Hardware Tests'
|
||||
__author__ = 'Oliver Niesner'
|
||||
__author__ = 'Oliver Niesner' # 2012-09-20 AGE: update
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
timefmt = ' [%d %b %Y]'
|
||||
oldest_article = 7 # 2012-09-20 AGE: update
|
||||
max_articles_per_feed = 40
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
|
||||
remove_tags=[dict(name='a', attrs={'style':'width:110px; margin-top:0px;text-align:center;'}),
|
||||
dict(name='a', attrs={'style':'width:110px; margin-top:0px; margin-right:20px;text-align:center;'})]
|
||||
cover_url = 'http://www.anandtech.com/content/images/globals/header_logo.png' # 2012-09-20 AGE: new
|
||||
masthead_url = 'http://www.anandtech.com/content/images/globals/printheader.png' # 2012-09-20 AGE: update
|
||||
|
||||
|
||||
remove_tags=[
|
||||
dict(name='a', attrs={'class': 'bluebutton noprint'}),
|
||||
dict(name='img', attrs={'alt': 'header'}),
|
||||
] # 2012-09-20 AGE: update
|
||||
|
||||
feeds = [ ('Anandtech', 'http://www.anandtech.com/rss/')]
|
||||
|
||||
def print_version(self,url):
|
||||
return url.replace('/show/', '/print/')
|
||||
|
||||
|
||||
return url.replace('0Cshow0C', '0Cprint0C') # 2012-09-20 AGE: update
|
48
recipes/antyweb.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AntywebRecipe(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
|
||||
language = 'pl'
|
||||
version = 1
|
||||
title = u'Antyweb'
|
||||
category = u'News'
|
||||
description = u'Blog o internecie i nowych technologiach'
|
||||
cover_url=''
|
||||
remove_empty_feeds= True
|
||||
auto_cleanup = False
|
||||
no_stylesheets=True
|
||||
use_embedded_content = False
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript = True
|
||||
simultaneous_downloads = 3
|
||||
|
||||
keep_only_tags =[]
|
||||
keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'}))
|
||||
|
||||
|
||||
remove_tags =[]
|
||||
remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'}))
|
||||
remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}))
|
||||
remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'}))
|
||||
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
|
||||
'''
|
||||
|
||||
feeds = [
|
||||
(u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
|
||||
]
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
@ -10,6 +10,8 @@ class AssociatedPress(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
# auto_cleanup_keep = '//td[@class="ap-smallphoto-td-image"]'
|
||||
max_articles_per_feed = 15
|
||||
|
||||
|
||||
@ -20,13 +22,13 @@ class AssociatedPress(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':['body']}),
|
||||
dict(name='div', attrs={'class':['entry-content']}),
|
||||
]
|
||||
remove_tags = [dict(name='table', attrs={'class':['ap-video-table','ap-htmlfragment-table','ap-htmltable-table']}),
|
||||
dict(name='span', attrs={'class':['apCaption','tabletitle']}),
|
||||
dict(name='td', attrs={'bgcolor':['#333333']}),
|
||||
]
|
||||
#keep_only_tags = [ dict(name='table', attrs={'class':['ap-story-table hnews hentry item']}),
|
||||
##dict(name='div', attrs={'class':['entry-content']}),
|
||||
#]
|
||||
#remove_tags = [dict(name='td', attrs={'class':['ap-mediabox-td']}),
|
||||
#dict(name='table', attrs={'class':['ap-htmltable-table', 'ap-htmltable-table', 'ap-mediabox-table']}),
|
||||
##dict(name='td', attrs={'bgcolor':['#333333']}),
|
||||
#]
|
||||
extra_css = '''
|
||||
.headline{font-family:Verdana,Arial,Helvetica,sans-serif;font-weight:bold;}
|
||||
.bline{color:#003366;}
|
||||
|
27
recipes/app_funds.recipe
Normal file
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
appfunds.blogspot.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class app_funds(BasicNewsRecipe):
|
||||
title = u'APP Funds'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
language = 'pl'
|
||||
description ='Blog inwestora dla inwestorów i oszczędzających'
|
||||
INDEX='http://appfunds.blogspot.com'
|
||||
remove_empty_feeds= True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
simultaneous_downloads = 5
|
||||
remove_javascript=True
|
||||
no_stylesheets=True
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')]
|
||||
|
@ -12,7 +12,11 @@ class AmericanProspect(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})]
|
||||
remove_tags = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})]
|
||||
#keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})]
|
||||
#remove_tags = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})]
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]
|
||||
|
||||
|
35
recipes/arcadia.recipe
Normal file
@ -0,0 +1,35 @@
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class Arcadia_BBS(BasicNewsRecipe):
|
||||
title = u'Arcadia'
|
||||
__author__ = 'Masahiro Hasegawa'
|
||||
language = 'ja'
|
||||
encoding = 'utf8'
|
||||
filter_regexps = [r'ad\.jp\.ap\.valuecommerce.com',]
|
||||
timefmt = '[%Y/%m/%d]'
|
||||
remove_tags_before = dict(name='a', attrs={'name':'kiji'})
|
||||
|
||||
sid_list = [] #some sotory id
|
||||
|
||||
def parse_index(self):
|
||||
result = []
|
||||
for sid in self.sid_list:
|
||||
s_result = []
|
||||
soup = self.index_to_soup(
|
||||
'http://www.mai-net.net/bbs/sst/sst.php?act=dump&all=%d'
|
||||
% sid)
|
||||
sec = soup.findAll('a', attrs={'href':re.compile(r'.*?kiji')})
|
||||
for s in sec[:-2]:
|
||||
s_result.append(dict(title=s.string,
|
||||
url="http://www.mai-net.net" + s['href'],
|
||||
date=s.parent.parent.parent.findAll('td')[3].string[:-6],
|
||||
description='', content=''))
|
||||
result.append((s_result[0]['title'], s_result))
|
||||
return result
|
||||
|
||||
|
||||
|
||||
|
@ -31,55 +31,53 @@ class Arcamax(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['comics-header']}),
|
||||
dict(name='b', attrs={'class':['current']}),
|
||||
dict(name='article', attrs={'class':['comic']}),
|
||||
keep_only_tags = [dict(name='article', attrs={'class':['comic']}),
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id':['comicfull' ]}),
|
||||
dict(name='div', attrs={'class':['calendar' ]}),
|
||||
dict(name='nav', attrs={'class':['calendar-nav' ]}),
|
||||
]
|
||||
#remove_tags = [dict(name='div', attrs={'id':['comicfull' ]}),
|
||||
#dict(name='div', attrs={'class':['calendar' ]}),
|
||||
#dict(name='nav', attrs={'class':['calendar-nav' ]}),
|
||||
#]
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
for title, url in [
|
||||
######## COMICS - GENERAL ########
|
||||
#(u"9 Chickweed Lane", u"http://www.arcamax.com/ninechickweedlane"),
|
||||
#(u"Agnes", u"http://www.arcamax.com/agnes"),
|
||||
#(u"Andy Capp", u"http://www.arcamax.com/andycapp"),
|
||||
(u"BC", u"http://www.arcamax.com/bc"),
|
||||
#(u"Baby Blues", u"http://www.arcamax.com/babyblues"),
|
||||
#(u"Beetle Bailey", u"http://www.arcamax.com/beetlebailey"),
|
||||
(u"Blondie", u"http://www.arcamax.com/blondie"),
|
||||
#u"Boondocks", u"http://www.arcamax.com/boondocks"),
|
||||
#(u"Cathy", u"http://www.arcamax.com/cathy"),
|
||||
#(u"Daddys Home", u"http://www.arcamax.com/daddyshome"),
|
||||
(u"Dilbert", u"http://www.arcamax.com/dilbert"),
|
||||
#(u"Dinette Set", u"http://www.arcamax.com/thedinetteset"),
|
||||
(u"Dog Eat Doug", u"http://www.arcamax.com/dogeatdoug"),
|
||||
(u"Doonesbury", u"http://www.arcamax.com/doonesbury"),
|
||||
#(u"Dustin", u"http://www.arcamax.com/dustin"),
|
||||
(u"Family Circus", u"http://www.arcamax.com/familycircus"),
|
||||
(u"Garfield", u"http://www.arcamax.com/garfield"),
|
||||
#(u"Get Fuzzy", u"http://www.arcamax.com/getfuzzy"),
|
||||
#(u"Girls and Sports", u"http://www.arcamax.com/girlsandsports"),
|
||||
#(u"Hagar the Horrible", u"http://www.arcamax.com/hagarthehorrible"),
|
||||
#(u"Heathcliff", u"http://www.arcamax.com/heathcliff"),
|
||||
#(u"Jerry King Cartoons", u"http://www.arcamax.com/humorcartoon"),
|
||||
#(u"Luann", u"http://www.arcamax.com/luann"),
|
||||
#(u"Momma", u"http://www.arcamax.com/momma"),
|
||||
#(u"Mother Goose and Grimm", u"http://www.arcamax.com/mothergooseandgrimm"),
|
||||
(u"Mutts", u"http://www.arcamax.com/mutts"),
|
||||
#(u"Non Sequitur", u"http://www.arcamax.com/nonsequitur"),
|
||||
#(u"Pearls Before Swine", u"http://www.arcamax.com/pearlsbeforeswine"),
|
||||
#(u"Pickles", u"http://www.arcamax.com/pickles"),
|
||||
#(u"Red and Rover", u"http://www.arcamax.com/redandrover"),
|
||||
#(u"Rubes", u"http://www.arcamax.com/rubes"),
|
||||
#(u"Rugrats", u"http://www.arcamax.com/rugrats"),
|
||||
(u"Speed Bump", u"http://www.arcamax.com/speedbump"),
|
||||
(u"Wizard of Id", u"http://www.arcamax.com/wizardofid"),
|
||||
(u"Zits", u"http://www.arcamax.com/zits"),
|
||||
#(u"9 Chickweed Lane", #u"http://www.arcamax.com/thefunnies/ninechickweedlane"),
|
||||
#(u"Agnes", u"http://www.arcamax.com/thefunnies/agnes"),
|
||||
#(u"Andy Capp", #u"http://www.arcamax.com/thefunnies/andycapp"),
|
||||
(u"BC", u"http://www.arcamax.com/thefunnies/bc"),
|
||||
#(u"Baby Blues", #u"http://www.arcamax.com/thefunnies/babyblues"),
|
||||
#(u"Beetle Bailey", #u"http://www.arcamax.com/thefunnies/beetlebailey"),
|
||||
(u"Blondie", u"http://www.arcamax.com/thefunnies/blondie"),
|
||||
#u"Boondocks", u"http://www.arcamax.com/thefunnies/boondocks"),
|
||||
#(u"Cathy", u"http://www.arcamax.com/thefunnies/cathy"),
|
||||
#(u"Daddys Home", #u"http://www.arcamax.com/thefunnies/daddyshome"),
|
||||
(u"Dilbert", u"http://www.arcamax.com/thefunnies/dilbert"),
|
||||
#(u"Dinette Set", #u"http://www.arcamax.com/thefunnies/thedinetteset"),
|
||||
(u"Dog Eat Doug", u"http://www.arcamax.com/thefunnies/dogeatdoug"),
|
||||
(u"Doonesbury", u"http://www.arcamax.com/thefunnies/doonesbury"),
|
||||
#(u"Dustin", u"http://www.arcamax.com/thefunnies/dustin"),
|
||||
(u"Family Circus", u"http://www.arcamax.com/thefunnies/familycircus"),
|
||||
(u"Garfield", u"http://www.arcamax.com/thefunnies/garfield"),
|
||||
#(u"Get Fuzzy", #u"http://www.arcamax.com/thefunnies/getfuzzy"),
|
||||
#(u"Girls and Sports", #u"http://www.arcamax.com/thefunnies/girlsandsports"),
|
||||
#(u"Hagar the Horrible", #u"http://www.arcamax.com/thefunnies/hagarthehorrible"),
|
||||
#(u"Heathcliff", #u"http://www.arcamax.com/thefunnies/heathcliff"),
|
||||
#(u"Jerry King Cartoons", #u"http://www.arcamax.com/thefunnies/humorcartoon"),
|
||||
#(u"Luann", u"http://www.arcamax.com/thefunnies/luann"),
|
||||
#(u"Momma", u"http://www.arcamax.com/thefunnies/momma"),
|
||||
#(u"Mother Goose and Grimm", #u"http://www.arcamax.com/thefunnies/mothergooseandgrimm"),
|
||||
(u"Mutts", u"http://www.arcamax.com/thefunnies/mutts"),
|
||||
#(u"Non Sequitur", #u"http://www.arcamax.com/thefunnies/nonsequitur"),
|
||||
#(u"Pearls Before Swine", #u"http://www.arcamax.com/thefunnies/pearlsbeforeswine"),
|
||||
#(u"Pickles", u"http://www.arcamax.com/thefunnies/pickles"),
|
||||
#(u"Red and Rover", #u"http://www.arcamax.com/thefunnies/redandrover"),
|
||||
#(u"Rubes", u"http://www.arcamax.com/thefunnies/rubes"),
|
||||
#(u"Rugrats", u"http://www.arcamax.com/thefunnies/rugrats"),
|
||||
(u"Speed Bump", u"http://www.arcamax.com/thefunnies/speedbump"),
|
||||
(u"Wizard of Id", u"http://www.arcamax.com/thefunnies/wizardofid"),
|
||||
(u"Zits", u"http://www.arcamax.com/thefunnies/zits"),
|
||||
]:
|
||||
articles = self.make_links(url)
|
||||
if articles:
|
||||
@ -93,11 +91,11 @@ class Arcamax(BasicNewsRecipe):
|
||||
for page in pages:
|
||||
page_soup = self.index_to_soup(url)
|
||||
if page_soup:
|
||||
title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0])
|
||||
title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'columnheader'}).h1.contents[0])
|
||||
page_url = url
|
||||
# orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
|
||||
prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
|
||||
date = self.tag_to_string(page_soup.find(name='b', attrs={'class':['current']}))
|
||||
prev_page_url = 'http://www.arcamax.com' + page_soup.find(name='a', attrs={'class':['prev']})['href']
|
||||
date = self.tag_to_string(page_soup.find(name='span', attrs={'class':['cur']}))
|
||||
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date': date})
|
||||
url = prev_page_url
|
||||
current_articles.reverse()
|
||||
@ -126,4 +124,5 @@ class Arcamax(BasicNewsRecipe):
|
||||
img {max-width:100%; min-width:100%;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
'''
|
||||
|
||||
|
@ -38,8 +38,10 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
self.timefmt = ' [%s]'%ds
|
||||
|
||||
cover = soup.find('img', src=True, attrs={'class':'cover'})
|
||||
|
||||
if cover is not None:
|
||||
self.cover_url = cover['src']
|
||||
self.cover_url = re.sub('\s','%20',re.sub('jpg.*','jpg',cover['src']))
|
||||
self.log(self.cover_url)
|
||||
|
||||
feeds = []
|
||||
seen_titles = set([])
|
||||
@ -47,18 +49,16 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
section_title = self.tag_to_string(section.find('h2'))
|
||||
self.log('Found section:', section_title)
|
||||
articles = []
|
||||
for post in section.findAll('div', attrs={'class':lambda x : x and
|
||||
'post' in x}):
|
||||
h = post.find(['h3', 'h4'])
|
||||
title = self.tag_to_string(h)
|
||||
for post in section.findAll('h3', attrs={'class':'headline'}):
|
||||
a = post.find('a', href=True)
|
||||
title = self.tag_to_string(a)
|
||||
if title in seen_titles:
|
||||
continue
|
||||
seen_titles.add(title)
|
||||
a = post.find('a', href=True)
|
||||
url = a['href']
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.theatlantic.com'+url
|
||||
p = post.find('p', attrs={'class':'dek'})
|
||||
p = post.parent.find('p', attrs={'class':'dek'})
|
||||
desc = None
|
||||
self.log('\tFound article:', title, 'at', url)
|
||||
if p is not None:
|
||||
@ -69,19 +69,29 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
if articles:
|
||||
feeds.append((section_title, articles))
|
||||
|
||||
poems = []
|
||||
self.log('Found section: Poems')
|
||||
pd = soup.find('h2', text='Poetry').parent.parent
|
||||
for poem in pd.findAll('h4'):
|
||||
title = self.tag_to_string(poem)
|
||||
url = poem.find('a')['href']
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.theatlantic.com' + url
|
||||
self.log('\tFound article:', title, 'at', url)
|
||||
poems.append({'title':title, 'url':url, 'description':'',
|
||||
'date':''})
|
||||
if poems:
|
||||
feeds.append(('Poems', poems))
|
||||
rightContent=soup.find('div', attrs = {'class':'rightContent'})
|
||||
for module in rightContent.findAll('div', attrs={'class':'module'}):
|
||||
section_title = self.tag_to_string(module.find('h2'))
|
||||
articles = []
|
||||
for post in module.findAll('div', attrs={'class':'post'}):
|
||||
a = post.find('a', href=True)
|
||||
title = self.tag_to_string(a)
|
||||
if title in seen_titles:
|
||||
continue
|
||||
seen_titles.add(title)
|
||||
url = a['href']
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.theatlantic.com'+url
|
||||
p = post.parent.find('p', attrs={'class':'dek'})
|
||||
desc = None
|
||||
self.log('\tFound article:', title, 'at', url)
|
||||
if p is not None:
|
||||
desc = self.tag_to_string(p)
|
||||
self.log('\t\t', desc)
|
||||
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||
if articles:
|
||||
feeds.append((section_title, articles))
|
||||
|
||||
|
||||
return feeds
|
||||
|
||||
@ -100,4 +110,3 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
table.replaceWith(div)
|
||||
|
||||
return soup
|
||||
|
||||
|
30
recipes/autosport.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
||||
|
||||
'''
|
||||
www.autosport.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class autosport(BasicNewsRecipe):
|
||||
title = u'Autosport'
|
||||
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
||||
language = 'en_GB'
|
||||
description =u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
|
||||
masthead_url='http://cdn.images.autosport.com/asdotcom.gif'
|
||||
remove_empty_feeds= True
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript=True
|
||||
no_stylesheets=True
|
||||
|
||||
keep_only_tags =[]
|
||||
keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'news_headline'}))
|
||||
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_author'}))
|
||||
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_date'}))
|
||||
keep_only_tags.append(dict(name = 'p'))
|
||||
|
||||
feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')]
|
@ -1,45 +1,37 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = 'Original 2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__copyright__= 'Modified 2011, Josh Hall <jwtheiv@gmail.com>'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__copyright__ = '2012 Josh Hall<jwtheiv@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
www.baltimoresun.com
|
||||
'''
|
||||
|
||||
import urllib, re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BaltimoreSun(BasicNewsRecipe):
|
||||
|
||||
title = 'The Baltimore Sun'
|
||||
__author__ = 'Josh Hall'
|
||||
description = 'Politics, local and business news from Baltimore'
|
||||
language = 'en'
|
||||
description = 'Complete local news and blogs from Baltimore'
|
||||
language = 'en'
|
||||
version = 2.1
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
remove_empty_feeds = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
#masthead_url = 'http://www.baltimoresun.com/images/thirdpartylogo.gif'
|
||||
|
||||
remove_tags_before = dict(name='div', attrs={'class':['story', 'entry']})
|
||||
remove_tags_after = [
|
||||
{'class':['photo_article',]},
|
||||
dict(name='div', attrs={'class':'shirttail-promo right clearfix'}),
|
||||
]
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
recursions = 1
|
||||
|
||||
ignore_duplicate_articles = {'title'}
|
||||
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
|
||||
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
|
||||
]
|
||||
remove_tags_after = [{'class':['photo_article',]}]
|
||||
|
||||
match_regexps = [r'page=[0-9]+']
|
||||
|
||||
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer","article-promo"]},
|
||||
{'class':["entry-footer-left","entry-footer-right","shirttail-promo right clearfix","clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent","toppaginate","module","module-header","module-content"]},
|
||||
dict(name='font',attrs={'id':["cr-other-headlines"]}),
|
||||
dict(name=['iframe']),
|
||||
]
|
||||
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
|
||||
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
|
||||
dict(name='font',attrs={'id':["cr-other-headlines"]})]
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
@ -53,8 +45,9 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
'''
|
||||
feeds = [
|
||||
## News ##
|
||||
(u'Top Headlines', u'http://www.baltimoresun.com/rss2.0.xml'),
|
||||
(u'Breaking News', u'http://www.baltimoresun.com/news/breaking/rss2.0.xml'),
|
||||
(u'Top Maryland', u'http://www.baltimoresun.com/news/maryland/rss2.0.xml'),
|
||||
@ -69,10 +62,10 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
(u'Local Politics', u'http://www.baltimoresun.com/news/maryland/politics/rss2.0.xml'),
|
||||
(u'Weather', u'http://www.baltimoresun.com/news/weather/rss2.0.xml'),
|
||||
#(u'Traffic', u'http://www.baltimoresun.com/features/commuting/rss2.0.xml'),
|
||||
(u'Nation/world', u'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
|
||||
(u'Nation/world', u'http://feeds.feedburner.com/baltimoresun/news/nationworld/rss2'),
|
||||
(u'Weird News', u'http://www.baltimoresun.com/news/offbeat/rss2.0.xml'),
|
||||
|
||||
|
||||
##Sports##
|
||||
(u'Top Sports', u'http://www.baltimoresun.com/sports/rss2.0.xml'),
|
||||
(u'Orioles/Baseball', u'http://www.baltimoresun.com/sports/orioles/rss2.0.xml'),
|
||||
(u'Ravens/Football', u'http://www.baltimoresun.com/sports/ravens/rss2.0.xml'),
|
||||
@ -85,6 +78,7 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
#(u'High School', u'http://www.baltimoresun.com/sports/high-school/rss2.0.xml'),
|
||||
#(u'Outdoors', u'http://www.baltimoresun.com/sports/outdoors/rss2.0.xml'),
|
||||
|
||||
## Entertainment ##
|
||||
(u'Celebrity News', u'http://www.baltimoresun.com/entertainment/celebrities/rss2.0.xml'),
|
||||
(u'Arts & Theater', u'http://www.baltimoresun.com/entertainment/arts/rss2.0.xml'),
|
||||
(u'Movies', u'http://www.baltimoresun.com/entertainment/movies/rss2.0.xml'),
|
||||
@ -92,14 +86,16 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
(u'Restaurants & Food', u'http://www.baltimoresun.com/entertainment/dining/rss2.0.xml'),
|
||||
(u'TV/Media', u'http://www.baltimoresun.com/entertainment/tv/rss2.0.xml'),
|
||||
|
||||
## Life ##
|
||||
(u'Health&Wellness', u'http://www.baltimoresun.com/health/rss2.0.xml'),
|
||||
(u'Home & Garden', u'http://www.baltimoresun.com/features/home-garden/rss2.0.xml'),
|
||||
(u'Living Green', u'http://www.baltimoresun.com/features/green/rss2.0.xml'),
|
||||
(u'Parenting', u'http://www.baltimoresun.com/features/parenting/rss2.0.xml'),
|
||||
(u'Fashion', u'http://www.baltimoresun.com/features/fashion/rss2.0.xml'),
|
||||
(u'Travel', u'http://www.baltimoresun.com/travel/rss2.0.xml'),
|
||||
(u'Faith', u'http://www.baltimoresun.com/features/faith/rss2.0.xml'),
|
||||
#(u'Faith', u'http://www.baltimoresun.com/features/faith/rss2.0.xml'),
|
||||
|
||||
## Business ##
|
||||
(u'Top Business', u'http://www.baltimoresun.com/business/rss2.0.xml'),
|
||||
(u'Technology', u'http://www.baltimoresun.com/business/technology/rss2.0.xml'),
|
||||
(u'Personal finance', u'http://www.baltimoresun.com/business/money/rss2.0.xml'),
|
||||
@ -109,12 +105,14 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
(u'Consumer Safety', u'http://www.baltimoresun.com/business/consumer-safety/rss2.0.xml'),
|
||||
(u'Investing', u'http://www.baltimoresun.com/business/money/rss2.0.xml'),
|
||||
|
||||
## Opinion##
|
||||
(u'Sun Editorials', u'http://www.baltimoresun.com/news/opinion/editorial/rss2.0.xml'),
|
||||
(u'Op/Ed', u'http://www.baltimoresun.com/news/opinion/oped/rss2.0.xml'),
|
||||
(u'Readers Respond', u'http://www.baltimoresun.com/news/opinion/readersrespond/'),
|
||||
|
||||
(u'Kevin Cowherd', 'http://www.baltimoresun.com/sports/bal-columnist-cowherd,0,6829726.columnist-rss2.0.xml'),
|
||||
(u'Jay Hancock', u'http://www.baltimoresun.com/business/money/bal-columnist-hancock,0,6673611.columnist-rss2.0.xml'),
|
||||
## Columnists ##
|
||||
(u'Kevin Cowherd', u'http://www.baltimoresun.com/sports/bal-columnist-cowherd,0,6829726.columnist-rss2.0.xml'),
|
||||
(u'Robert Ehrlich', u'http://www.baltimoresun.com/news/opinion/columnists/bal-columnist-ehrlich,0,1825227.columnist-rss2.0.xml'),
|
||||
(u'Jacques Kelly', u'http://www.baltimoresun.com/news/maryland/bal-columnist-kelly,0,1154701.columnist-rss2.0.xml'),
|
||||
(u'Marta H. Mossburg', u'http://www.baltimoresun.com/news/opinion/oped/bal-columnist-mossburg,0,7982155.columnist-rss2.0.xml'),
|
||||
(u'Mike Preston', u'http://www.baltimoresun.com/sports/bal-columnist-preston,0,6169796.columnist-rss2.0.xml'),
|
||||
@ -122,59 +120,80 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
(u'Dan Rodricks', u'http://www.baltimoresun.com/news/maryland/bal-columnist-rodricks,0,7089843.columnist-rss2.0.xml'),
|
||||
(u'Thomas F. Schaller', u'http://www.baltimoresun.com/news/opinion/columnists/bal-columnist-schaller,0,897397.columnist-rss2.0.xml'),
|
||||
(u'Peter Schmuck', u'http://www.baltimoresun.com/sports/bal-columnist-schmuck,0,7485088.columnist-rss2.0.xml'),
|
||||
(u'Ron Smith', u'http://www.baltimoresun.com/news/opinion/bal-columnist-ronsmith,0,3964803.columnist-rss2.0.xml'),
|
||||
|
||||
(u'Baltimore Crime Beat', u'http://weblogs.baltimoresun.com/news/crime/blog/index.xml'),
|
||||
(u'Getting There', u'http://weblogs.baltimoresun.com/news/traffic/index.xml'),
|
||||
(u'InsideEd', u'http://weblogs.baltimoresun.com/news/education/blog/index.xml'),
|
||||
(u'Maryland Politics', u'http://weblogs.baltimoresun.com/news/local/politics/index.xml'),
|
||||
(u'Maryland Weather', u'http://weblogs.marylandweather.com/index.xml'),
|
||||
(u'Second Opinion', u'http://weblogs.baltimoresun.com/news/opinion/index.xml'),
|
||||
(u'You Dont Say', u'http://weblogs.baltimoresun.com/news/mcintyre/blog/index.xml'),
|
||||
## News Blogs ##
|
||||
(u'Baltimore Crime Beat', u'http://baltimore.feedsportal.com/c/34255/f/623075/index.rss'),
|
||||
(u'InsideEd', u'http://www.baltimoresun.com/news/maryland/education/blog/rss2.0.xml'),
|
||||
(u'Maryland Politics', u'http://www.baltimoresun.com/news/maryland/politics/blog/rss2.0.xml'),
|
||||
(u'Maryland Weather', u'http://www.baltimoresun.com/news/weather/weather-blog/rss2.0.xml'),
|
||||
(u'Second Opinion', u'http://www.baltimoresun.com/news/opinion/second-opinion-blog/rss2.0.xml'),
|
||||
(u'Sun Investigates', u'http://www.baltimoresun.com/news/maryland/sun-investigates/rss2.0.xml'),
|
||||
(u'You Dont Say', u'http://www.baltimoresun.com/news/language-blog/rss2.0.xml'),
|
||||
|
||||
(u'BaltTech', u'http://weblogs.baltimoresun.com/news/technology/index.xml'),
|
||||
(u'Consuming Interests', u'http://weblogs.baltimoresun.com/business/consuminginterests/blog/index.xml'),
|
||||
(u'Jay Hancocks Blog', u'http://weblogs.baltimoresun.com/business/hancock/blog/index.xml'),
|
||||
(u'The Real Estate Wonk', u'http://weblogs.baltimoresun.com/business/realestate/blog/index.xml'),
|
||||
## Business Blogs ##
|
||||
(u'BaltTech', u'http://www.baltimoresun.com/business/technology/blog/rss2.0.xml'),
|
||||
(u'Consuming Interests', u'http://www.baltimoresun.com/business/consuming-interests-blog/rss2.0.xml'),
|
||||
(u'The Real Estate Wonk', u'http://www.baltimoresun.com/business/real-estate/wonk/rss2.0.xml'),
|
||||
|
||||
(u'Clef Notes', 'http://weblogs.baltimoresun.com/entertainment/classicalmusic/index.xml'),
|
||||
(u'Dining at Large', u'http://weblogs.baltimoresun.com/entertainment/dining/reviews/blog/index.xml'),
|
||||
(u'Midnight Sun', u'http://weblogs.baltimoresun.com/entertainment/midnight_sun/blog/index.xml'),
|
||||
(u'Mike Sragow Gets Reel', u'http://weblogs.baltimoresun.com/entertainment/movies/blog/index.xml'),
|
||||
(u'Read Street', u'http://weblogs.baltimoresun.com/entertainment/books/blog/index.xml'),
|
||||
(u'Reality Check', u'http://weblogs.baltimoresun.com/entertainment/realitycheck/blog/index.xml'),
|
||||
(u'Z on TV', u'http://weblogs.baltimoresun.com/entertainment/zontv/index.xml'),
|
||||
## Entertainment Blogs ##
|
||||
(u'Clef Notes & Drama Queens', 'http://weblogs.baltimoresun.com/entertainment/classicalmusic/index.xml'),
|
||||
(u'Baltimore Diner', u'http://baltimore.feedsportal.com/c/34255/f/623088/index.rss'),
|
||||
(u'Midnight Sun', u'http://www.baltimoresun.com/entertainment/music/midnight-sun-blog/rss2.0.xml'),
|
||||
(u'Read Street', u'http://www.baltimoresun.com/features/books/read-street/rss2.0.xml'),
|
||||
(u'Z on TV', u'http://www.baltimoresun.com/entertainment/tv/z-on-tv-blog/rss2.0.xml'),
|
||||
|
||||
## Life Blogs ##
|
||||
(u'BMore Green', u'http://weblogs.baltimoresun.com/features/green/index.xml'),
|
||||
(u'Charm City Moms', u'http://weblogs.baltimoresun.com/features/baltimoremomblog/index.xml'),
|
||||
(u'Exercists', u'http://weblogs.baltimoresun.com/health/fitness/index.xml'),
|
||||
(u'Garden Variety', 'http://weblogs.baltimoresun.com/features/gardening/index.xml'),
|
||||
#(u'In Good Faith', u'http://weblogs.baltimoresun.com/news/faith/index.xml'),
|
||||
(u'Picture of Health', u'http://weblogs.baltimoresun.com/health/index.xml'),
|
||||
(u'Baltimore Insider',u'http://www.baltimoresun.com/features/baltimore-insider-blog/rss2.0.xml'),
|
||||
(u'Homefront', u'http://www.baltimoresun.com/features/parenting/homefront/rss2.0.xml'),
|
||||
(u'Picture of Health', u'http://www.baltimoresun.com/health/blog/rss2.0.xml'),
|
||||
(u'Unleashed', u'http://weblogs.baltimoresun.com/features/mutts/blog/index.xml'),
|
||||
|
||||
## b the site blogs ##
|
||||
(u'Game Cache', u'http://www.baltimoresun.com/entertainment/bthesite/game-cache/rss2.0.xml'),
|
||||
(u'TV Lust', u'http://www.baltimoresun.com/entertainment/bthesite/tv-lust/rss2.0.xml'),
|
||||
|
||||
## Sports Blogs ##
|
||||
(u'Baltimore Sports Blitz', u'http://baltimore.feedsportal.com/c/34255/f/623097/index.rss'),
|
||||
#(u'Faceoff', u'http://weblogs.baltimoresun.com/sports/lacrosse/blog/index.xml'),
|
||||
#(u'MMA Stomping Grounds', u'http://weblogs.baltimoresun.com/sports/mma/blog/index.xml'),
|
||||
(u'Orioles Insider', u'http://weblogs.baltimoresun.com/sports/orioles/blog/index.xml'),
|
||||
#(u'Outdoors Girl', u'http://weblogs.baltimoresun.com/sports/outdoors/blog/index.xml'),
|
||||
(u'Ravens Insider', u'http://weblogs.baltimoresun.com/sports/ravens/blog/index.xml'),
|
||||
(u'Orioles Insider', u'http://baltimore.feedsportal.com/c/34255/f/623100/index.rss'),
|
||||
(u'Ravens Insider', u'http://www.baltimoresun.com/sports/ravens/ravens-insider/rss2.0.xml'),
|
||||
#(u'Recruiting Report', u'http://weblogs.baltimoresun.com/sports/college/recruiting/index.xml'),
|
||||
#(u'Ring Posts', u'http://weblogs.baltimoresun.com/sports/wrestling/blog/index.xml'),
|
||||
(u'The Schmuck Stops Here', u'http://weblogs.baltimoresun.com/sports/schmuck/index.xml'),
|
||||
(u'Toy Department', u'http://weblogs.baltimoresun.com/sports/thetoydepartment/index.xml'),
|
||||
(u'The Schmuck Stops Here', u'http://www.baltimoresun.com/sports/schmuck-blog/rss2.0.xml'),
|
||||
#(u'Tracking the Terps', u'http://weblogs.baltimoresun.com/sports/college/maryland_terps/blog/index.xml'),
|
||||
#(u'Varsity Letters', u'http://weblogs.baltimoresun.com/sports/highschool/varsityletters/index.xml'),
|
||||
(u'Virtual Vensanity', u'http://weblogs.baltimoresun.com/entertainment/bthesite/vensel/index.xml'),
|
||||
|
||||
]
|
||||
|
||||
|
||||
def get_article_url(self, article):
|
||||
print article.get('feedburner_origlink', article.get('guid', article.get('link')))
|
||||
return article.get('feedburner_origlink', article.get('guid', article.get('link')))
|
||||
ans = None
|
||||
try:
|
||||
s = article.summary
|
||||
ans = urllib.unquote(
|
||||
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
|
||||
except:
|
||||
pass
|
||||
if ans is None:
|
||||
ans = article.get('feedburner_origlink', article.get('guid', article.get('link')))
|
||||
if ans is not None:
|
||||
return ans.replace('?track=rss', '')
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
text = soup.find(text='click here to continue to article')
|
||||
if text:
|
||||
a = text.parent
|
||||
url = a.get('href')
|
||||
if url:
|
||||
return self.index_to_soup(url, raw=True)
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
# Remove the navigation bar. It was kept until now to be able to follow
|
||||
# the links to further pages. But now we don't need them anymore.
|
||||
for nav in soup.findAll(attrs={'class':['toppaginate','article-nav clearfix']}):
|
||||
nav.extract()
|
||||
|
||||
for t in soup.findAll(['table', 'tr', 'td']):
|
||||
t.name = 'div'
|
||||
|
||||
|
50
recipes/bankier_pl.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
bankier.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class bankier(BasicNewsRecipe):
|
||||
title = u'Bankier.pl'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
language = 'pl'
|
||||
description ='Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
|
||||
masthead_url='http://www.bankier.pl/gfx/hd-mid-02.gif'
|
||||
INDEX='http://bankier.pl/'
|
||||
remove_empty_feeds= True
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript=True
|
||||
no_stylesheets=True
|
||||
simultaneous_downloads = 5
|
||||
|
||||
keep_only_tags =[]
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'align' : 'left'}))
|
||||
|
||||
remove_tags =[]
|
||||
remove_tags.append(dict(name = 'table', attrs = {'cellspacing' : '2'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'align' : 'center'}))
|
||||
remove_tags.append(dict(name = 'img', attrs = {'src' : '/gfx/hd-mid-02.gif'}))
|
||||
#remove_tags.append(dict(name = 'a', attrs = {'target' : '_blank'}))
|
||||
#remove_tags.append(dict(name = 'br', attrs = {'clear' : 'all'}))
|
||||
|
||||
feeds = [
|
||||
(u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
|
||||
(u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
|
||||
(u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
|
||||
(u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
|
||||
(u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
|
||||
(u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
|
||||
]
|
||||
def print_version(self, url):
|
||||
segment = url.split('.')
|
||||
urlPart = segment[2]
|
||||
segments = urlPart.split('-')
|
||||
urlPart2 = segments[-1]
|
||||
return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2
|
||||
|
594
recipes/bbc_brasil.recipe
Normal file
@ -0,0 +1,594 @@
|
||||
##
|
||||
## Title: BBC News, Sport, and Blog Calibre Recipe
|
||||
## Contact: mattst - jmstanfield@gmail.com
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
## Copyright: mattst - jmstanfield@gmail.com
|
||||
##
|
||||
## Written: November 2011
|
||||
## Last Edited: 2011-11-19
|
||||
##
|
||||
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
__copyright__ = 'mattst - jmstanfield@gmail.com'
|
||||
|
||||
|
||||
'''
|
||||
BBC News, Sport, and Blog Calibre Recipe
|
||||
'''
|
||||
|
||||
# Import the regular expressions module.
|
||||
import re
|
||||
|
||||
# Import the BasicNewsRecipe class which this class extends.
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class BBCBrasilRecipe(BasicNewsRecipe):
|
||||
|
||||
#
|
||||
# **** IMPORTANT USERS READ ME ****
|
||||
#
|
||||
# First select the feeds you want then scroll down below the feeds list
|
||||
# and select the values you want for the other user preferences, like
|
||||
# oldest_article and such like.
|
||||
#
|
||||
#
|
||||
# Select the BBC rss feeds which you want in your ebook.
|
||||
# Selected feeds have NO '#' at their start, de-selected feeds begin with a '#'.
|
||||
#
|
||||
# Eg. ("News Home", "http://feeds.bbci.co.uk/... - include feed.
|
||||
# Eg. #("News Home", "http://feeds.bbci.co.uk/... - do not include feed.
|
||||
#
|
||||
# There are 68 feeds below which constitute the bulk of the available rss
|
||||
# feeds on the BBC web site. These include 5 blogs by editors and
|
||||
# correspondents, 16 sports feeds, 15 'sub' regional feeds (Eg. North West
|
||||
# Wales, Scotland Business), and 7 Welsh language feeds.
|
||||
#
|
||||
# Some of the feeds are low volume (Eg. blogs), or very low volume (Eg. Click)
|
||||
# so if "oldest_article = 1.5" (only articles published in the last 36 hours)
|
||||
# you may get some 'empty feeds' which will not then be included in the ebook.
|
||||
#
|
||||
# The 15 feeds currently selected below are simply my default ones.
|
||||
#
|
||||
# Note: With all 68 feeds selected, oldest_article set to 2,
|
||||
# max_articles_per_feed set to 100, and simultaneous_downloads set to 10,
|
||||
# the ebook creation took 29 minutes on my speedy 100 mbps net connection,
|
||||
# fairly high-end desktop PC running Linux (Ubuntu Lucid-Lynx).
|
||||
# More realistically with 15 feeds selected, oldest_article set to 1.5,
|
||||
# max_articles_per_feed set to 100, and simultaneous_downloads set to 20,
|
||||
# it took 6 minutes. If that's too slow increase 'simultaneous_downloads'.
|
||||
#
|
||||
# Select / de-select the feeds you want in your ebook.
|
||||
#
|
||||
feeds = [
|
||||
(u'Primeira P\xe1gina', u'http://www.bbc.co.uk/portuguese/index.xml'),
|
||||
(u'\xdaltimas Not\xedcias', u'http://www.bbc.co.uk/portuguese/ultimas_noticias/index.xml'),
|
||||
(u'Internacional', u'http://www.bbc.co.uk/portuguese/topicos/internacional/index.xml'),
|
||||
(u'Brasil', u'http://www.bbc.co.uk/portuguese/topicos/brasil/index.xml'),
|
||||
(u'Am\xe9rica Latina', u'http://www.bbc.co.uk/portuguese/topicos/america_latina/index.xml'),
|
||||
(u'Economia', u'http://www.bbc.co.uk/portuguese/topicos/economia/index.xml'),
|
||||
(u'Sa\xfade', u'http://www.bbc.co.uk/portuguese/topicos/saude/index.xml'),
|
||||
(u'Ci\xeancia e Tecnologia', u'http://www.bbc.co.uk/portuguese/topicos/ciencia_e_tecnologia/index.xml'),
|
||||
(u'Cultura', u'http://www.bbc.co.uk/portuguese/topicos/cultura/index.xml'),
|
||||
(u'V\xeddeos e Fotos', u'http://www.bbc.co.uk/portuguese/videos_e_fotos/index.xml'),
|
||||
(u'Especiais', u'http://www.bbc.co.uk/portuguese/especiais/index.xml')
|
||||
]
|
||||
|
||||
|
||||
# **** SELECT YOUR USER PREFERENCES ****
|
||||
|
||||
# Title to use for the ebook.
|
||||
#
|
||||
title = 'BBC Brasil'
|
||||
|
||||
# A brief description for the ebook.
|
||||
#
|
||||
description = u'Not\xedcias do Brasil e do mundo pela British Broadcasting Corporation'
|
||||
|
||||
# The max number of articles which may be downloaded from each feed.
|
||||
# I've never seen more than about 70 articles in a single feed in the
|
||||
# BBC feeds.
|
||||
#
|
||||
max_articles_per_feed = 100
|
||||
|
||||
# The max age of articles which may be downloaded from each feed. This is
|
||||
# specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a
|
||||
# half days). My default of 1.5 days is the last 36 hours, the point at
|
||||
# which I've decided 'news' becomes 'old news', but be warned this is not
|
||||
# so good for the blogs, technology, magazine, etc., and sports feeds.
|
||||
# You may wish to extend this to 2-5 but watch out ebook creation time will
|
||||
# increase as well. Setting this to 30 will get everything (AFAICT) as long
|
||||
# as max_articles_per_feed remains set high (except for 'Click' which is
|
||||
# v. low volume and its currently oldest article is 4th Feb 2011).
|
||||
#
|
||||
oldest_article = 1.5
|
||||
|
||||
# Number of simultaneous downloads. 20 is consistently working fine on the
|
||||
# BBC News feeds with no problems. Speeds things up from the default of 5.
|
||||
# If you have a lot of feeds and/or have increased oldest_article above 2
|
||||
# then you may wish to try increasing simultaneous_downloads to 25-30,
|
||||
# Or, of course, if you are in a hurry. [I've not tried beyond 20.]
|
||||
#
|
||||
simultaneous_downloads = 20
|
||||
|
||||
# Timeout for fetching files from the server in seconds. The default of
|
||||
# 120 seconds, seems somewhat excessive.
|
||||
#
|
||||
timeout = 30
|
||||
|
||||
# The format string for the date shown on the ebook's first page.
|
||||
# List of all values: http://docs.python.org/library/time.html
|
||||
# Default in news.py has a leading space so that's mirrored here.
|
||||
# As with 'feeds' select/de-select by adding/removing the initial '#',
|
||||
# only one timefmt should be selected, here's a few to choose from.
|
||||
#
|
||||
timefmt = ' [%a, %d %b %Y]' # [Fri, 14 Nov 2011] (Calibre default)
|
||||
#timefmt = ' [%a, %d %b %Y %H:%M]' # [Fri, 14 Nov 2011 18:30]
|
||||
#timefmt = ' [%a, %d %b %Y %I:%M %p]' # [Fri, 14 Nov 2011 06:30 PM]
|
||||
#timefmt = ' [%d %b %Y]' # [14 Nov 2011]
|
||||
#timefmt = ' [%d %b %Y %H:%M]' # [14 Nov 2011 18:30]
|
||||
#timefmt = ' [%Y-%m-%d]' # [2011-11-14]
|
||||
#timefmt = ' [%Y-%m-%d-%H-%M]' # [2011-11-14-18-30]
|
||||
|
||||
|
||||
|
||||
#
|
||||
# **** IMPORTANT ****
|
||||
#
|
||||
# DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
|
||||
#
|
||||
# DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
|
||||
#
|
||||
# I MEAN IT, YES I DO, ABSOLUTELY, AT YOUR OWN RISK. :)
|
||||
#
|
||||
# **** IMPORTANT ****
|
||||
#
|
||||
|
||||
|
||||
|
||||
# Author of this recipe.
|
||||
__author__ = 'Carlos Laviola'
|
||||
|
||||
language = 'pt_BR'
|
||||
|
||||
# Set tags.
|
||||
tags = 'news, sport, blog'
|
||||
|
||||
# Set publisher and publication type.
|
||||
publisher = 'BBC'
|
||||
publication_type = 'newspaper'
|
||||
|
||||
# Disable stylesheets from site.
|
||||
no_stylesheets = True
|
||||
|
||||
# Specifies an override encoding for sites that have an incorrect charset
|
||||
# specified. Default of 'None' says to auto-detect. Some other BBC recipes
|
||||
# use 'utf8', which works fine (so use that if necessary) but auto-detecting
|
||||
# with None is working fine, so stick with that for robustness.
|
||||
encoding = None
|
||||
|
||||
# Sets whether a feed has full articles embedded in it. The BBC feeds do not.
|
||||
use_embedded_content = False
|
||||
|
||||
# Removes empty feeds - why keep them!?
|
||||
remove_empty_feeds = True
|
||||
|
||||
# Create a custom title which fits nicely in the Kindle title list.
|
||||
# Requires "import time" above class declaration, and replacing
|
||||
# title with custom_title in conversion_options (right column only).
|
||||
# Example of string below: "BBC News - 14 Nov 2011"
|
||||
#
|
||||
# custom_title = "BBC News - " + time.strftime('%d %b %Y')
|
||||
|
||||
'''
|
||||
# Conversion options for advanced users, but don't forget to comment out the
|
||||
# current conversion_options below. Avoid setting 'linearize_tables' as that
|
||||
# plays havoc with the 'old style' table based pages.
|
||||
#
|
||||
conversion_options = { 'title' : title,
|
||||
'comments' : description,
|
||||
'tags' : tags,
|
||||
'language' : language,
|
||||
'publisher' : publisher,
|
||||
'authors' : publisher,
|
||||
'smarten_punctuation' : True
|
||||
}
|
||||
'''
|
||||
|
||||
conversion_options = { 'smarten_punctuation' : True }
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
||||
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
|
||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
||||
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; \
|
||||
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
|
||||
.story-date, .published, .datestamp { font-size: 80%; } \
|
||||
table { width: 100%; } \
|
||||
td img { display: block; margin: 5px auto; } \
|
||||
ul { padding-top: 10px; } \
|
||||
ol { padding-top: 10px; } \
|
||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
||||
h1 { text-align: center; font-size: 175%; font-weight: bold; } \
|
||||
h2 { text-align: center; font-size: 150%; font-weight: bold; } \
|
||||
h3 { text-align: center; font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
|
||||
|
||||
# Remove various tag attributes to improve the look of the ebook pages.
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Remove the (admittedly rarely used) line breaks, "<br />", which sometimes
|
||||
# cause a section of the ebook to start in an unsightly fashion or, more
|
||||
# frequently, a "<br />" will muck up the formatting of a correspondent's byline.
|
||||
# "<br />" and "<br clear/>" are far more frequently used on the table formatted
|
||||
# style of pages, and really spoil the look of the ebook pages.
|
||||
# Each entry is (compiled pattern, replacement callable); every match is
# replaced by the empty string, i.e. the line break tag is dropped.
preprocess_regexps = [
    # Plain self-closing breaks: "<br/>", "<br />", "<BR   />", ...
    (re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
    # Breaks carrying a 'clear' attribute, Eg. '<br clear="all" />'.
    # '[^>]*' keeps the match inside the tag itself. A greedy '.*' here
    # would run on to the LAST '/>' on the same line and silently delete
    # any text and tags (Eg. a following '<img ... />') in between.
    (re.compile(r'<br[ ]*clear[^>]*/>', re.IGNORECASE), lambda m: ''),
]
|
||||
|
||||
|
||||
# Create regular expressions for tag keeping and removal to make the matches more
|
||||
# robust against minor changes and errors in the HTML, Eg. double spaces, leading
|
||||
# and trailing spaces, missing hyphens, and such like.
|
||||
# Python regular expression ('re' class) page: http://docs.python.org/library/re.html
|
||||
|
||||
# ***************************************
|
||||
# Regular expressions for keep_only_tags:
|
||||
# ***************************************
|
||||
|
||||
# The BBC News HTML pages use variants of 'storybody' to denote the section of a HTML
|
||||
# page which contains the main text of the article. Match storybody variants: 'storybody',
|
||||
# 'story-body', 'story body','storybody ', etc.
|
||||
storybody_reg_exp = '^.*story[_ -]*body.*$'
|
||||
|
||||
# The BBC sport and 'newsbeat' (features) HTML pages use 'blq_content' to hold the title
|
||||
# and published date. This is one level above the usual news pages which have the title
|
||||
# and date within 'story-body'. This is annoying since 'blq_content' must also be kept,
|
||||
# resulting in a lot of extra things to be removed by remove_tags.
|
||||
blq_content_reg_exp = '^.*blq[_ -]*content.*$'
|
||||
|
||||
# The BBC has an alternative page design structure, which I suspect is an out-of-date
|
||||
# design but which is still used in some articles, Eg. 'Click' (technology), 'FastTrack'
|
||||
# (travel), and in some sport pages. These alternative pages are table based (which is
|
||||
# why I think they are an out-of-date design) and account for -I'm guesstimating- less
|
||||
# than 1% of all articles. They use a table class 'storycontent' to hold the article
|
||||
# and like blq_content (above) have required lots of extra removal by remove_tags.
|
||||
story_content_reg_exp = '^.*story[_ -]*content.*$'
|
||||
|
||||
# Keep the sections of the HTML which match the list below. The HTML page created by
|
||||
# Calibre will fill <body> with those sections which are matched. Note that the
|
||||
# blq_content_reg_exp must be listed before storybody_reg_exp in keep_only_tags due to
|
||||
# it being the parent of storybody_reg_exp, that is to say the div class/id 'story-body'
|
||||
# will be inside div class/id 'blq_content' in the HTML (if 'blq_content' is there at
|
||||
# all). If they are the other way around in keep_only_tags then blq_content_reg_exp
|
||||
# will end up being discarded.
|
||||
keep_only_tags = [ dict(name='table', attrs={'class':re.compile(story_content_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'id':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'id':re.compile(storybody_reg_exp, re.IGNORECASE)}) ]
|
||||
|
||||
# ************************************
|
||||
# Regular expressions for remove_tags:
|
||||
# ************************************
|
||||
|
||||
# Regular expression to remove share-help and variant tags. The share-help class
|
||||
# is used by the site for a variety of 'sharing' type links, Eg. Facebook, delicious,
|
||||
# twitter, email. Removed to avoid page clutter.
|
||||
share_help_reg_exp = '^.*share[_ -]*help.*$'
|
||||
|
||||
# Regular expression to remove embedded-hyper and variant tags. This class is used to
|
||||
# display links to other BBC News articles on the same/similar subject.
|
||||
embedded_hyper_reg_exp = '^.*embed*ed[_ -]*hyper.*$'
|
||||
|
||||
# Regular expression to remove hypertabs and variant tags. This class is used to
|
||||
# display a tab bar at the top of an article which allows the user to switch to
|
||||
# an article (viewed on the same page) providing further info., 'in depth' analysis,
|
||||
# an editorial, a correspondent's blog entry, and such like. The ability to handle
|
||||
# a tab bar of this nature is currently beyond the scope of this recipe and
|
||||
# possibly of Calibre itself (not sure about that - TO DO - check!).
|
||||
hypertabs_reg_exp = '^.*hyper[_ -]*tabs.*$'
|
||||
|
||||
# Regular expression to remove story-feature and variant tags. Eg. 'story-feature',
|
||||
# 'story-feature related narrow', 'story-feature wide', 'story-feature narrow'.
|
||||
# This class is used to add additional info. boxes, or small lists, outside of
|
||||
# the main story. TO DO: Work out a way to incorporate these neatly.
|
||||
story_feature_reg_exp = '^.*story[_ -]*feature.*$'
|
||||
|
||||
# Regular expression to remove video and variant tags, Eg. 'videoInStoryB',
|
||||
# 'videoInStoryC'. This class is used to embed video.
|
||||
video_reg_exp = '^.*video.*$'
|
||||
|
||||
# Regular expression to remove audio and variant tags, Eg. 'audioInStoryD'.
|
||||
# This class is used to embed audio.
|
||||
audio_reg_exp = '^.*audio.*$'
|
||||
|
||||
# Regular expression to remove pictureGallery and variant tags, Eg. 'pictureGallery'.
|
||||
# This class is used to embed a photo slideshow. See also 'slideshow' below.
|
||||
picture_gallery_reg_exp = '^.*picture.*$'
|
||||
|
||||
# Regular expression to remove slideshow and variant tags, Eg. 'dslideshow-enclosure'.
|
||||
# This class is used to embed a slideshow (not necessarily photo) but both
|
||||
# 'slideshow' and 'pictureGallery' are used for slideshows.
|
||||
slideshow_reg_exp = '^.*slide[_ -]*show.*$'
|
||||
|
||||
# Regular expression to remove social-links and variant tags. This class is used to
|
||||
# display links to a BBC blogger's main page, used in various columnists' blogs
|
||||
# (Eg. Nick Robinson, Robert Peston).
|
||||
social_links_reg_exp = '^.*social[_ -]*links.*$'
|
||||
|
||||
# Regular expression to remove quote and (multi) variant tags, Eg. 'quote',
|
||||
# 'endquote', 'quote-credit', 'quote-credit-title', etc. These are usually
|
||||
# removed by 'story-feature' removal (as they are usually within them), but
|
||||
# not always. The quotation removed is always (AFAICT) in the article text
|
||||
# as well but a 2nd copy is placed in a quote tag to draw attention to it.
|
||||
# The quote class tags may or may not appear in div's.
|
||||
quote_reg_exp = '^.*quote.*$'
|
||||
|
||||
# Regular expression to remove hidden and variant tags, Eg. 'hidden'.
|
||||
# The purpose of these is unclear, they seem to be an internal link to a
|
||||
# section within the article, but the text of the link (Eg. 'Continue reading
|
||||
# the main story') never seems to be displayed anyway. Removed to avoid clutter.
|
||||
# The hidden class tags may or may not appear in div's.
|
||||
hidden_reg_exp = '^.*hidden.*$'
|
||||
|
||||
# Regular expression to remove comment and variant tags, Eg. 'comment-introduction'.
|
||||
# Used on the site to display text about registered users entering comments.
|
||||
comment_reg_exp = '^.*comment.*$'
|
||||
|
||||
# Regular expression to remove form and variant tags, Eg. 'comment-form'.
|
||||
# Used on the site to allow registered BBC users to fill in forms, typically
|
||||
# for entering comments about an article.
|
||||
form_reg_exp = '^.*form.*$'
|
||||
|
||||
# Extra things to remove due to the addition of 'blq_content' in keep_only_tags.
|
||||
|
||||
#<div class="story-actions"> Used on sports pages for 'email' and 'print'.
|
||||
story_actions_reg_exp = '^.*story[_ -]*actions.*$'
|
||||
|
||||
#<div class="bookmark-list"> Used on sports pages instead of 'share-help' (for
|
||||
# social networking links).
|
||||
bookmark_list_reg_exp = '^.*bookmark[_ -]*list.*$'
|
||||
|
||||
#<div id="secondary-content" class="content-group">
|
||||
# NOTE: Don't remove class="content-group" that is needed.
|
||||
# Used on sports pages to link to 'similar stories'.
|
||||
secondary_content_reg_exp = '^.*secondary[_ -]*content.*$'
|
||||
|
||||
#<div id="featured-content" class="content-group">
|
||||
# NOTE: Don't remove class="content-group" that is needed.
|
||||
# Used on sports pages to link to pages like 'tables', 'fixtures', etc.
|
||||
featured_content_reg_exp = '^.*featured[_ -]*content.*$'
|
||||
|
||||
#<div id="navigation">
|
||||
# Used on sports pages to link to pages like 'tables', 'fixtures', etc.
|
||||
# Used sometimes instead of "featured-content" above.
|
||||
navigation_reg_exp = '^.*navigation.*$'
|
||||
|
||||
#<a class="skip" href="#blq-container-inner">Skip to top</a>
|
||||
# Used on sports pages to link to the top of the page.
|
||||
skip_reg_exp = '^.*skip.*$'
|
||||
|
||||
# Extra things to remove due to the addition of 'storycontent' in keep_only_tags,
|
||||
# which are the alternative table design based pages. The purpose of some of these
|
||||
# is not entirely clear from the pages (which are a total mess!).
|
||||
|
||||
# Remove mapping based tags, Eg. <map id="world_map">
|
||||
# The dynamic maps don't seem to work during ebook creation. TO DO: Investigate.
|
||||
map_reg_exp = '^.*map.*$'
|
||||
|
||||
# Remove social bookmarking variation, called 'socialBookMarks'.
|
||||
social_bookmarks_reg_exp = '^.*social[_ -]*bookmarks.*$'
|
||||
|
||||
# Remove page navigation tools, like 'search', 'email', 'print', called 'blq-mast'.
|
||||
blq_mast_reg_exp = '^.*blq[_ -]*mast.*$'
|
||||
|
||||
# Remove 'sharesb', I think this is a generic 'sharing' class. It seems to appear
|
||||
# alongside 'socialBookMarks' whenever that appears. I am removing it as well
|
||||
# under the assumption that it can appear alone as well.
|
||||
sharesb_reg_exp = '^.*sharesb.*$'
|
||||
|
||||
# Remove class 'o'. The worst named user created css class of all time. The creator
|
||||
# should immediately be fired. I've seen it used to hold nothing at all but with
|
||||
# 20 or so empty lines in it. Also to hold a single link to another article.
|
||||
# Whatever it was designed to do it is not wanted by this recipe. Exact match only.
|
||||
o_reg_exp = '^o$'
|
||||
|
||||
# Remove 'promotopbg' and 'promobottombg', link lists. Have decided to
|
||||
# use two reg expressions to make removing this (and variants) robust.
|
||||
promo_top_reg_exp = '^.*promotopbg.*$'
|
||||
promo_bottom_reg_exp = '^.*promobottombg.*$'
|
||||
|
||||
# Remove 'nlp', provides heading for link lists. Requires an exact match due to
|
||||
# risk of matching those letters in something needed, unless I see a variation
|
||||
# of 'nlp' used at a later date.
|
||||
nlp_reg_exp = '^nlp$'
|
||||
|
||||
# Remove 'mva', provides embedded floating content of various types. Variant 'mvb'
|
||||
# has also now been seen. Requires an exact match of 'mva' or 'mvb' due to risk of
|
||||
# matching those letters in something needed.
|
||||
mva_or_mvb_reg_exp = '^mv[ab]$'
|
||||
|
||||
# Remove 'mvtb', seems to be page navigation tools, like 'blq-mast'.
|
||||
mvtb_reg_exp = '^mvtb$'
|
||||
|
||||
# Remove 'blq-toplink', class to provide a link to the top of the page.
|
||||
blq_toplink_reg_exp = '^.*blq[_ -]*top[_ -]*link.*$'
|
||||
|
||||
# Remove 'products and services' links, Eg. desktop tools, alerts, and so on.
|
||||
# Eg. Class="servicev4 ukfs_services" - what a mess of a name. Have decided to
|
||||
# use two reg expressions to make removing this (and variants) robust.
|
||||
prods_services_01_reg_exp = '^.*servicev4.*$'
|
||||
prods_services_02_reg_exp = '^.*ukfs[_ -]*services.*$'
|
||||
|
||||
# Remove -what I think is- some kind of navigation tools helper class, though I am
|
||||
# not sure, it's called: 'blq-rst blq-new-nav'. What I do know is it pops up
|
||||
# frequently and it is not wanted. Have decided to use two reg expressions to make
|
||||
# removing this (and variants) robust.
|
||||
blq_misc_01_reg_exp = '^.*blq[_ -]*rst.*$'
|
||||
blq_misc_02_reg_exp = '^.*blq[_ -]*new[_ -]*nav.*$'
|
||||
|
||||
# Remove 'puffbox' - this may only appear inside 'storyextra', so it may not
|
||||
# need removing - I have no clue what it does other than it contains links.
|
||||
# Whatever it is - it is not part of the article and is not wanted.
|
||||
puffbox_reg_exp = '^.*puffbox.*$'
|
||||
|
||||
# Remove 'sibtbg' and 'sibtbgf' - some kind of table formatting classes.
|
||||
sibtbg_reg_exp = '^.*sibtbg.*$'
|
||||
|
||||
# Remove 'storyextra' - links to relevant articles and external sites.
|
||||
storyextra_reg_exp = '^.*story[_ -]*extra.*$'
|
||||
|
||||
|
||||
remove_tags = [ dict(name='div', attrs={'class':re.compile(story_feature_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(share_help_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(embedded_hyper_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(hypertabs_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(video_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(audio_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(picture_gallery_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(slideshow_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(story_actions_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(bookmark_list_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'id':re.compile(secondary_content_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'id':re.compile(featured_content_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'id':re.compile(navigation_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='form', attrs={'id':re.compile(form_reg_exp, re.IGNORECASE)}),
|
||||
dict(attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
|
||||
dict(attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
|
||||
dict(attrs={'class':re.compile(social_links_reg_exp, re.IGNORECASE)}),
|
||||
dict(attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
|
||||
dict(attrs={'class':re.compile(skip_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='map', attrs={'id':re.compile(map_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='map', attrs={'name':re.compile(map_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'id':re.compile(social_bookmarks_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'id':re.compile(blq_mast_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(sharesb_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(o_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(promo_top_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(promo_bottom_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(nlp_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(mva_or_mvb_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(mvtb_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(blq_toplink_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(prods_services_01_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(prods_services_02_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(blq_misc_01_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(blq_misc_02_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':re.compile(puffbox_reg_exp, re.IGNORECASE)}),
|
||||
dict(attrs={'class':re.compile(sibtbg_reg_exp, re.IGNORECASE)}),
|
||||
dict(attrs={'class':re.compile(storyextra_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':'tools-container'}),
|
||||
dict(name='div', attrs={'class':'tools-container-end'}),
|
||||
dict(name='div', attrs={'class':'g-block story-body contextual-links'}),
|
||||
dict(name='div', attrs={'class':' g-w11 sidebar'})
|
||||
]
|
||||
|
||||
# Uses url to create and return the 'printer friendly' version of the url.
|
||||
# In other words the 'print this page' address of the page.
|
||||
#
|
||||
# There are 3 types of urls used in the BBC site's rss feeds. There is just
|
||||
# 1 type for the standard news while there are 2 used for sports feed urls.
|
||||
# Note: Sports urls are linked from regular news feeds (Eg. 'News Home') when
|
||||
# there is a major story of interest to 'everyone'. So even if no BBC sports
|
||||
# feeds are added to 'feeds' the logic of this method is still needed to avoid
|
||||
# blank / missing / empty articles which have an index title and then no body.
|
||||
def print_version(self, url):
    """Return the 'printer friendly' address for a feed article url.

    The RSS redirect prefix is stripped from ``url`` and a ``print=true``
    query parameter is added.

    Three url shapes are handled:
      * sports type 01 - contains "go/rss/-/sport1/"; the "go/rss/-/"
        part is removed.
      * sports type 02 - contains "go/rss/int/news/-/sport1/"; the
        "go/rss/int/news/-/" part is removed.
      * regular news   - the "go/rss/int/news/-/" part is removed.

    Sports type 02 and regular news need exactly the same replacement, so
    they share one code path.

    url -- the article address taken from the feed (a string).
    Returns the rewritten address as a string.
    """
    if "go/rss/-/sport1/" in url:
        redirect_prefix = "go/rss/-/"
    else:
        redirect_prefix = "go/rss/int/news/-/"

    temp_url = url.replace(redirect_prefix, "")

    # Start a query string with '?', but extend an existing one with '&' so
    # the result stays a well-formed url.
    separator = "&" if "?" in temp_url else "?"

    return temp_url + separator + "print=true"
|
||||
|
||||
|
||||
# Remove articles in feeds based on a string in the article title or url.
|
||||
#
|
||||
# Code logic written by: Starson17 - posted in: "Recipes - Re-usable code"
|
||||
# thread, in post with title: "Remove articles from feed", see url:
|
||||
# http://www.mobileread.com/forums/showpost.php?p=1165462&postcount=6
|
||||
# Many thanks and all credit to Starson17.
|
||||
#
|
||||
# Starson17's code has obviously been altered to suit my requirements.
|
||||
def parse_feeds(self):
    """Parse the feeds via the parent class, then drop unwanted articles.

    An article is removed from its feed when its title contains one of the
    marker strings below, or when its url contains 'scorecards'.

    Returns the list of feeds produced by BasicNewsRecipe.parse_feeds with
    the unwanted articles removed in place.
    """
    # Markers that must appear in the title exactly as written (upper case):
    #   'VIDEO' - title prefix of most video only 'articles'. Upper case
    #             only, so 'Video game banned' is not matched and removed.
    #   'AUDIO' - title prefix of most audio only 'articles'. Upper case
    #             only, so 'Hi-Def audio...' is not matched and removed.
    exact_case_markers = ('VIDEO', 'AUDIO')

    # Markers compared against the upper-cased title, i.e. matched in any case:
    #   'IN PICTURES'    - photo slideshow 'articles'.
    #   'YOUR PICTURES'  - as above, but user contributed pictures.
    #   'SPORTSDAY LIVE' - dynamically updated 'running commentary' during
    #                      a live sporting event.
    #   'LIVE - '        - 'Sportsday Live' sometimes becomes 'Live - Sport
    #                      Name'. Plain 'live' is not used as it would also
    #                      remove articles that are not live sports pages.
    #   'QUIZ OF THE'    - 'Quiz of the week' Flash player news quiz, in
    #                      anticipation of monthly and yearly variants.
    any_case_markers = ('IN PICTURES', 'YOUR PICTURES', 'SPORTSDAY LIVE',
                        'LIVE - ', 'QUIZ OF THE')

    # Call parent's method.
    feeds = BasicNewsRecipe.parse_feeds(self)

    for feed in feeds:
        # Walk a copy of the list so removing from feed.articles is safe.
        for article in feed.articles[:]:
            unwanted = (
                any(marker in article.title for marker in exact_case_markers)
                or any(marker in article.title.upper()
                       for marker in any_case_markers)
                # 'scorecards' urls are BBC sports pages which just display
                # a cricket scorecard - a formatting nightmare of tables
                # and css, so they are left out.
                or 'scorecards' in article.url
            )
            if unwanted:
                feed.articles.remove(article)

    return feeds
|
||||
|
||||
# End of class and file.
|
@ -1,6 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class Benchmark_pl(BasicNewsRecipe):
|
||||
class BenchmarkPl(BasicNewsRecipe):
|
||||
title = u'Benchmark.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = u'benchmark.pl -IT site'
|
||||
@ -12,9 +12,9 @@ class Benchmark_pl(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets=True
|
||||
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
|
||||
remove_tags_after=dict(name='div', attrs={'class':'body'})
|
||||
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
|
||||
INDEX= 'http://www.benchmark.pl'
|
||||
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
|
||||
(u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
|
||||
|
@ -13,6 +13,7 @@ class Biolog_pl(BasicNewsRecipe):
|
||||
masthead_url= 'http://www.biolog.pl/naukowy,portal,biolog.png'
|
||||
cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
|
||||
no_stylesheets = True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
#keeps_only_tags=[dict(id='main')]
|
||||
remove_tags_before=dict(id='main')
|
||||
remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
|
||||
|
@ -1,14 +1,17 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Birmingham post'
|
||||
description = 'News for Birmingham UK'
|
||||
timefmt = ''
|
||||
description = 'Author D.Asbury. News for Birmingham UK'
|
||||
#timefmt = ''
|
||||
# last update 8/9/12
|
||||
__author__ = 'Dave Asbury'
|
||||
cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
|
||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 12
|
||||
linearize_tables = True
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
#auto_cleanup = True
|
||||
language = 'en_GB'
|
||||
|
||||
@ -17,11 +20,12 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1',attrs={'id' : 'article-headline'}),
|
||||
dict(attrs={'id' : 'article-header'}),
|
||||
#dict(name='h1',attrs={'id' : 'article-header'}),
|
||||
dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
|
||||
dict(name='div',attrs={'class' : 'article-image full'}),
|
||||
dict(attrs={'clas' : 'art-o art-align-center otm-1 '}),
|
||||
dict(name='div',attrs={'class' : 'article main'}),
|
||||
dict(name='div',attrs={'class' : 'article-image full'}),
|
||||
dict(attrs={'clas' : 'art-o art-align-center otm-1 '}),
|
||||
dict(name='div',attrs={'class' : 'article main'}),
|
||||
#dict(name='p')
|
||||
#dict(attrs={'id' : 'three-col'})
|
||||
]
|
||||
@ -37,11 +41,9 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
(u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
|
||||
|
||||
]
|
||||
extra_css = '''
|
||||
body {font: sans-serif medium;}'
|
||||
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
|
||||
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
|
||||
span{ font-size:9.5px; font-weight:bold;font-style:italic}
|
||||
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
|
||||
'''
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;text-align:center;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
55
recipes/blesk.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class bleskRecipe(BasicNewsRecipe):
    """News recipe for blesk.cz.

    Downloads the RSS feeds listed in ``feeds`` and skips any article whose
    url has already been handed out by ``get_article_url``.
    """

    __author__ = 'bubak'
    title = u'Blesk'
    publisher = u''
    description = 'blesk.cz'
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False

    feeds = [
        (u'Zprávy', u'http://www.blesk.cz/rss/7'),
        (u'Blesk', u'http://www.blesk.cz/rss/1'),
        (u'Sex a tabu', u'http://www.blesk.cz/rss/2'),
        (u'Celebrity', u'http://www.blesk.cz/rss/5'),
        (u'Cestování', u'http://www.blesk.cz/rss/12')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """

    remove_attributes = []
    remove_tags_before = dict(name='div', attrs={'id':['boxContent']})
    remove_tags_after = dict(name='div', attrs={'class':['artAuthors']})
    remove_tags = [dict(name='div', attrs={'class':['link_clanek']}),
                   dict(name='div', attrs={'id':['partHeader']}),
                   dict(name='div', attrs={'id':['top_bottom_box', 'lista_top']})]
    # Everything from the first 'textovytip' / 'related' div to the end of
    # the page is replaced by a closing </body> tag.
    preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]

    keep_only_tags = [dict(name='div', attrs={'class':'articleContent'})]

    # Urls already accepted by get_article_url. This is a class-level dict,
    # so it is shared by every instance of the recipe.
    visited_urls = {}

    def get_article_url(self, article):
        """Return the article url, or None for a duplicate or missing url.

        The url is obtained from BasicNewsRecipe.get_article_url. The first
        time a url is seen it is recorded in ``visited_urls`` and returned;
        any later occurrence returns None.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if url is None:
            # No url to record; also avoids a TypeError when building the
            # log message by string concatenation below.
            # NOTE(review): presumably the parent returns None when the feed
            # entry has no link - confirm against BasicNewsRecipe.
            self.log.debug('Ignoring article without url')
            return None
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return url
|
||||
|
||||
|
||||
|
||||
|
28
recipes/blognexto.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class blognexto(BasicNewsRecipe):
    # Recipe for blog.nexto.pl, fed from a single FeedBurner feed.
    title = 'BLOG.NEXTO.pl'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'pl'
    description = 'o e-publikacjach prawie wszystko'
    masthead_url = 'http://blog.nexto.pl/wp-content/uploads/2012/04/logo-blog-nexto.pl_.jpg'

    # Feed handling.
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100

    # Page clean-up switches.
    remove_javascript = True
    no_stylesheets = True

    # Only the main content container is kept ...
    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'}),
    ]

    # ... and these elements are removed from it.
    remove_tags = [
        dict(name='div', attrs={'class': 'comment-cloud'}),
        dict(name='p', attrs={'class': 'post-date1'}),
        dict(name='div', attrs={'class': 'fb-like'}),
        dict(name='div', attrs={'class': 'tags'}),
        dict(name='div', attrs={'class': 'postnavi'}),
        dict(name='div', attrs={'class': 'commments-box'}),
        dict(name='div', attrs={'id': 'respond'}),
    ]

    feeds = [('Artykuly', 'http://feeds.feedburner.com/blognexto')]
|
@ -15,7 +15,8 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
auto_cleanup = True
|
||||
encoding = 'utf-8'
|
||||
publisher = 'Boston'
|
||||
category = 'news, boston, usa, world'
|
||||
language = 'en'
|
||||
@ -30,23 +31,23 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(attrs={'id':['INDblogEntry','blogEntry','articleHeader','articleGraphs','galleryShell']})]
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script','iframe'])
|
||||
,dict(attrs={'id':['blogheadTools','bdc_emailWidget','tools','relatedContent']})
|
||||
]
|
||||
#keep_only_tags = [dict(attrs={'id':['INDblogEntry','blogEntry','articleHeader','articleGraphs','galleryShell']})]
|
||||
#remove_tags = [
|
||||
#dict(name=['object','link','script','iframe'])
|
||||
#,dict(attrs={'id':['blogheadTools','bdc_emailWidget','tools','relatedContent']})
|
||||
#]
|
||||
|
||||
feeds = [
|
||||
(u'Top Stories' , u'http://feeds.boston.com/boston/topstories' )
|
||||
,(u'Patriots news', u'http://feeds.boston.com/boston/sports/football/patriots')
|
||||
,(u'Patriots news', u'http://feeds.boston.com/boston/sports/football/patriots/patriots_rss')
|
||||
,(u'National news', u'http://feeds.boston.com/boston/news/nation' )
|
||||
,(u'World news' , u'http://feeds.boston.com/boston/news/world' )
|
||||
]
|
||||
|
||||
def print_version(self, url):
    # The full-page variant of an article is the same URL with this query
    # string appended.
    full_page_query = '?page=full'
    return url + full_page_query
|
||||
#def print_version(self, url):
|
||||
#return url + '?page=full'
|
||||
|
||||
def get_article_url(self, article):
    """Return the entry's ``guid`` with its trailing query string removed.

    Returns None when the entry has no ``guid``. A guid that contains no
    '?' is returned unchanged.
    """
    rawarticle = article.get('guid', None)
    if rawarticle is None:
        return None
    base, sep, _ = rawarticle.rpartition('?')
    # rpartition puts the whole string in the LAST slot when there is no
    # '?', so taking [0] unconditionally would yield an empty URL.
    return base if sep else rawarticle
|
||||
#def get_article_url(self, article):
|
||||
#rawarticle = article.get('guid', None)
|
||||
#return rawarticle.rpartition('?')[0]
|
||||
|
||||
|
82
recipes/brecha.recipe
Normal file
@ -0,0 +1,82 @@
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.brecha.com.uy
|
||||
'''
|
||||
|
||||
import urllib
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Brecha(BasicNewsRecipe):
    # Recipe for www.brecha.com.uy. Login is optional: credentials are
    # posted only when both a username and a password are set.

    # -- descriptive metadata --
    title = 'Brecha Digital'
    __author__ = 'Darko Miletic'
    description = 'Brecha , Cultura ,Sociales , Separatas, Lupas, Vueltas de Montevideo y toda la infomacion que caracteriza a este semanario'
    publisher = 'Brecha'
    category = 'brecha, digital, prensa, uruguay, semanario, sociedad, politica, cultura'
    language = 'es_UY'
    publication_type = 'magazine'
    masthead_url = 'http://www.brecha.com.uy/templates/ja_nex/themes/orange/images/logo.png'

    # -- fetch / clean-up behaviour --
    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True
    auto_cleanup = True
    needs_subscription = 'optional'
    extra_css = """
                   body{font-family: Arial,Helvetica,sans-serif }
                   img{margin-bottom: 0.4em; display:block}
                """

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    def get_browser(self):
        # The members page is always opened first; the login form data is
        # posted afterwards only if credentials were supplied.
        browser = BasicNewsRecipe.get_browser()
        browser.open('http://www.brecha.com.uy/index.php/acceder-miembros')
        if self.username is None or self.password is None:
            return browser
        form_fields = {
            'task': 'login',
            'view': 'register',
            'username': self.username,
            'password': self.password,
        }
        payload = urllib.urlencode(form_fields)
        browser.open('http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', payload)
        return browser

    remove_tags = [
        dict(name=['meta','link']),
        dict(name='div', attrs={'id':'js_ja'}),
        dict(name='ul', attrs={'class':'actions'}),
    ]
    remove_attributes = ['lang', 'border']

    feeds = [
        (u'Politica' , u'http://www.brecha.com.uy/index.php/politica-uruguaya?format=feed&type=rss'),
        (u'Mundo' , u'http://www.brecha.com.uy/index.php/mundo?format=feed&type=rss'),
        (u'Mapamundi', u'http://www.brecha.com.uy/index.php/mundo/mapamundi?format=feed&type=rss'),
        (u'Cultura' , u'http://www.brecha.com.uy/index.php/cultura?format=feed&type=rss'),
        (u'Vueltas de Montevideo', u'http://www.brecha.com.uy/index.php/cultura/vueltas-de-montevideo?format=feed&type=rss'),
        (u'Secos y Mojados', u'http://www.brecha.com.uy/index.php/cultura/secos-y-mojados?format=feed&type=rss'),
        (u'Literarias', u'http://www.brecha.com.uy/index.php/cultura/literarias?format=feed&type=rss'),
        (u'Sociedad', u'http://www.brecha.com.uy/index.php/sociedad?format=feed&type=rss'),
        (u'Especiales', u'http://www.brecha.com.uy/index.php/especiales?format=feed&type=rss'),
        (u'Contratapa', u'http://www.brecha.com.uy/index.php/contratapa?format=feed&type=rss'),
    ]

    def print_version(self, url):
        # Printable layout = article URL plus this fixed query string.
        print_query = '?tmpl=component&print=1&layout=default&page='
        return url + print_query

    def get_cover_url(self):
        # The cover is the first image on the index page whose alt text
        # starts with 'Tapa '; None when there is no such image.
        index = self.index_to_soup('http://www.brecha.com.uy/index.php')
        candidates = (img for img in index.findAll('img', alt=True)
                      if img['alt'].startswith('Tapa '))
        cover = next(candidates, None)
        if cover is None:
            return None
        return 'http://www.brecha.com.uy' + urllib.quote(cover['src'])
|
140
recipes/brewiarz.recipe
Normal file
@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import datetime, re
|
||||
|
||||
|
||||
class brewiarz(BasicNewsRecipe):
    """Recipe that collects the daily texts published on brewiarz.pl.

    One feed is produced per day (``next_days`` days starting today). When
    a day's index page carries no article list, the links found by
    ``get_sectors`` are followed and each yields a feed of its own.
    """

    title = u'Brewiarz'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Serwis poświęcony Liturgii Godzin (brewiarzowi) - formie codziennej modlitwy Kościoła katolickiego.'
    masthead_url = 'http://brewiarz.pl/images/logo2.gif'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    publication_type = 'newspaper'
    # Number of consecutive days, starting with today, to download.
    next_days = 1

    def parse_index(self):
        """Return a list of ``(feed title, articles)`` tuples, one or more per day."""
        dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
                        "05": "v", "06": "vi", "07": "vii", "08": "viii",
                        "09": "ix", "10": "x", "11": "xi", "12": "xii"}

        # Indexed by datetime.weekday() (Monday == 0). Looking the name up
        # via strftime("%A") depends on the process locale and raises
        # KeyError whenever the locale's day names are not English.
        weekdays_pl = ["Poniedziałek", "Wtorek", "Środa", "Czwartek",
                       "Piątek", "Sobota", "Niedziela"]

        now = datetime.datetime.now()

        feeds = []
        for i in range(0, self.next_days):
            url_date = now + datetime.timedelta(days=i)
            url_date_month = url_date.strftime("%m")
            url_date_month_roman = dec2rom_dict[url_date_month]
            url_date_day = url_date.strftime("%d")
            url_date_year = url_date.strftime("%Y")[2:]
            url_date_weekday_pl = weekdays_pl[url_date.weekday()]

            # Directory of the day, e.g. <roman month>_<yy>/<dd><mm>/
            day_url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + "/" + url_date_day + url_date_month + "/"
            day_title = url_date_weekday_pl + " " + url_date_day + "." + url_date_month + "." + url_date_year

            url = day_url + "index.php3"
            articles = self.parse_pages(url)
            if articles:
                feeds.append((day_title, articles))
            else:
                sectors = self.get_sectors(url)
                for subpage in sectors:
                    title = day_title + " - " + subpage.string
                    url = day_url + subpage['href']
                    self.log.debug(url)
                    articles = self.parse_pages(url)
                    if articles:
                        feeds.append((title, articles))
        return feeds

    def get_sectors(self, url):
        """Return the <a> tags found in the width-490 table of the page at *url*."""
        sectors = []
        soup = self.index_to_soup(url)
        sectors_table = soup.find(name='table', attrs={'width': '490'})
        sector_links = sectors_table.findAll(name='a')
        for sector_links_modified in sector_links:
            link_parent_text = sector_links_modified.findParent(name='div').text
            if link_parent_text:
                # NOTE(review): link_parent_text is already the result of a
                # `.text` lookup; taking `.text` of it again looks
                # unintended. Left unchanged -- confirm against the
                # BeautifulSoup version in use before touching it.
                sector_links_modified.text = link_parent_text.text
            sectors.append(sector_links_modified)
        return sectors

    def parse_pages(self, url):
        """Return the article dicts listed on the page at *url*.

        Returns None when the page has no element with class ``www``.
        """
        current_articles = []
        soup = self.index_to_soup(url)
        www = soup.find(attrs={'class': 'www'})
        if www:
            box_title = www.find(text='Teksty LG')
            article_box_parent = box_title.findParent('ul')
            article_box_sibling = article_box_parent.findNextSibling('ul')
            for li in article_box_sibling.findAll('li'):
                link = li.find(name='a')
                ol = link.findNextSibling(name='ol')
                if ol:
                    # Entry with a nested list: one article per sub-link.
                    sublinks = ol.findAll(name='a')
                    for sublink in sublinks:
                        link_title = self.tag_to_string(link) + " - " + self.tag_to_string(sublink)
                        link_url_print = re.sub('php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
                        # url[:-10] drops the last 10 characters of the page
                        # URL (the length of "index.php3").
                        link_url = url[:-10] + link_url_print
                        current_articles.append({'title': link_title,
                                                 'url': link_url, 'description': '', 'date': ''})
                else:
                    # Links that sit inside an <ol> were already handled as
                    # sub-links of their parent entry.
                    if link.findParent(name = 'ol'):
                        continue
                    else:
                        link_title = self.tag_to_string(link)
                        link_url_print = re.sub('php3', 'php3?kr=_druk&wr=lg&', link['href'])
                        link_url = url[:-10] + link_url_print
                        current_articles.append({'title': link_title,
                                                 'url': link_url, 'description': '', 'date': ''})
            return current_articles
        else:
            return None

    def preprocess_html(self, soup):
        """Remove page chrome from an article and strip non-whitelisted tags."""
        # <div> containing the link back to the site.
        footer = soup.find(name='a', attrs={'href': 'http://brewiarz.pl'})
        footer_parent = footer.findParent('div')
        footer_parent.extract()

        # <div> containing the site URL as plain text.
        header = soup.find(text='http://brewiarz.pl')
        header_parent = header.findParent('div')
        header_parent.extract()

        subheader = soup.find(text='Kolor szat:').findParent('div')
        subheader.extract()

        # First remaining <b> element.
        color = soup.find('b')
        color.extract()

        cleaned = self.strip_tags(soup)

        # The list is built once, so the indices refer to the document as
        # it was before any of these three extractions.
        div = cleaned.findAll(name='div')
        div[1].extract()
        div[2].extract()
        div[3].extract()

        return cleaned

    def strip_tags(self, soup_dirty):
        """Unwrap every tag not in VALID_TAGS, keeping its children in place."""
        VALID_TAGS = ['p', 'div', 'br', 'b', 'a', 'title', 'head', 'html', 'body']

        for tag in soup_dirty.findAll(True):
            if tag.name not in VALID_TAGS:
                # Locate the tag's position among its siblings.
                for i, x in enumerate(tag.parent.contents):
                    if x == tag:
                        break
                else:
                    # Reported through the recipe log instead of a Python 2
                    # print statement.
                    self.log("Can't find %s in %s" % (tag, tag.parent))
                    continue
                # Inserting the children in reverse at the same index keeps
                # their original order.
                for r in reversed(tag.contents):
                    tag.parent.insert(i, r)
                tag.extract()

        return soup_dirty
|
45
recipes/buchreport.recipe
Normal file
@ -0,0 +1,45 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''
|
||||
|
||||
class Buchreport(BasicNewsRecipe) :
    # Recipe that converts the Buchreport RSS feed into an e-book.
    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    description = 'Buchreport'
    version = 4
    title = u'Buchreport'
    timefmt = ' [%d.%m.%Y]'
    encoding = 'cp1252'
    language = 'de'

    # h3 used "font-weight: regular", which is not a valid CSS value;
    # "normal" is the keyword for the regular weight.
    extra_css = 'body { margin-left: 0.00em; margin-right: 0.00em; } \
                 article, articledate, articledescription { text-align: left; } \
                 h1 { text-align: left; font-size: 140%; font-weight: bold; } \
                 h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } \
                 h3 { text-align: left; font-size: 100%; font-weight: normal; font-style: italic; } \
                 h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }'

    oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    # Keep what lies between the first <h2> and the styled <div> below;
    # that <div> itself is also listed in remove_tags.
    remove_tags_before = dict(name='h2')
    remove_tags_after = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]})
    ]
    remove_tags = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}),
        dict(name='iframe'),
        dict(name='img')
    ]

    feeds = [
        (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
    ]

    def get_masthead_url(self):
        # Fixed logo URL; nothing is looked up at run time.
        return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'
|
@ -16,6 +16,7 @@ class BusinessSpectator(BasicNewsRecipe):
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
@ -32,11 +33,11 @@ class BusinessSpectator(BasicNewsRecipe):
|
||||
,'linearize_tables': False
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
|
||||
#keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
|
||||
|
||||
remove_tags = [dict(attrs={'class':'hql'})]
|
||||
#remove_tags = [dict(attrs={'class':'hql'})]
|
||||
|
||||
remove_attributes = ['width','height','style']
|
||||
#remove_attributes = ['width','height','style']
|
||||
|
||||
feeds = [
|
||||
('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
|
||||
@ -46,3 +47,4 @@ class BusinessSpectator(BasicNewsRecipe):
|
||||
('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
|
||||
('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
|
||||
]
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.business-standard.com
|
||||
'''
|
||||
@ -14,10 +14,12 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
auto_cleanup = False
|
||||
encoding = 'cp1252'
|
||||
publisher = 'Business Standard Limited'
|
||||
category = 'news, business, money, india, world'
|
||||
language = 'en_IN'
|
||||
masthead_url = 'http://feeds.business-standard.com/images/logo_08.jpg'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
@ -26,7 +28,7 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': True
|
||||
}
|
||||
keep_only_tags=[dict(attrs={'class':'TableClas'})]
|
||||
#keep_only_tags=[dict(name='td', attrs={'class':'TableClas'})]
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script','iframe','base','meta'])
|
||||
,dict(attrs={'class':'rightDiv2'})
|
||||
@ -45,3 +47,8 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
,(u'Management & Mktg' , u'http://feeds.business-standard.com/rss/7_0.xml' )
|
||||
,(u'Opinion' , u'http://feeds.business-standard.com/rss/5_0.xml' )
|
||||
]
|
||||
|
||||
def print_version(self, url):
    # The last path segment becomes the `tp` parameter and the segment
    # before it becomes `autono`.
    head, _, tp = url.rpartition('/')
    autono = head.rpartition('/')[2]
    prefix = 'http://www.business-standard.com/india/printpage.php?autono='
    return prefix + autono + '&tp=' + tp
|
||||
|
@ -1,105 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008 Kovid Goyal kovid@kovidgoyal.net, 2010 Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.businessweek.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BusinessWeek(BasicNewsRecipe):
    # Recipe for www.businessweek.com built from the site's RSS feeds.
    title = 'Business Week'
    __author__ = 'Kovid Goyal and Darko Miletic'
    description = 'Read the latest international business news & stock market news. Get updated company profiles, financial advice, global economy and technology news.'
    publisher = 'Bloomberg L.P.'
    category = 'Business, business news, stock market, stock market news, financial advice, company profiles, financial advice, global economy, technology news'
    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
    cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'
    masthead_url = 'http://assets.businessweek.com/images/bw-logo.png'
    extra_css = """
                   body{font-family: Helvetica,Arial,sans-serif }
                   img{margin-bottom: 0.4em; display:block}
                   .tagline{color: gray; font-style: italic}
                   .photoCredit{font-size: small; color: gray}
                """

    conversion_options = {
                            'comment' : description
                          , 'tags' : category
                          , 'publisher' : publisher
                          , 'language' : language
                        }

    remove_tags = [
                     dict(attrs={'class':'inStory'})
                    ,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
                    ,dict(attrs={'id':['inset','videoDisplay']})
                  ]
    keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody','article_body','articleBody']})]
    remove_attributes = ['lang']
    match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']

    feeds = [
              (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
              (u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ),
              (u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
              (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
              (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
              (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
              (u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
              (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
              (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
              (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
              (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
              (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
              (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
              (u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
              (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
              (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
              (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
              (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
              (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
              (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
            ]

    def get_article_url(self, article):
        """Return the entry's guid without its query string, or None to skip it.

        Entries with no guid, and guids containing any of the excluded
        substrings, are skipped.
        """
        url = article.get('guid', None)
        if url is None:
            # The substring tests below would raise TypeError on None.
            return None
        excluded = ('podcasts', 'surveys', 'images', 'feedroom', '/magazine/toc/')
        if any(marker in url for marker in excluded):
            return None
        # With no '?' in the guid, rpartition leaves the whole string in
        # `rest` and `rurl` empty.
        rurl, sep, rest = url.rpartition('?')
        if rurl:
            return rurl
        return rest

    def print_version(self, url):
        """Map an article URL to its printable counterpart.

        News and blog URLs are returned unchanged; magazine URLs get the
        ``printer/`` prefix and all others ``print/``.
        """
        if '/news/' in url or '/blog/' in url:
            return url
        if '/magazine' in url:
            rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/printer/')
        else:
            rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/')
        return rurl.replace('/investing/','/investor/')

    def preprocess_html(self, soup):
        """Drop inline styles and replace text-only links with their text."""
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            # Links whose `.string` is None are left as they are.
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
|
68
recipes/bwmagazine2.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from collections import OrderedDict
|
||||
|
||||
class BusinessWeekMagazine(BasicNewsRecipe):
    """Recipe that builds the current Business Week magazine issue.

    The issue page is scraped for article headings in its left and centre
    columns; each article is then opened to find its print-version link.
    """

    title = 'Business Week Magazine'
    __author__ = 'Rick Shang'

    description = 'A renowned business publication. Business news, trends and profiles of successful businesspeople.'
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    keep_only_tags = [
        dict(name='div', attrs={'id':'article_body_container'}),
    ]
    # NOTE(review): 'ui' is not an HTML element name; possibly meant 'ul'.
    # Left unchanged -- confirm against the site markup.
    remove_tags = [dict(name='ui'),dict(name='li')]
    # Was `no_javascript`; the other recipes in this file set
    # `remove_javascript`.
    remove_javascript = True
    no_stylesheets = True

    cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'

    def _collect_articles(self, feeds, column, heading_tag, with_description):
        """Append one article per *heading_tag* element in *column* to *feeds*.

        *feeds* maps section title -> list of article dicts and is updated
        in place. The section is the text of the nearest preceding <h3>.
        """
        for heading in column.findAll(heading_tag):
            desc = ''
            if with_description:
                desc = self.tag_to_string(heading.findNext('p')).strip()
            section_title = self.tag_to_string(heading.findPrevious('h3')).strip()
            title = self.tag_to_string(heading.a).strip()
            url = heading.a['href']
            # The article page carries the link to its printable version.
            soup0 = self.index_to_soup(url)
            urlprint = soup0.find('li', attrs={'class':'print'}).a['href']
            feeds.setdefault(section_title, []).append(
                {'title':title, 'url':urlprint, 'description':desc, 'date':''})

    def parse_index(self):
        """Return ``[(section title, [article, ...]), ...]`` in page order."""
        #Go to the issue
        soup = self.index_to_soup('http://www.businessweek.com/magazine/news/articles/business_news.htm')

        #Find date
        mag = soup.find('h2', text='Magazine')
        dates = self.tag_to_string(mag.findNext('h3'))
        self.timefmt = u' [%s]'%dates

        #Go to the main body
        feeds = OrderedDict()
        # Left column: <h4> headings, no description.
        div0 = soup.find('div', attrs={'class':'column left'})
        self._collect_articles(feeds, div0, 'h4', False)
        # Centre column: <h5> headings, description from the next <p>.
        div1 = soup.find('div', attrs={'class':'column center'})
        self._collect_articles(feeds, div1, 'h5', True)

        return list(feeds.items())
|
@ -1,35 +1,314 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CalgaryHerald(BasicNewsRecipe):
    # RSS-based recipe for the Calgary Herald.
    title = u'Calgary Herald'
    oldest_article = 3
    max_articles_per_feed = 100

    feeds = [
        (u'News', u'http://rss.canada.com/get/?F233'),
        (u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'),
        (u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'),
        (u'Politics', u'http://rss.canada.com/get/?F7551'),
        (u'National', u'http://rss.canada.com/get/?F7552'),
        (u'World', u'http://rss.canada.com/get/?F7553'),
    ]
    __author__ = 'rty'
    # Was misspelled `pubisher`.
    publisher = 'Calgary Herald'
    description = 'Calgary, Alberta, Canada'
    category = 'News, Calgary, Alberta, Canada'

    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en_CA'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables':True}
    ##masthead_url = 'http://www.calgaryherald.com/index.html'
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'})
    ]
    remove_tags_after = {'class':"story_tool_hr"}
|
||||
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
'''
|
||||
www.canada.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
postmedia_index_pages = [
|
||||
(u'Headlines',u'/index.html'),
|
||||
(u'Ottawa & Area',u'/news/ottawa/index.html'),
|
||||
(u'Vancouver',u'/news/vancouver/index.html'),
|
||||
(u'Calgary',u'/news/calgary/index.html'),
|
||||
(u'Edmonton',u'/news/edmonton/index.html'),
|
||||
(u'Montreal',u'/news/montreal/index.html'),
|
||||
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
|
||||
(u'British Columbia',u'/news/bc/index.html'),
|
||||
(u'Alberta',u'/news/alberta/index.html'),
|
||||
(u'Canada',u'/news/canada/index.html'),
|
||||
(u'National',u'/news/national/index.html'),
|
||||
(u'Politics',u'/news/politics/index.html'),
|
||||
(u'Insight',u'/news/insight/index.html'),
|
||||
(u'Special Reports',u'/news/specialreports/index.html'),
|
||||
(u'Gangs',u'/news/gangs/index.html'),
|
||||
(u'Education',u'/news/education/index.html'),
|
||||
(u'Health',u'/news/health/index.html'),
|
||||
(u'Environment',u'/news/environment/index.html'),
|
||||
(u'World',u'/news/world/index.html'),
|
||||
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
|
||||
(u'Crime',u'/news/blotter/index.html'),
|
||||
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
|
||||
(u'Diplomatica',u'/news/diplomatica/index.html'),
|
||||
(u'Opinion',u'/opinion/index.html'),
|
||||
(u'Columnists',u'/columnists/index.html'),
|
||||
(u'Editorials',u'/opinion/editorials/index.html'),
|
||||
(u'Letters',u'/opinion/letters/index.html'),
|
||||
(u'Business',u'/business/index.html'),
|
||||
(u'Sports',u'/sports/index.html'),
|
||||
(u'Arts',u'/entertainment/index.html'),
|
||||
(u'Life',u'/life/index.html'),
|
||||
(u'Technology',u'/technology/index.html'),
|
||||
(u'Travel',u'/travel/index.html'),
|
||||
(u'Health',u'/health/index.html')
|
||||
]
|
||||
|
||||
|
||||
# un-comment the following six lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
|
||||
## logo_url = 'vplogo.jpg'
|
||||
## fp_tag = 'CAN_TP'
|
||||
|
||||
# un-comment the following six lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
|
||||
## logo_url = 'vslogo.jpg'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following six lines for the Calgary Herald
|
||||
title = u'Calgary Herald'
|
||||
url_prefix = 'http://www.calgaryherald.com'
|
||||
description = u'News from Calgary, AB'
|
||||
std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
|
||||
logo_url = 'chlogo.jpg'
|
||||
fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following six lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
|
||||
## logo_url = 'ejlogo.jpg'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following six lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||
## logo_url = 'oclogo.jpg'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following six lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
|
||||
## logo_url = 'mglogo.jpg'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
Kindle_Fire=False
|
||||
masthead_url = std_logo_url
|
||||
|
||||
url_list = []
|
||||
language = 'en_CA'
|
||||
__author__ = 'Nick Redding'
|
||||
no_stylesheets = True
|
||||
timefmt = ' [%b %d]'
|
||||
encoding = 'utf-8'
|
||||
extra_css = '''
|
||||
.timestamp { font-size:xx-small; display: block; }
|
||||
#storyheader { font-size: medium; }
|
||||
#storyheader h1 { font-size: x-large; }
|
||||
#storyheader h2 { font-size: small; font-style: italic; }
|
||||
.byline { font-size:xx-small; }
|
||||
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
||||
|
||||
remove_tags = [{'class':'comments'},
|
||||
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
|
||||
dict(name='h2', attrs={'id':'photocredit'}),
|
||||
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
|
||||
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
|
||||
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
|
||||
def get_cover_url(self):
    """Return the URL of the newest available front-page image, or None.

    The image URL is built from the day of the month and ``self.fp_tag``.
    Today is tried first, then each of the six previous days; the first
    URL that opens without raising is returned. If none opens, "Cover
    unavailable" is logged and None is returned.
    """
    from datetime import timedelta, date
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Any fetch failure means "try the previous day". Catching
            # Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def prepare_masthead_image(self, path_to_image, out_path):
    """Write the masthead image found at path_to_image to out_path.

    When ``self.Kindle_Fire`` is false the work is delegated to
    BasicNewsRecipe. Otherwise the image is composed onto a fresh canvas
    of the same width and height and that canvas is saved.
    """
    if not self.Kindle_Fire:
        BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
        return

    from calibre.utils.magick import Image, create_canvas

    logo = Image()
    logo.open(path_to_image)
    width, height = logo.size
    canvas = create_canvas(width, height)
    canvas.compose(logo)
    canvas.save(out_path)
|
||||
|
||||
def fixChars(self,string):
    """Return string with cp1252 control-range punctuation made typographic.

    The characters \\x91-\\x94, \\x96 and \\x97 are replaced by the curly
    quotes and dashes they stand for, and the literal entity text
    ``&#x2019;`` is replaced by a right single quote.
    """
    replacements = (
        ("\x91", "‘"),        # lsquo
        ("\x92", "’"),        # rsquo
        ("\x93", "“"),        # ldquo
        ("\x94", "”"),        # rdquo
        ("\x96", "–"),        # ndash
        ("\x97", "—"),        # mdash
        # The last rule previously replaced "’" with itself, which did
        # nothing; the un-decoded entity is what needs converting.
        ("&#x2019;", "’"),
    )
    fixed = string
    for old, new in replacements:
        # None of the patterns contains regex syntax, so a plain replace
        # gives the same result as re.sub.
        fixed = fixed.replace(old, new)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Return description cleaned up for use in the Kindle table of contents.

    A falsy description (None or empty) is returned unchanged. Otherwise
    HTML entities are decoded through BeautifulStoneSoup, every '&' is
    written as the numeric entity ``&#38;`` and the result is passed
    through fixChars.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description

    massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # Replace '&' with '&#38;'. The substitution used to map '&' onto
    # itself, so the ampersand was never escaped.
    # NOTE(review): the numeric entity is the presumed original target -
    # confirm against the upstream recipe.
    massaged = massaged.replace("&", "&#38;")
    return self.fixChars(massaged)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and a fallback summary to article.

    For the first page of an article, the first <img> inside <body> (if
    any) is registered as the TOC thumbnail, with any
    ``links\\link<N>\\`` prefix removed from its src. If the article's
    text summary is blank, the page's ``og:description`` meta content (if
    present) becomes both ``summary`` and ``text_summary``.
    """
    if first:
        lead_img = soup.find('body').find('img')
        if lead_img is not None:
            thumb_src = re.sub(r'links\\link\d+\\','',lead_img['src'])
            self.add_toc_thumbnail(article,thumb_src)

    if article.text_summary.strip():
        # A non-blank summary already exists; keep it.
        return

    og_desc = soup.find('meta',attrs={'property':'og:description'})
    if og_desc is None:
        return
    article.summary = article.text_summary = og_desc['content']
|
||||
|
||||
def strip_anchors(self,soup):
    """Replace each <a> that has no <img> child with its rendered contents.

    Anchors that wrap an image are left in place. The same soup object is
    returned after modification.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                continue
            inner = anchor.renderContents().decode('cp1252','replace')
            anchor.replaceWith(inner)
    return soup
|
||||
|
||||
|
||||
def preprocess_html(self,soup):
    """Tidy one article page before conversion and return the soup.

    Removes empty ``id`` attributes from divs; when the page has a
    ``storyphoto`` div but no ``storycontent`` div, replaces the
    ``storyphoto`` div with a div of captioned images built from its
    ``relatedthumbs`` links; extracts any remaining ``storyphoto`` divs;
    then hands the soup to strip_anchors.
    """
    # delete empty id attributes--they screw up the TOC for unknown reasons
    for blank_div in soup.findAll('div',attrs={'id':''}):
        del(blank_div['id'])

    gallery = soup.find('div',attrs={'id':'storyphoto'})
    # photo gallery perhaps
    if gallery is not None and soup.find('div',attrs={'id':'storycontent'}) is None:
        allpics = Tag(soup,'div')

        lead = gallery.find('div','storyimage')
        if lead is not None:
            lead.extract()

        thumbs = gallery.find('div',attrs={'id':'relatedthumbs'})
        thumb_links = thumbs.findAll('a') if thumbs is not None else []
        for atag in thumb_links:
            picture = Tag(soup,'img')
            # Keep only the part of the thumbnail URL before any '?'.
            picture['src'] = atag.img['src'].partition('?')[0]

            caption = Tag(soup,'p')
            caption.insert(0,atag.img['alt'])
            caption['class']='photocaption'

            # Both inserts use position 0, so the image ends up first.
            holder = Tag(soup,'div')
            holder.insert(0,caption)
            holder.insert(0,picture)
            allpics.append(holder)

        gallery.replaceWith(allpics)

    for leftover in soup.findAll('div',attrs={'id':'storyphoto'}):
        leftover.extract()
    return self.strip_anchors(soup)
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
    """Build the list of (section, articles) pairs for this issue.

    Each (name, url) pair in ``self.postmedia_index_pages`` is fetched
    (relative to ``self.url_prefix``) and scanned for headline blocks.
    Sections that yield no articles, or whose index page cannot be
    fetched, are left out of the result.
    """
    articles = {}
    ans = []

    def handle_article(adiv,key):
        # Record the article linked from adiv's <h1><a> under section key.
        h1tag = adiv.h1
        if h1tag is None:
            return
        atag = h1tag.a
        if atag is None:
            return

        href = atag['href']
        if href.startswith('http'):
            # Absolute links are skipped.
            return
        if href.startswith('/'):
            url = self.url_prefix+href
        else:
            url = self.url_prefix+'/'+href

        if url in self.url_list:
            return
        self.url_list.append(url)

        title = self.tag_to_string(atag,False)
        upper_title = title.upper()
        if 'VIDEO' in upper_title or 'GALLERY' in upper_title or 'PHOTOS' in upper_title:
            return

        dtag = adiv.find('div','content')
        description=''
        print("URL "+url)
        print("TITLE "+title)
        if dtag is not None:
            stag = dtag.span
            if stag is None:
                description = self.tag_to_string(dtag,False)
            elif stag['class'] != 'timestamp':
                description = self.tag_to_string(stag,False)
        print("DESCRIPTION: "+description)

        # setdefault replaces the has_key() test, which Python 3 lacks.
        articles.setdefault(key, []).append(
            dict(title=title,url=url,date='',description=description,author='',content=''))

    def parse_web_index(key, keyurl):
        # Fetch one section index page and collect its articles.
        try:
            soup = self.index_to_soup(self.url_prefix+keyurl)
        except Exception:
            # Section page unavailable: skip the section, keep the rest.
            return
        ans.append(key)

        mainsoup = soup.find('div','bodywrapper')
        if mainsoup is None:
            # Page layout not recognised; nothing to collect here.
            return
        footer = mainsoup.find(attrs={'id':'footerfeature'})
        if footer is not None:
            footer.extract()
        print("Section: "+key)

        slider_id = re.compile('^HorizontalFeatureSlider_1_Story')
        for wdiv in mainsoup.findAll('div',attrs={'id':slider_id}):
            handle_article(wdiv,key)
            wdiv.extract()

        widget_ids = ['featurewidget','textfeature','textlinks_timestamp']
        for wdiv in mainsoup.findAll(attrs={'id':widget_ids}):
            for adiv in wdiv.findAll('div','featurecontent'):
                handle_article(adiv,key)

    for (k,url) in self.postmedia_index_pages:
        parse_web_index(k,url)

    # Keep the order in which sections were visited; drop empty sections.
    return [(key, articles[key]) for key in ans if key in articles]
|
||||
|
||||
|
@ -6,10 +6,12 @@ class AdvancedUserRecipe1271446252(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
language = 'fr'
|
||||
__author__ = 'zorgluf'
|
||||
max_articles_per_feed = 25
|
||||
#encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'CanardPC', u'http://www.canardpc.com/feed.php')]
|
||||
remove_tags_after = dict(id='auteur_news')
|
||||
remove_tags_before = dict(id='fil_ariane')
|
||||
no_stylesheets = True
|
||||
remove_tags = [dict(name='a', attrs={'class':'news_tags'}),
|
||||
dict(name='div', attrs={'id':'fil_ariane'})]
|
||||
|
||||
|
@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AcademiaCatavencu(BasicNewsRecipe):
|
||||
title = u'Academia Ca\u0163avencu'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = 'Tagma cum laude'
|
||||
description = 'Academia Catavencu. Pamflete!'
|
||||
publisher = u'Ca\u0163avencu'
|
||||
oldest_article = 5
|
||||
language = 'ro'
|
||||
@ -21,7 +21,7 @@ class AcademiaCatavencu(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
category = 'Ziare'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.academiacatavencu.info/images/logo.png'
|
||||
cover_url = 'http://www.inpolitics.ro/Uploads/Articles/academia_catavencu.jpg'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
@ -31,21 +31,21 @@ class AcademiaCatavencu(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'art_title'}),
|
||||
dict(name='div', attrs={'class':'art_text'})
|
||||
dict(name='h1', attrs={'class':'entry-title'}),
|
||||
dict(name='div', attrs={'class':'entry-content'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['desp_m']})
|
||||
, dict(name='div', attrs={'id':['tags']})
|
||||
dict(name='div', attrs={'class':['mr_social_sharing_wrapper']})
|
||||
, dict(name='div', attrs={'id':['fb_share_1']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class':['desp_m']})
|
||||
dict(name='div', attrs={'id':['fb_share_1']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Feeds', u'http://www.academiacatavencu.info/rss.xml')
|
||||
(u'Feeds', u'http://www.academiacatavencu.info/feed')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
75
recipes/cato.recipe
Normal file
@ -0,0 +1,75 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CATOInstitute(BasicNewsRecipe):
    # Feed-based recipe for the CATO Institute; print_version below points
    # every article at the matching printer-friendly URL.
    title = u'The CATO Institute'
    # Adjacent literals are joined; the explicit trailing spaces keep the
    # sentences apart (the backslash-continued form ran "peace." into "Its").
    description = (
        "The Cato Institute is a public policy research organization — a think tank — "
        "dedicated to the principles of individual liberty, limited government, free markets and peace. "
        "Its scholars and analysts conduct independent, nonpartisan research on a wide range of policy issues."
    )
    __author__ = '_reader'
    __date__ = '05 July 2012'
    __version__ = '1.0'
    cover_url = 'http://www.cato.org/images/logo.jpg'
    masthead_url = 'http://www.cato.org/images/logo.jpg'
    language = 'en'
    oldest_article = 30  # days
    max_articles_per_feed = 100
    needs_subscription = False
    publisher = 'CATO Institute'
    category = 'commentary'
    tags = 'commentary'
    publication_type = 'blog'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    simultaneous_downloads = 10
    recursions = 0
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    conversion_options = {
        'comments' : description,
        'tags' : tags,
        'language' : language,
        'publisher' : publisher,
        'authors' : publisher,
        'smarten_punctuation' : True,
    }

    feeds = [
        (u'Cato Recent Op-Eds', u'http://feeds.cato.org/CatoRecentOpeds'),
        (u'Cato Homepage Headlines', u'http://feeds.cato.org/CatoHomepageHeadlines'),
        (u'Cato Media Updates', u'http://feeds.cato.org/CatoMediaUpdates'),
        (u'Cato@Liberty', u'http://feeds.cato.org/Cato-at-liberty'),
        (u'Cato Unbound', u'http://feeds.feedburner.com/cato-unbound'),
        (u'Education and Child Policy', u'http://www.cato.org/rss/ra.xml?name=education-child-policy'),
        (u'Finance, Banking & Monetary Policy', u'http://www.cato.org/rss/ra.xml?name=finance-banking-monetary-policy'),
        (u'Government and Politics', u'http://www.cato.org/rss/ra.xml?name=government-politics'),
        (u'International Economics & Development', u'http://www.cato.org/rss/ra.xml?name=international-economics-development'),
        (u'Political Philosophy', u'http://www.cato.org/rss/ra.xml?name=political-philosophy'),
        (u'Social Security', u'http://www.cato.org/rss/ra.xml?name=social-security'),
        (u'Telecom, Internet & Information Policy', u'http://www.cato.org/rss/ra.xml?name=telecom-internet-information-policy'),
        (u'Energy and Environment', u'http://www.cato.org/rss/ra.xml?name=energy-environment'),
        (u'Foreign Policy and National Security', u'http://www.cato.org/rss/ra.xml?name=foreign-policy-national-security'),
        (u'Health Care', u'http://www.cato.org/rss/ra.xml?name=health-care'),
        (u'Law and Civil Liberties', u'http://www.cato.org/rss/ra.xml?name=law-civil-liberties'),
        (u'Regulatory Studies', u'http://www.cato.org/rss/ra.xml?name=regulatory-studies'),
        (u'Tax and Budget Policy', u'http://www.cato.org/rss/ra.xml?name=tax-budget-policy'),
        (u'Trade and Immigration', u'http://www.cato.org/rss/ra.xml?name=trade-immigration')
    ]

    def print_version(self,url):
        """Return the printer-friendly URL for the article at url.

        * Cato Unbound links carrying a ``/?utm_source...`` tail have that
          tail replaced by ``/print/``.
        * Links containing ``/publications/`` get ``?print`` appended.
        * Every other link gets ``/print/`` appended.
        """
        R_unbound = re.compile(r'(^.*cato-unbound.*)(\/\?utm_source.*$)' , re.DOTALL | re.IGNORECASE )  # CATO Unbound
        R_pubs = re.compile(r'(^.*\/publications\/.*$)' , re.DOTALL | re.IGNORECASE )  # CATO Publications
        if R_unbound.match(url):
            # The group reference only has meaning inside sub(); building
            # the string by hand returned the literal text '\g<1>/print/'.
            return R_unbound.sub(r'\g<1>/print/', url)
        if R_pubs.match(url):
            return url + '?print'
        return url + '/print/'
|
68
recipes/ceska_pozice.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class ceskaPoziceRecipe(BasicNewsRecipe):
    # Recipe for ceskapozice.cz: skips articles already seen in another
    # feed and stitches multi-page articles together.
    __author__ = 'bubak'
    title = u'Česká pozice'
    description = 'Česká pozice'
    oldest_article = 2
    max_articles_per_feed = 20

    feeds = [
        (u'Všechny články', u'http://www.ceskapozice.cz/rss.xml'),
        (u'Domov', u'http://www.ceskapozice.cz/taxonomy/term/16/feed'),
        (u'Chrono', u'http://www.ceskapozice.cz/chrono/feed'),
        (u'Evropa', u'http://www.ceskapozice.cz/taxonomy/term/17/feed')
    ]

    language = 'cs'
    cover_url = 'http://www.ceskapozice.cz/sites/default/files/cpozice_logo.png'
    remove_javascript = True
    no_stylesheets = True
    # Prefix put in front of the "next page" links (see append_page).
    domain = u'http://www.ceskapozice.cz'
    use_embedded_content = False

    remove_tags = [dict(name='div', attrs={'class':['block-ad', 'region region-content-ad']}),
                   dict(name='ul', attrs={'class':'links'}),
                   dict(name='div', attrs={'id':['comments', 'back-to-top']}),
                   dict(name='div', attrs={'class':['next-page', 'region region-content-ad']}),
                   dict(name='cite')]

    keep_only_tags = [dict(name='div', attrs={'id':'content'})]

    # URLs already accepted by get_article_url (keys only; values are True).
    visited_urls = {}

    def get_article_url(self, article):
        """Return the article URL, or None if it is missing or already seen."""
        url = BasicNewsRecipe.get_article_url(self, article)
        if url is None:
            # Without this guard the log call below raised TypeError when
            # concatenating None to a string.
            return None
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return url

    def preprocess_html(self, soup):
        """Append any follow-up pages to the article body and return soup."""
        self.append_page(soup, soup.body, 3)
        return soup

    def append_page(self, soup, appendtag, position):
        """Insert the text of the next page into appendtag at position.

        Follows the ``pager-next`` link inside the ``paging-bottom`` div of
        soup, recursing so that later pages are appended to the fetched
        page first. The pager div of soup is removed afterwards.
        """
        pager = soup.find('div', attrs={'class':'paging-bottom'})
        if not pager:
            return

        nextbutton = pager.find('li', attrs={'class':'pager-next'})
        if nextbutton:
            nexturl = self.domain + nextbutton.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class':'main-body'})
            # A follow-up page without a main-body div used to raise
            # AttributeError; now it is simply not appended.
            if texttag is not None:
                for it in texttag.findAll('div', attrs={'class':'region region-content-ad'}):
                    it.extract()
                for it in texttag.findAll('cite'):
                    it.extract()
                newpos = len(texttag.contents)
                self.append_page(soup2, texttag, newpos)
                texttag.extract()
                appendtag.insert(position, texttag)
        pager.extract()
|
||||
|
30
recipes/ceske_noviny.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class ceskenovinyRecipe(BasicNewsRecipe):
    # Feed-only recipe for ceskenoviny.cz; there is no custom code.

    # Identity
    __author__ = 'bubak'
    title = u'České Noviny'
    description = 'ceskenoviny.cz'
    language = 'cs'
    cover_url = 'http://i4.cn.cz/grafika/cn_logo-print.gif'

    # Download limits
    oldest_article = 1
    max_articles_per_feed = 20

    # Only the first feed is active; the others are kept for easy enabling.
    feeds = [
        (u'Domácí', u'http://www.ceskenoviny.cz/sluzby/rss/domov.php')
        #,(u'Hlavní události', u'http://www.ceskenoviny.cz/sluzby/rss/index.php')
        #,(u'Přehled zpráv', u'http://www.ceskenoviny.cz/sluzby/rss/zpravy.php')
        #,(u'Ze světa', u'http://www.ceskenoviny.cz/sluzby/rss/svet.php')
        #,(u'Kultura', u'http://www.ceskenoviny.cz/sluzby/rss/kultura.php')
        #,(u'IT', u'http://www.ceskenoviny.cz/sluzby/rss/pocitace.php')
    ]

    # Page clean-up
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    filter_regexps = [r'img.aktualne.centrum.cz']
    keep_only_tags = [
        dict(name='div', attrs={'id':'clnk'}),
    ]
|
26
recipes/cesky_rozhlas_6.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class cro6Recipe(BasicNewsRecipe):
    # Feed-only recipe for Český rozhlas 6; there is no custom code.

    # Identity
    __author__ = 'bubak'
    title = u'Český rozhlas 6'
    description = 'Český rozhlas 6'
    language = 'cs'
    cover_url = 'http://www.rozhlas.cz/img/e5/logo/cro6.png'

    # Download limits
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        (u'Český rozhlas 6', u'http://www.rozhlas.cz/export/cro6/')
    ]

    # Page clean-up
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    keep_only_tags = [
        dict(name='div', attrs={'id':'article'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['audio-play-all', 'poradHeaders', 'actions']}),
        dict(name='p', attrs={'class':['para-last']}),
    ]
|
@ -13,11 +13,11 @@ class CGM(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds= True
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheers=True
|
||||
no_stylesheets = True
|
||||
extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;}'
|
||||
remove_tags_before=dict(id='mainContent')
|
||||
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
||||
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
||||
remove_tags=[dict(name='div', attrs={'class':['fbContainer', 'socials']}),
|
||||
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
|
||||
dict(id=['movieShare', 'container'])]
|
||||
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
|
||||
|
87
recipes/chronicle_higher_ed.recipe
Normal file
@ -0,0 +1,87 @@
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from collections import OrderedDict
|
||||
|
||||
class Chronicle(BasicNewsRecipe):
    # Subscription recipe that builds its article list from the current
    # issue's table of contents on chronicle.com.

    title = 'The Chronicle of Higher Education'
    __author__ = 'Rick Shang'

    description = 'Weekly news and job-information source for college and university faculty members, administrators, and students.'
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    keep_only_tags = [
        dict(name='div', attrs={'class':'article'}),
    ]
    remove_tags = [dict(name='div',attrs={'class':['related module1','maintitle']}),
                   dict(name='div', attrs={'id':['section-nav','icon-row', 'enlarge-popup']}),
                   dict(name='a', attrs={'class':'show-enlarge enlarge'})]
    # Was spelled ``no_javascript``; ``remove_javascript`` is the attribute
    # name the other recipes use.
    remove_javascript = True
    no_stylesheets = True

    needs_subscription = True

    def get_browser(self):
        """Return a browser, logged in when a username and password are set."""
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://chronicle.com/myaccount/login')
            br.select_form(nr=1)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Return [(section title, [article dict, ...]), ...] for the issue.

        Also sets ``self.timefmt`` from the issue link text and, when a
        matching image is found, ``self.cover_url``.
        """
        # Go to the issue
        soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/')
        issue = soup0.find('ul',attrs={'class':'feature-promo-list'}).li
        issueurl = "http://chronicle.com"+issue.a['href']

        # Find date: the text after the last ': ' in the issue link
        dates = self.tag_to_string(issue.a).split(': ')[-1]
        self.timefmt = u' [%s]'%dates

        # Find cover
        cover=soup0.find('div',attrs={'class':'side-content'}).find(attrs={'src':re.compile("photos/biz/Current")})
        if cover is not None:
            if "chronicle.com" in cover['src']:
                self.cover_url=cover['src']
            else:
                self.cover_url="http://chronicle.com" + cover['src']

        # Go to the main body
        soup = self.index_to_soup(issueurl)
        div = soup.find ('div', attrs={'id':'article-body'})

        # Sections are kept in the order they are first encountered.
        feeds = OrderedDict()
        for post in div.findAll('li'):
            a=post.find('a', href=True)
            if a is None:
                continue
            title=self.tag_to_string(a)
            url="http://chronicle.com"+a['href'].strip()
            # The section is the nearest preceding <h3>, else the nearest <h4>.
            sectiontitle=post.findPrevious('h3')
            if sectiontitle is None:
                sectiontitle=post.findPrevious('h4')
            section_title=self.tag_to_string(sectiontitle)
            desc=self.tag_to_string(post.find('p'))
            feeds.setdefault(section_title, []).append(
                {'title':title, 'url':url, 'description':desc, 'date':''})

        # items() works on Python 2 and 3; iteritems() is Python-2-only.
        return list(feeds.items())

    def preprocess_html(self,soup):
        """Swap Tableau placeholders for their <noscript> link; return soup."""
        # process all the images
        for div in soup.findAll('div', attrs={'class':'tableauPlaceholder'}):
            noscript = div.find('noscript')
            # Placeholders without a <noscript><a> fallback are left alone
            # instead of raising AttributeError.
            if noscript is not None and noscript.a is not None:
                div.replaceWith(noscript.a)
        for div0 in soup.findAll('div',text='Powered by Tableau'):
            div0.extract()
        return soup
|
||||
|
58
recipes/ciperchile.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
ciperchile.cl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CiperChile(BasicNewsRecipe):
    # Feed-only recipe for ciperchile.cl; there is no custom code.
    title = 'CIPER Chile'
    __author__ = 'Darko Miletic'
    description = 'El Centro de Investigacion e Informacion Periodistica (CIPER) es una institucion independiente que desarrolla reportajes de investigacion de acuerdo a principios de maxima calidad e integridad profesional. Para lograr dicho objetivo, los profesionales de CIPER incorporan a las tecnicas propias del reporteo el uso sistematico de las leyes chilenas que norman el libre acceso a la informacion, de manera que los documentos que se obtengan por esta via esten puestos a disposicion del publico sin restricciones.'
    publisher = 'CIPER'
    category = 'news, politics, Chile'
    oldest_article = 15
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'es_CL'
    auto_cleanup = False
    remove_empty_feeds = True
    publication_type = 'blog'
    masthead_url = 'http://ciperchile.cl/wp-content/themes/cipertheme/css/ui/ciper-logo.png'
    # Stylesheet applied to the downloaded articles (one rule per line).
    extra_css = (
        '\n'
        'body{font-family: Arial,sans-serif}\n'
        '.excerpt{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: 1.25em}\n'
        '.author{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: small}\n'
        '.date{font-family: Georgia,"Times New Roman",Times,serif; font-size: small; color: grey}\n'
        '.epigrafe{font-size: small; color: grey}\n'
        'img{margin-bottom: 0.4em; display:block}\n'
    )

    # The key was 'comment'; 'comments' is the spelling the other recipes
    # use for passing the description to the conversion.
    conversion_options = {
        'comments' : description,
        'tags' : category,
        'publisher' : publisher,
        'language' : language,
    }

    remove_tags = [
        dict(name=['meta','link']),
        dict(attrs={'class':['articleSharingTools','articleNav']})
    ]
    remove_attributes=['lang']
    remove_tags_before=dict(name='p', attrs={'class':'epigrafe'})
    remove_tags_after=dict(name='div', attrs={'class':'articleBody'})
    keep_only_tags = [dict(name='div', attrs={'class':'articleElements'})]

    feeds = [
        (u'Opinion del lector', u'http://ciperchile.cl/category/opinion-del-lector/feed/'),
        (u'Reportajes de investigacion', u'http://ciperchile.cl/category/reportajes-de-investigacion/feed/'),
        (u'Actualidad y Entrevistas', u'http://ciperchile.cl/category/actualidad-y-entrevistas/feed/'),
        (u'Opinion', u'http://ciperchile.cl/category/opinion/feed/'),
        (u'Accesso a la informacion', u'http://ciperchile.cl/category/acceso-a-la-informacion/feed/'),
        (u'Libros', u'http://ciperchile.cl/category/libros/feed/'),
        (u'Blog', u'http://ciperchile.cl/category/blog/feed/'),
    ]
|
@ -1,7 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.codinghorror.com/blog/
|
||||
'''
|
||||
@ -14,28 +12,25 @@ class CodingHorror(BasicNewsRecipe):
|
||||
description = 'programming and human factors - Jeff Atwood'
|
||||
category = 'blog, programming'
|
||||
publisher = 'Jeff Atwood'
|
||||
language = 'en'
|
||||
|
||||
author = 'Jeff Atwood'
|
||||
language = 'en'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf8'
|
||||
auto_cleanup = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--author' , author
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nauthors="' + author + '"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
, 'authors' : publisher
|
||||
}
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link'])
|
||||
,dict(name='div',attrs={'class':'feedflare'})
|
||||
]
|
||||
|
||||
feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror' )]
|
||||
|
||||
feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror' )]
|
38
recipes/conowego_pl.recipe
Executable file
@ -0,0 +1,38 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
class CoNowegoPl(BasicNewsRecipe):
    # Recipe for conowego.pl: adds line breaks around images and merges
    # the remaining pages of multi-page articles into the first page.
    title = u'conowego.pl'
    __author__ = 'fenuks'
    description = u'Nowy wortal technologiczny oraz gazeta internetowa. Testy najnowszych produktów, fachowe porady i recenzje. U nas znajdziesz wszystko o elektronice użytkowej !'
    cover_url = 'http://www.conowego.pl/fileadmin/templates/main/images/logo_top.png'
    category = 'IT, news'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    keep_only_tags = [
        dict(name='div', attrs={'class':'news_list single_view'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['ni_bottom', 'ni_rank', 'ni_date']}),
    ]
    feeds = [
        (u'Aktualno\u015bci', u'http://www.conowego.pl/rss/aktualnosci-5/?type=100'),
        (u'Gaming', u'http://www.conowego.pl/rss/gaming-6/?type=100'),
        (u'Porady', u'http://www.conowego.pl/rss/porady-3/?type=100'),
        (u'Testy', u'http://www.conowego.pl/rss/testy-2/?type=100'),
    ]

    def preprocess_html(self, soup):
        """Add <br /> around every image, append later pages, return soup."""
        for image in soup.findAll('img'):
            image.parent.insert(0, BeautifulSoup('<br />'))
            image.insert(len(image), BeautifulSoup('<br />'))
        self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Append the ``ni_content`` of each further page to appendtag.

        Every link in the ``pages`` div except the last one is fetched.
        The pagination elements are removed from appendtag afterwards.
        """
        pages_div = appendtag.find('div', attrs={'class':'pages'})
        if not pages_div:
            return

        page_links = pages_div.findAll('a')
        for link in page_links[:-1]:
            next_soup = self.index_to_soup('http://www.conowego.pl/' + link['href'])
            content = next_soup.find(attrs={'class':'ni_content'})
            appendtag.insert(len(appendtag.contents), content)

        for leftover in appendtag.findAll(attrs={'class':['pages', 'paginationWrap']}):
            leftover.extract()
|
@ -1,13 +1,13 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
#from calibre import __appname__
|
||||
from calibre.utils.magick import Image
|
||||
import re
|
||||
from calibre import browser
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Cosmopolitan UK'
|
||||
description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
|
||||
description = 'Author : D.Asbury : Womens Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
|
||||
|
||||
__author__ = 'Dave Asbury'
|
||||
#last update 21/12/11
|
||||
#last update 7/7/12 hopefully get current cover from itunes
|
||||
# greyscale code by Starson
|
||||
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
|
||||
no_stylesheets = True
|
||||
@ -15,6 +15,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
max_articles_per_feed = 20
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
ignore_duplicate_articles = {'title'}
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<!-- Begin tmpl module_competition_offer -->.*?<!-- End tmpl module_competition_offer-->', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
@ -39,14 +40,19 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
feeds = [
|
||||
(u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
#process all the images
|
||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||
iurl = tag['src']
|
||||
img = Image()
|
||||
img.open(iurl)
|
||||
if img < 0:
|
||||
raise RuntimeError('Out of memory')
|
||||
img.type = "GrayscaleType"
|
||||
img.save(iurl)
|
||||
return soup
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://itunes.apple.com/gb/app/cosmopolitan-uk/id461363572?mt=8')
|
||||
# look for the block containing the sun button and url
|
||||
cov = soup.find(attrs={'alt' : 'iPhone Screenshot 1'})
|
||||
cov2 = str(cov['src'])
|
||||
br = browser()
|
||||
br.set_handle_redirect(False)
|
||||
try:
|
||||
br.open_novisit(cov2)
|
||||
cover_url = cov2
|
||||
except:
|
||||
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
|
||||
|
||||
return cover_url
|
||||
|
||||
|
||||
|
@ -1,12 +1,13 @@
|
||||
from calibre import browser
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
title = u'Countryfile.com'
|
||||
#cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
|
||||
__author__ = 'Dave Asbury'
|
||||
description = 'The official website of Countryfile Magazine'
|
||||
# last updated 15/4/12
|
||||
# last updated 7/10/12
|
||||
language = 'en_GB'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 25
|
||||
@ -14,16 +15,19 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
#articles_are_obfuscated = True
|
||||
ignore_duplicate_articles = {'title'}
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.countryfile.com/')
|
||||
cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'})
|
||||
#print '******** ',cov,' ***'
|
||||
cov2 = str(cov)
|
||||
cov2=cov2[124:-90]
|
||||
#print '******** ',cov2,' ***'
|
||||
|
||||
cov = soup.find(attrs={'width' : '160', 'class' : re.compile('imagecache imagecache-160px_wide')})
|
||||
print '******** ',cov,' ***'
|
||||
cov2 = str(cov)
|
||||
cov2=cov2[10:101]
|
||||
print '******** ',cov2,' ***'
|
||||
#cov2='http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/1b_0.jpg'
|
||||
# try to get cover - if can't get known cover
|
||||
br = browser()
|
||||
|
||||
br.set_handle_redirect(False)
|
||||
try:
|
||||
br.open_novisit(cov2)
|
||||
@ -40,3 +44,6 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
(u'Country News', u'http://www.countryfile.com/rss/news'),
|
||||
(u'Countryside', u'http://www.countryfile.com/rss/countryside'),
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
12
recipes/ct24.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1339974788(BasicNewsRecipe):
    """Recipe for the CT24 news feeds published by ceskatelevize.cz."""

    # --- identification ---
    title = u'\u010cT24'
    __author__ = 'zoidozoido'
    language = 'cs'

    # --- download limits ---
    oldest_article = 1
    max_articles_per_feed = 100

    # --- content clean-up: automatic article extraction, images dropped ---
    auto_cleanup = True
    remove_tags = [dict(name='img')]

    # (section title, RSS url) pairs, one per line
    feeds = [
        (u'Hlavn\xed zpr\xe1vy', u'http://www.ceskatelevize.cz/ct24/rss/hlavni-zpravy/'),
        (u'Dom\xe1c\xed', u'http://www.ceskatelevize.cz/ct24/rss/domaci/'),
        (u'Sv\u011bt', u'http://www.ceskatelevize.cz/ct24/rss/svet/'),
        (u'Regiony', u'http://www.ceskatelevize.cz/ct24/rss/regiony/'),
        (u'Kultura', u'http://www.ceskatelevize.cz/ct24/rss/kultura/'),
        (u'Ekonomika', u'http://www.ceskatelevize.cz/ct24/rss/ekonomika/'),
        (u'Sport - hlavn\xed zpr\xe1vy', u'http://www.ceskatelevize.cz/ct4/rss/hlavni-zpravy/'),
        (u'OH 2012', u'http://www.ceskatelevize.cz/ct4/rss/oh-2012/'),
    ]
|
59
recipes/cumhuriyet.recipe
Normal file
@ -0,0 +1,59 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
from __future__ import unicode_literals
|
||||
# based on a recipe by Darko Miletic
|
||||
#
|
||||
# Cumhuriyet Gazetesi'nin köşe yazıları okuyuculara cumhuriyet.com.tr
|
||||
# adresi üzerinden ücretsiz olarak sunulmaktadır.
|
||||
# Calibre yazılımıyla kullanılabilen bu reçete Cumhuriyet Gazetesi'nin
|
||||
# günlük köşe yazılarını hızlıca derleyip e-okuyucunuzda kolayca okunabilir
|
||||
# hale getirir. Yazıların yayınlanma saati sabah olduğu için reçeteyi
|
||||
# 7:00-24:00 arasında çizelgelemeniz gerekmektedir.
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Sethi Eksi <sethi.eksi at gmail.com>'
|
||||
'''
|
||||
cumhuriyet.com.tr
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Cumhuriyet_tr(BasicNewsRecipe):
    """Recipe for the daily columnists' feed of cumhuriyet.com.tr."""

    title                 = 'Cumhuriyet - Yazarlar'
    __author__            = 'Cumhuriyet Gazetesi Yazarları'
    description           = 'Günlük Cumhuriyet Gazetesi Köşe Yazıları'
    publisher             = 'Cumhuriyet'
    category              = 'news, politics, Turkey'
    oldest_article        = 1
    max_articles_per_feed = 150
    no_stylesheets        = True
    encoding              = 'cp1254'
    use_embedded_content  = False
    masthead_url          = 'http://www.cumhuriyet.com.tr/home/cumhuriyet/sablon2000/img/cumlogobeyaz1.gif'
    language              = 'tr'
    extra_css             = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                                .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif}
                            """

    # Keys must be calibre conversion option names. The option is spelled
    # 'comments'; the former 'comment' key matched no option and was ignored.
    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    remove_tags = [dict(name=['embed','iframe','object','link','base'])]
    # the same class is used as both the start and the end marker
    remove_tags_before = dict(attrs={'class':'c565'})
    remove_tags_after  = dict(attrs={'class':'c565'})

    feeds = [
        (u'Yazarlar', u'http://www.cumhuriyet.com.tr/?kn=5&xl=rss')
    ]

    def print_version(self, url):
        """Return the article URL rebuilt from the ``hn=`` id found in *url*.

        Everything after the last ``hn=`` in *url* is appended to the
        site's ``?hn=`` address. A *url* with no ``hn=`` marker is
        returned unchanged.
        """
        _, marker, articleid = url.rpartition('hn=')
        if not marker:
            # rpartition puts the whole input in the last slot when the
            # separator is missing; appending that would give a bogus URL
            return url
        return 'http://www.cumhuriyet.com.tr/?hn=' + articleid

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
||||
|
@ -7,7 +7,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
description = 'News as provided by The Daily Mirror -UK'
|
||||
|
||||
__author__ = 'Dave Asbury'
|
||||
# last updated 8/6/12
|
||||
# last updated 19/10/12
|
||||
language = 'en_GB'
|
||||
#cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
|
||||
|
||||
@ -15,10 +15,12 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 12
|
||||
max_articles_per_feed = 1
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
ignore_duplicate_articles = {'title'}
|
||||
|
||||
# auto_cleanup = True
|
||||
#conversion_options = { 'linearize_tables' : True }
|
||||
|
||||
@ -60,11 +62,12 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
|
||||
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||
]
|
||||
extra_css = '''
|
||||
h1{ font-size:medium;}
|
||||
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
img { display:block}
|
||||
'''#
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
|
||||
@ -75,8 +78,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
#cov2 now contains url of the page containing pic
|
||||
soup = self.index_to_soup(cov2)
|
||||
cov = soup.find(attrs={'id' : 'large'})
|
||||
cov2 = str(cov)
|
||||
cov2=cov2[27:-18]
|
||||
cov=str(cov)
|
||||
cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
||||
cov2 = str(cov2)
|
||||
cov2=cov2[2:len(cov2)-2]
|
||||
#cov2 now is pic url, now go back to original function
|
||||
br = browser()
|
||||
br.set_handle_redirect(False)
|
||||
|
26
recipes/delco_times.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class HindustanTimes(BasicNewsRecipe):
    """Recipe for the Delco Times (delcotimes.com) RSS feeds."""
    # NOTE(review): the class name looks like a leftover from another recipe;
    # it is kept unchanged so anything referring to it keeps working.

    # user-visible title; spelling fixed from 'Delcoe Times' to match the
    # recipe file name and the feed domain
    title          = u'Delco Times'
    language       = 'en'
    __author__     = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    #encoding = 'cp1252'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True

    # (section title, RSS url) pairs
    feeds          = [
        ('News',
         'http://www.delcotimes.com/?rss=news'),
        ('Sports',
         'http://www.delcotimes.com/?rss=sports'),
        ('Business',
         'http://business-news.thestreet.com/the-delaware-county-daily-times/rss/109393'),
        ('Entertainment',
         'http://www.delcotimes.com/?rss=entertainment'),
    ]
|
||||
|
39
recipes/demagog.cz.recipe
Normal file
@ -0,0 +1,39 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class demagogRecipe(BasicNewsRecipe):
    """Recipe for the 'Aktuality' RSS feed of demagog.cz."""

    __author__  = 'bubak'
    title       = u'Demagog.cz'
    publisher   = u''
    description = 'demagog.cz'
    oldest_article = 6
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [
        (u'Aktuality', u'http://demagog.cz/rss')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://demagog.cz/content/images/demagog.cz.png'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
        .vyrok_suhrn{margin-top:50px; }
        .vyrok{margin-bottom:30px; }
    """

    remove_tags = [dict(name='a', attrs={'class':'vyrok_odovodnenie_tgl'}),
                   dict(name='img', attrs={'class':'vyrok_fotografia'})]
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class':'vyrok_text_after'})

    # Insert an <hr> right after the opening summary <div>. The replacement
    # is a callable, so no backreference expansion happens on its result:
    # the former '\1<hr>' was the control character U+0001 followed by <hr>,
    # which discarded the matched tag. Use the match object instead.
    preprocess_regexps = [
        (re.compile(r'(<div class="vyrok_suhrn">)', re.DOTALL|re.IGNORECASE),
         lambda match: match.group(1) + '<hr>')
    ]
|
||||
|
||||
|
||||
|
||||
|
36
recipes/denik.cz.recipe
Normal file
@ -0,0 +1,36 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class ceskyDenikRecipe(BasicNewsRecipe):
    """Recipe for selected RSS feeds of denik.cz."""

    # --- identification ---
    title = u'denik.cz'
    __author__ = 'bubak'
    description = u'Český deník'
    publisher = u''
    language = 'cs'
    cover_url = 'http://g.denik.cz/images/loga/denik.png'
    #encoding = 'iso-8859-2'

    # --- download limits ---
    oldest_article = 1
    max_articles_per_feed = 20
    remove_empty_feeds = True
    use_embedded_content = False

    # (section title, RSS url) pairs; the commented entries are disabled
    feeds = [
        (u'Z domova', u'http://www.denik.cz/rss/z_domova.html'),
        (u'Pražský deník - Moje Praha', u'http://prazsky.denik.cz/rss/zpravy_region.html'),
        #(u'Zahraničí', u'http://www.denik.cz/rss/ze_sveta.html'),
        #(u'Kultura', u'http://www.denik.cz/rss/kultura.html'),
    ]

    # --- styling ---
    no_stylesheets = True
    remove_javascript = True
    extra_css = """
    """

    # --- article extraction ---
    keep_only_tags = [dict(name='div', attrs={'class':'content'})]
    remove_tags = []
    #remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='p', attrs={'class':'clanek-autor'})
|
||||
|
||||
|
28
recipes/denik_referendum.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class denikReferendumRecipe(BasicNewsRecipe):
    """Recipe for the Deník Referendum feed served through FeedBurner."""

    # --- identification ---
    title = u'Den\u00edk Referendum'
    __author__ = 'bubak'
    description = ''
    publisher = u''
    language = 'cs'
    #encoding = 'iso-8859-2'

    # --- download limits ---
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False

    # (section title, RSS url) pairs
    feeds = [
        (u'Deník Referendum', u'http://feeds.feedburner.com/DenikReferendum')
    ]

    # --- styling ---
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = []

    # --- article extraction ---
    keep_only_tags = [dict(name='div', attrs={'id':['content']})]
    remove_tags = [
        dict(name='div', attrs={'class':['box boxLine', 'box noprint', 'box']}),
        dict(name='h3', attrs={'class':'head alt'}),
    ]
    remove_tags_after = dict(name='div', attrs={'class':['text']})
|
@ -72,7 +72,7 @@ class DerSpiegel(BasicNewsRecipe):
|
||||
for article in section.findNextSiblings(['dd','dt']):
|
||||
if article.name == 'dt':
|
||||
break
|
||||
link = article.find('a')
|
||||
link = article.find('a', href=True)
|
||||
title = self.tag_to_string(link).strip()
|
||||
if title in self.empty_articles:
|
||||
continue
|
||||
|
@ -1,71 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
dilemaveche.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DilemaVeche(BasicNewsRecipe):
|
||||
title = u'Dilema Veche' # apare vinerea, mai pe dupa-masa,depinde de Luiza cred (care se semneaza ca fiind creatorul fiecarui articol in feed-ul RSS)
|
||||
__author__ = 'song2' # inspirat din scriptul pentru Le Monde. Inspired from the Le Monde script
|
||||
description = '"Sint vechi, domnule!" (I.L. Caragiale)'
|
||||
publisher = 'Adevarul Holding'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 200
|
||||
encoding = 'utf8'
|
||||
language = 'ro'
|
||||
masthead_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
|
||||
publication_type = 'magazine'
|
||||
feeds = [
|
||||
('Editoriale si opinii - Situatiunea', 'http://www.dilemaveche.ro/taxonomy/term/37/0/feed'),
|
||||
('Editoriale si opinii - Pe ce lume traim', 'http://www.dilemaveche.ro/taxonomy/term/38/0/feed'),
|
||||
('Editoriale si opinii - Bordeie si obiceie', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
|
||||
('Editoriale si opinii - Talc Show', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
|
||||
('Tema saptamanii', 'http://www.dilemaveche.ro/taxonomy/term/19/0/feed'),
|
||||
('La zi in cultura - Dilema va recomanda', 'http://www.dilemaveche.ro/taxonomy/term/58/0/feed'),
|
||||
('La zi in cultura - Carte', 'http://www.dilemaveche.ro/taxonomy/term/14/0/feed'),
|
||||
('La zi in cultura - Film', 'http://www.dilemaveche.ro/taxonomy/term/13/0/feed'),
|
||||
('La zi in cultura - Muzica', 'http://www.dilemaveche.ro/taxonomy/term/1341/0/feed'),
|
||||
('La zi in cultura - Arte performative', 'http://www.dilemaveche.ro/taxonomy/term/1342/0/feed'),
|
||||
('La zi in cultura - Arte vizuale', 'http://www.dilemaveche.ro/taxonomy/term/1512/0/feed'),
|
||||
('Societate - Ieri cu vedere spre azi', 'http://www.dilemaveche.ro/taxonomy/term/15/0/feed'),
|
||||
('Societate - Din polul opus', 'http://www.dilemaveche.ro/taxonomy/term/41/0/feed'),
|
||||
('Societate - Mass comedia', 'http://www.dilemaveche.ro/taxonomy/term/43/0/feed'),
|
||||
('Societate - La singular si la plural', 'http://www.dilemaveche.ro/taxonomy/term/42/0/feed'),
|
||||
('Oameni si idei - Educatie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
|
||||
('Oameni si idei - Polemici si dezbateri', 'http://www.dilemaveche.ro/taxonomy/term/48/0/feed'),
|
||||
('Oameni si idei - Stiinta si tehnologie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
|
||||
('Dileme on-line', 'http://www.dilemaveche.ro/taxonomy/term/005/0/feed')
|
||||
]
|
||||
remove_tags_before = dict(name='div',attrs={'class':'spacer_10'})
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'art_related_left'}),
|
||||
dict(name='div', attrs={'class':'controale'}),
|
||||
dict(name='div', attrs={'class':'simple_overlay'}),
|
||||
]
|
||||
remove_tags_after = [dict(id='facebookLike')]
|
||||
remove_javascript = True
|
||||
title = u'Dilema Veche'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = 'Sint vechi, domnule! (I.L. Caragiale)'
|
||||
publisher = u'Adev\u0103rul Holding'
|
||||
oldest_article = 5
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
extra_css = """
|
||||
body{font-family: Georgia,Times,serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
"""
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
soup = self.index_to_soup('http://dilemaveche.ro')
|
||||
link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'})
|
||||
if link_item and link_item.a:
|
||||
cover_url = link_item.a['href']
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover_url)
|
||||
except: #daca nu gaseste pdf-ul
|
||||
self.log("\nPDF indisponibil")
|
||||
link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'})
|
||||
if link_item and link_item.img:
|
||||
cover_url = link_item.img['src']
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover_url)
|
||||
except: #daca nu gaseste nici imaginea mica mica
|
||||
print('Mama lor de nenorociti! nu este nici pdf nici imagine')
|
||||
cover_url ='http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
|
||||
return cover_url
|
||||
cover_margins = (10, 15, '#ffffff')
|
||||
use_embedded_content = False
|
||||
category = 'Ziare'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'c_left_column'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['adshop_widget_428x60']}) ,
|
||||
dict(name='div', attrs={'id':['gallery']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':['adshop_widget_428x60']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Feeds', u'http://dilemaveche.ro/rss.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
|