Replace CRLF line endings

This commit is contained in:
Kovid Goyal 2013-05-28 11:42:53 +05:30
parent a1ce980d99
commit c0f549625a
47 changed files with 6457 additions and 6457 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,157 +1,157 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
<!-- Gaussian blur referenced through filter="url(#filter5365)" by the dark (#362D2D) groups in layer2 (path5265 ... path5455); presumably these blurred silhouettes act as soft shadows behind the coloured shapes (TODO confirm visually). -->
<filter id="filter5365">
<feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
</filter>
<g id="layer1">
</g>
<g id="layer2">
<polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005
171.429,297.005 "/>
<g id="path5265" filter="url(#filter5365)">
<polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09
119.953,80.636 70.397,97.084 "/>
</g>
<g id="path5267" filter="url(#filter5365)">
<path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068
l2.322,16.553L118.639,100.902z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432
c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
</g>
<g id="path5269" filter="url(#filter5365)">
<path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812
C68.936,101.726,70.711,98.81,70.711,98.81z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986
c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
</g>
<g id="path5271" filter="url(#filter5365)">
<path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208
C17.974,94.288,17.113,87.874,21.479,79.607z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019
l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
</g>
<g id="path5273" filter="url(#filter5365)">
<path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346
l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897
l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139
L120.871,99.092z"/>
</g>
<path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
<path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982
c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>
<radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#B5FFA6"/>
<stop offset="1" style="stop-color:#76E976"/>
</radialGradient>
<path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742
l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
<path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d="
M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071
l0.356,14.644L118.22,97.921z"/>
<path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029
C66.471,100.649,68.068,97.629,68.068,97.629z"/>
<g id="path5419" filter="url(#filter5365)">
<polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233
106.738,52.778 57.183,69.227 "/>
</g>
<g id="path5421" filter="url(#filter5365)">
<path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069
l2.322,16.552L105.424,73.045z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2
c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
</g>
<g id="path5423" filter="url(#filter5365)">
<path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812
C55.721,73.869,57.497,70.953,57.497,70.953z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777
c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
</g>
<g id="path5425" filter="url(#filter5365)">
<path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062
C4.759,66.431,3.899,60.017,8.265,51.751z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018
L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
</g>
<g id="path5427" filter="url(#filter5365)">
<path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725
l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959
L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139
L107.656,71.234z"/>
</g>
<path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779
l49.548,18.171L54.54,67.985L6.102,50.193z"/>
<path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701
c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>
<radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#789DED"/>
<stop offset="1" style="stop-color:#2381E8"/>
</radialGradient>
<path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01
c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
<path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5
L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064
z"/>
<path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028
C53.256,72.793,54.854,69.772,54.854,69.772z"/>
<g id="path5447" filter="url(#filter5365)">
<polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305
123.882,28.85 74.326,45.299 "/>
</g>
<g id="path5449" filter="url(#filter5365)">
<path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069
l2.321,16.552L122.567,49.116z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271
c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
</g>
<g id="path5451" filter="url(#filter5365)">
<path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812
C72.863,49.94,74.641,47.024,74.641,47.024z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849
c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
</g>
<g id="path5453" filter="url(#filter5365)">
<path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133
C21.902,42.502,21.042,36.088,25.408,27.822z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018
L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
</g>
<g id="path5455" filter="url(#filter5365)">
<path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725
l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168
c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
</g>
<path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
<path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778
c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>
<radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#FD8A8A"/>
<stop offset="1" style="stop-color:#FF7878"/>
</radialGradient>
<path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125
c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>
<linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
<stop offset="0" style="stop-color:#FFFFFF"/>
<stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
</linearGradient>
<path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
<path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5
l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643
L122.148,46.135z"/>
<path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028
C70.399,48.864,71.997,45.844,71.997,45.844z"/>
</g>
</svg>
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
<!-- Gaussian blur referenced through filter="url(#filter5365)" by the dark (#362D2D) groups in layer2 (path5265 ... path5455); presumably these blurred silhouettes act as soft shadows behind the coloured shapes (TODO confirm visually). -->
<filter id="filter5365">
<feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
</filter>
<g id="layer1">
</g>
<g id="layer2">
<polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005
171.429,297.005 "/>
<g id="path5265" filter="url(#filter5365)">
<polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09
119.953,80.636 70.397,97.084 "/>
</g>
<g id="path5267" filter="url(#filter5365)">
<path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068
l2.322,16.553L118.639,100.902z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432
c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
</g>
<g id="path5269" filter="url(#filter5365)">
<path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812
C68.936,101.726,70.711,98.81,70.711,98.81z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986
c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
</g>
<g id="path5271" filter="url(#filter5365)">
<path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208
C17.974,94.288,17.113,87.874,21.479,79.607z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019
l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
</g>
<g id="path5273" filter="url(#filter5365)">
<path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346
l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897
l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139
L120.871,99.092z"/>
</g>
<path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
<path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982
c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>
<radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#B5FFA6"/>
<stop offset="1" style="stop-color:#76E976"/>
</radialGradient>
<path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742
l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
<path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d="
M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071
l0.356,14.644L118.22,97.921z"/>
<path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029
C66.471,100.649,68.068,97.629,68.068,97.629z"/>
<g id="path5419" filter="url(#filter5365)">
<polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233
106.738,52.778 57.183,69.227 "/>
</g>
<g id="path5421" filter="url(#filter5365)">
<path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069
l2.322,16.552L105.424,73.045z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2
c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
</g>
<g id="path5423" filter="url(#filter5365)">
<path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812
C55.721,73.869,57.497,70.953,57.497,70.953z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777
c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
</g>
<g id="path5425" filter="url(#filter5365)">
<path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062
C4.759,66.431,3.899,60.017,8.265,51.751z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018
L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
</g>
<g id="path5427" filter="url(#filter5365)">
<path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725
l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959
L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139
L107.656,71.234z"/>
</g>
<path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779
l49.548,18.171L54.54,67.985L6.102,50.193z"/>
<path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701
c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>
<radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#789DED"/>
<stop offset="1" style="stop-color:#2381E8"/>
</radialGradient>
<path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01
c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
<path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5
L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064
z"/>
<path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028
C53.256,72.793,54.854,69.772,54.854,69.772z"/>
<g id="path5447" filter="url(#filter5365)">
<polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305
123.882,28.85 74.326,45.299 "/>
</g>
<g id="path5449" filter="url(#filter5365)">
<path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069
l2.321,16.552L122.567,49.116z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271
c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
</g>
<g id="path5451" filter="url(#filter5365)">
<path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812
C72.863,49.94,74.641,47.024,74.641,47.024z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849
c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
</g>
<g id="path5453" filter="url(#filter5365)">
<path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133
C21.902,42.502,21.042,36.088,25.408,27.822z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018
L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
</g>
<g id="path5455" filter="url(#filter5365)">
<path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725
l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168
c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
</g>
<path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
<path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778
c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>
<radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#FD8A8A"/>
<stop offset="1" style="stop-color:#FF7878"/>
</radialGradient>
<path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125
c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>
<linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
<stop offset="0" style="stop-color:#FFFFFF"/>
<stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
</linearGradient>
<path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
<path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5
l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643
L122.148,46.135z"/>
<path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028
C70.399,48.864,71.997,45.844,71.997,45.844z"/>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 473 KiB

After

Width:  |  Height:  |  Size: 472 KiB

View File

@ -1,32 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 15.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
width="181px" height="212px" viewBox="0 0 181 212" enable-background="new 0 0 181 212" xml:space="preserve">
<g>
<path fill="#FFFFFF" d="M105.134,97.504l-3.598,2.688l-4.197-5.618l-5.754,4.299l-1.733-2.32
c19.007-16.602,22.311-45.299,6.955-65.852C80.691,9.133,50.143,4.711,28.574,20.826s-25.99,46.663-9.875,68.232
c15.355,20.553,43.812,25.521,65.122,12l1.732,2.319l-5.755,4.3l4.198,5.619l-3.598,2.688l68.083,91.125l24.734-18.48
L105.134,97.504z M29.113,81.277C17.296,65.46,20.538,43.058,36.355,31.24c15.816-11.817,38.219-8.575,50.036,7.242
c11.817,15.817,8.575,38.22-7.241,50.037C63.333,100.337,40.931,97.094,29.113,81.277z"/>
<g>
<g>
<path fill="#010101" d="M28.574,15.642C7.006,31.756,2.584,62.305,18.699,83.874c16.114,21.568,46.663,25.989,68.231,9.875
c21.569-16.115,25.99-46.664,9.876-68.232C80.691,3.948,50.143-0.473,28.574,15.642z M79.15,83.335
c-15.817,11.817-38.22,8.574-50.037-7.242c-11.817-15.817-8.575-38.219,7.242-50.037c15.816-11.817,38.219-8.575,50.036,7.242
C98.209,49.115,94.967,71.517,79.15,83.335z"/>
</g>
<rect x="83.577" y="89.507" transform="matrix(0.8011 -0.5986 0.5986 0.8011 -39.0685 71.0375)" fill="#010101" width="7.528" height="9.579"/>
<rect x="81.819" y="94.546" transform="matrix(0.8011 -0.5985 0.5985 0.8011 -42.3354 75.7256)" fill="#991A37" width="21.894" height="14.026"/>
<linearGradient id="SVGID_1_" gradientUnits="userSpaceOnUse" x1="98.5879" y1="122.3535" x2="129.4668" y2="122.3535" gradientTransform="matrix(0.8011 -0.5985 0.5985 0.8011 -37.7672 117.3501)">
<stop offset="0" style="stop-color:#000000"/>
<stop offset="0.3983" style="stop-color:#FFFFFF"/>
<stop offset="1" style="stop-color:#000000"/>
</linearGradient>
<polygon fill="url(#SVGID_1_)" points="173.217,183.444 148.482,201.925 80.399,110.8 105.134,92.319 "/>
</g>
</g>
</svg>
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 15.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
width="181px" height="212px" viewBox="0 0 181 212" enable-background="new 0 0 181 212" xml:space="preserve">
<g>
<path fill="#FFFFFF" d="M105.134,97.504l-3.598,2.688l-4.197-5.618l-5.754,4.299l-1.733-2.32
c19.007-16.602,22.311-45.299,6.955-65.852C80.691,9.133,50.143,4.711,28.574,20.826s-25.99,46.663-9.875,68.232
c15.355,20.553,43.812,25.521,65.122,12l1.732,2.319l-5.755,4.3l4.198,5.619l-3.598,2.688l68.083,91.125l24.734-18.48
L105.134,97.504z M29.113,81.277C17.296,65.46,20.538,43.058,36.355,31.24c15.816-11.817,38.219-8.575,50.036,7.242
c11.817,15.817,8.575,38.22-7.241,50.037C63.333,100.337,40.931,97.094,29.113,81.277z"/>
<g>
<g>
<path fill="#010101" d="M28.574,15.642C7.006,31.756,2.584,62.305,18.699,83.874c16.114,21.568,46.663,25.989,68.231,9.875
c21.569-16.115,25.99-46.664,9.876-68.232C80.691,3.948,50.143-0.473,28.574,15.642z M79.15,83.335
c-15.817,11.817-38.22,8.574-50.037-7.242c-11.817-15.817-8.575-38.219,7.242-50.037c15.816-11.817,38.219-8.575,50.036,7.242
C98.209,49.115,94.967,71.517,79.15,83.335z"/>
</g>
<rect x="83.577" y="89.507" transform="matrix(0.8011 -0.5986 0.5986 0.8011 -39.0685 71.0375)" fill="#010101" width="7.528" height="9.579"/>
<rect x="81.819" y="94.546" transform="matrix(0.8011 -0.5985 0.5985 0.8011 -42.3354 75.7256)" fill="#991A37" width="21.894" height="14.026"/>
<linearGradient id="SVGID_1_" gradientUnits="userSpaceOnUse" x1="98.5879" y1="122.3535" x2="129.4668" y2="122.3535" gradientTransform="matrix(0.8011 -0.5985 0.5985 0.8011 -37.7672 117.3501)">
<stop offset="0" style="stop-color:#000000"/>
<stop offset="0.3983" style="stop-color:#FFFFFF"/>
<stop offset="1" style="stop-color:#000000"/>
</linearGradient>
<polygon fill="url(#SVGID_1_)" points="173.217,183.444 148.482,201.925 80.399,110.8 105.134,92.319 "/>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 2.2 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

@ -1,30 +1,30 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>'
'''
www.autosport.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class autosport(BasicNewsRecipe):
    """calibre news recipe for www.autosport.com, fed by the site's all-news RSS feed."""

    # --- Publication metadata -------------------------------------------
    title = u'Autosport'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'en_GB'
    description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
    masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'

    # --- Feed selection -------------------------------------------------
    feeds = [
        (u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml'),
    ]
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Article clean-up -----------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    # Only the headline, the author/date cells and the paragraphs are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'news_headline'}),
        dict(name='td', attrs={'class': 'news_article_author'}),
        dict(name='td', attrs={'class': 'news_article_date'}),
        dict(name='p'),
    ]
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>'
'''
www.autosport.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class autosport(BasicNewsRecipe):
    """calibre news recipe for www.autosport.com, fed by the site's all-news RSS feed."""

    # --- Publication metadata -------------------------------------------
    title = u'Autosport'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'en_GB'
    description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
    masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'

    # --- Feed selection -------------------------------------------------
    feeds = [
        (u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml'),
    ]
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Article clean-up -----------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    # Only the headline, the author/date cells and the paragraphs are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'news_headline'}),
        dict(name='td', attrs={'class': 'news_article_author'}),
        dict(name='td', attrs={'class': 'news_article_date'}),
        dict(name='p'),
    ]

View File

@ -1,45 +1,45 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''
class Buchreport(BasicNewsRecipe):
    """calibre recipe that converts the Buchreport RSS feed into an ebook."""

    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    description = 'Buchreport'
    version = 4
    title = u'Buchreport'
    timefmt = ' [%d.%m.%Y]'
    encoding = 'cp1252'
    language = 'de'

    # Adjacent string literals are used instead of backslash continuations so
    # that source indentation never ends up inside the stylesheet text.
    # The h3 rule uses 'normal': 'regular' is not a valid CSS font-weight
    # keyword, so the original declaration was ignored by CSS parsers.
    extra_css = (
        'body { margin-left: 0.00em; margin-right: 0.00em; } '
        'article, articledate, articledescription { text-align: left; } '
        'h1 { text-align: left; font-size: 140%; font-weight: bold; } '
        'h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } '
        'h3 { text-align: left; font-size: 100%; font-weight: normal; font-style: italic; } '
        'h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }'
    )

    oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    # The article is delimited by its <h2> headline and a styled <div>; the
    # same <div> is also listed in remove_tags, together with iframes/images.
    remove_tags_before = dict(name='h2')
    remove_tags_after = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]})
    ]
    remove_tags = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}),
        dict(name='iframe'),
        dict(name='img')
    ]

    feeds = [
        (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
    ]

    def get_masthead_url(self):
        """Return the fixed URL of the Buchreport logo used as the masthead."""
        return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'
from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''
class Buchreport(BasicNewsRecipe):
    """calibre recipe that converts the Buchreport RSS feed into an ebook."""

    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    description = 'Buchreport'
    version = 4
    title = u'Buchreport'
    timefmt = ' [%d.%m.%Y]'
    encoding = 'cp1252'
    language = 'de'

    # Adjacent string literals are used instead of backslash continuations so
    # that source indentation never ends up inside the stylesheet text.
    # The h3 rule uses 'normal': 'regular' is not a valid CSS font-weight
    # keyword, so the original declaration was ignored by CSS parsers.
    extra_css = (
        'body { margin-left: 0.00em; margin-right: 0.00em; } '
        'article, articledate, articledescription { text-align: left; } '
        'h1 { text-align: left; font-size: 140%; font-weight: bold; } '
        'h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } '
        'h3 { text-align: left; font-size: 100%; font-weight: normal; font-style: italic; } '
        'h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }'
    )

    oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    # The article is delimited by its <h2> headline and a styled <div>; the
    # same <div> is also listed in remove_tags, together with iframes/images.
    remove_tags_before = dict(name='h2')
    remove_tags_after = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]})
    ]
    remove_tags = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}),
        dict(name='iframe'),
        dict(name='img')
    ]

    feeds = [
        (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
    ]

    def get_masthead_url(self):
        """Return the fixed URL of the Buchreport logo used as the masthead."""
        return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'

View File

@ -1,11 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341449(BasicNewsRecipe):
    """calibre news recipe for Diario La Republica, driven by a single RSS feed."""

    # Identification
    title = u'Diario La Republica'
    __author__ = 'CAVALENCIA'
    language = 'es_CO'

    # Source feed as a (title, url) pair
    feeds = [
        (u'Diario La Republica', u'http://www.larepublica.com.co/rss/larepublica.xml'),
    ]

    # Download limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341449(BasicNewsRecipe):
    """Recipe for 'Diario La Republica', fetched from a single RSS feed."""

    # Identity of the publication.
    __author__ = 'CAVALENCIA'
    title = u'Diario La Republica'
    language = 'es_CO'

    # Feed limits.
    max_articles_per_feed = 100
    oldest_article = 7

    # No hand-written tag filters in this recipe; auto clean-up is enabled.
    auto_cleanup = True

    feeds = [
        (
            u'Diario La Republica',
            u'http://www.larepublica.com.co/rss/larepublica.xml',
        ),
    ]

View File

@ -1,98 +1,98 @@
#!/usr/bin/env python
__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
dnevnik.com.mk
'''
import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Dnevnik(BasicNewsRecipe):
    """Recipe for the Macedonian daily Dnevnik (dnevnik.com.mk).

    The article list is scraped from the site's archive page for the current
    date, and the resulting sections are sorted by get_weight().
    """

    INDEX = 'http://www.dnevnik.com.mk'
    __author__ = 'Darko Spasovski'
    title = 'Dnevnik - mk'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.dnevnik.com.mk/images/re-logo.gif'
    language = 'mk'
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            # Remove anything before the start of the article.
            (r'<body.*?<\?xml version=\"1.0\"\?><!--Article start-->', lambda match: '<body>'),
            # Remove anything after the end of the article.
            (r'<!--Article end.*?</body>', lambda match: '</body>'),
        ]
    ]

    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        .WB_DNEVNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'language': language,
        'linearize_tables': True
    }

    # Lower-cased section name -> position used by get_weight().
    _NATURAL_ORDER = {
        u'во фокусот': 1, u'актуелно': 2, u'економија': 3,
        u'отворена': 4, u'свет': 5, u'интервју': 6, u'џубокс': 7,
        u'репортажа': 8, u'наш туризам': 9, u'живот': 10,
        u'автомобилизам': 11, u'спорт': 12, u'омнибус': 13
    }
    # Weight for section names that are not in _NATURAL_ORDER (sorted last).
    _UNKNOWN_WEIGHT = 999

    def _archive_url(self):
        """Return the URL of the archive page for today's date."""
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        return self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum

    def parse_index(self):
        """Build the feed list from today's archive page.

        Returns a list of (section title, articles) tuples sorted with
        get_weight(); each article is a dict with 'title', 'url',
        'description' and 'date' keys. Sections without articles are omitted.
        """
        soup = self.index_to_soup(self._archive_url())
        feeds = []
        for section in soup.findAll('td', attrs={'class': 'WB_DNEVNIK_ArhivaFormTitle'}):
            # .string is None when the first child is a tag with several
            # children; such a cell (or an empty one) cannot name a section.
            sectionTitle = section.contents[0].string if section.contents else None
            if not sectionTitle:
                continue
            if sectionTitle.lower().startswith('online'):
                # Skip online articles
                continue
            # Both lookups return None when the expected table is missing.
            previousTable = section.findPrevious(name='table')
            containerTable = None
            if previousTable is not None:
                containerTable = previousTable.findNextSibling(name='table')
            if containerTable is None:
                print('No container table found - page layout may have been changed.')
                continue
            articles = []
            for article in containerTable.findAll('a', attrs={'class': 'WB_DNEVNIK_ArhivaFormText'}):
                title = self.tag_to_string(article, use_alt=True).strip()
                articles.append({
                    'title': title,
                    'url': self.INDEX + '/' + article['href'],
                    'description': '',
                    'date': ''
                })
            if articles:
                feeds.append((sectionTitle, articles))
        return sorted(feeds, key=self.get_weight)

    def get_weight(self, section):
        """
        Returns 'weight' of a section.
        Used for sorting the sections based on their 'natural' order in the printed edition.

        `section` is a (title, articles) tuple; only the title is read.
        Titles missing from the order table get the largest weight.
        """
        name = section[0]
        # Accept both parser string objects (which expose .string) and plain
        # strings.
        name = getattr(name, 'string', name)
        return self._NATURAL_ORDER.get(name.lower(), self._UNKNOWN_WEIGHT)

    def get_cover_url(self):
        """Return the URL of today's cover image, or '' when it cannot be found."""
        soup = self.index_to_soup(self._archive_url())
        anchor = soup.find('a', attrs={'class': 'WB_DNEVNIK_MoreLink'})
        if anchor is None:
            return ''
        raw = browser().open_novisit(self.INDEX + '/' + anchor['href']).read()
        cover_soup = BeautifulSoup(raw)
        # Guard each step: a changed page layout yields None here, and the
        # method should fall back to "no cover" instead of raising.
        dateBox = cover_soup.find('div', attrs={'class': 'WB_DNEVNIK_Datum2'})
        image = dateBox.findNext('img') if dateBox is not None else None
        if image is None:
            return ''
        return self.INDEX + '/' + image['src']
#!/usr/bin/env python
__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
dnevnik.com.mk
'''
import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Dnevnik(BasicNewsRecipe):
    """Recipe for the Macedonian daily Dnevnik (dnevnik.com.mk).

    The article list is scraped from the site's archive page for the current
    date, and the resulting sections are sorted by get_weight().
    """

    INDEX = 'http://www.dnevnik.com.mk'
    __author__ = 'Darko Spasovski'
    title = 'Dnevnik - mk'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.dnevnik.com.mk/images/re-logo.gif'
    language = 'mk'
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            # Remove anything before the start of the article.
            (r'<body.*?<\?xml version=\"1.0\"\?><!--Article start-->', lambda match: '<body>'),
            # Remove anything after the end of the article.
            (r'<!--Article end.*?</body>', lambda match: '</body>'),
        ]
    ]

    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        .WB_DNEVNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'language': language,
        'linearize_tables': True
    }

    # Lower-cased section name -> position used by get_weight().
    _NATURAL_ORDER = {
        u'во фокусот': 1, u'актуелно': 2, u'економија': 3,
        u'отворена': 4, u'свет': 5, u'интервју': 6, u'џубокс': 7,
        u'репортажа': 8, u'наш туризам': 9, u'живот': 10,
        u'автомобилизам': 11, u'спорт': 12, u'омнибус': 13
    }
    # Weight for section names that are not in _NATURAL_ORDER (sorted last).
    _UNKNOWN_WEIGHT = 999

    def _archive_url(self):
        """Return the URL of the archive page for today's date."""
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        return self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum

    def parse_index(self):
        """Build the feed list from today's archive page.

        Returns a list of (section title, articles) tuples sorted with
        get_weight(); each article is a dict with 'title', 'url',
        'description' and 'date' keys. Sections without articles are omitted.
        """
        soup = self.index_to_soup(self._archive_url())
        feeds = []
        for section in soup.findAll('td', attrs={'class': 'WB_DNEVNIK_ArhivaFormTitle'}):
            # .string is None when the first child is a tag with several
            # children; such a cell (or an empty one) cannot name a section.
            sectionTitle = section.contents[0].string if section.contents else None
            if not sectionTitle:
                continue
            if sectionTitle.lower().startswith('online'):
                # Skip online articles
                continue
            # Both lookups return None when the expected table is missing.
            previousTable = section.findPrevious(name='table')
            containerTable = None
            if previousTable is not None:
                containerTable = previousTable.findNextSibling(name='table')
            if containerTable is None:
                print('No container table found - page layout may have been changed.')
                continue
            articles = []
            for article in containerTable.findAll('a', attrs={'class': 'WB_DNEVNIK_ArhivaFormText'}):
                title = self.tag_to_string(article, use_alt=True).strip()
                articles.append({
                    'title': title,
                    'url': self.INDEX + '/' + article['href'],
                    'description': '',
                    'date': ''
                })
            if articles:
                feeds.append((sectionTitle, articles))
        return sorted(feeds, key=self.get_weight)

    def get_weight(self, section):
        """
        Returns 'weight' of a section.
        Used for sorting the sections based on their 'natural' order in the printed edition.

        `section` is a (title, articles) tuple; only the title is read.
        Titles missing from the order table get the largest weight.
        """
        name = section[0]
        # Accept both parser string objects (which expose .string) and plain
        # strings.
        name = getattr(name, 'string', name)
        return self._NATURAL_ORDER.get(name.lower(), self._UNKNOWN_WEIGHT)

    def get_cover_url(self):
        """Return the URL of today's cover image, or '' when it cannot be found."""
        soup = self.index_to_soup(self._archive_url())
        anchor = soup.find('a', attrs={'class': 'WB_DNEVNIK_MoreLink'})
        if anchor is None:
            return ''
        raw = browser().open_novisit(self.INDEX + '/' + anchor['href']).read()
        cover_soup = BeautifulSoup(raw)
        # Guard each step: a changed page layout yields None here, and the
        # method should fall back to "no cover" instead of raising.
        dateBox = cover_soup.find('div', attrs={'class': 'WB_DNEVNIK_Datum2'})
        image = dateBox.findNext('img') if dateBox is not None else None
        if image is None:
            return ''
        return self.INDEX + '/' + image['src']

View File

@ -1,56 +1,56 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311790237(BasicNewsRecipe):
    """Recipe for the Colombian newspaper El Colombiano, built from its RSS feeds."""

    title = u'Periódico El Colombiano'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    # The site logo doubles as cover and masthead.
    cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
    masthead_url = cover_url
    publication_type = 'newspaper'

    remove_tags_before = dict(id='contenidoArt')
    # This attribute used to be assigned twice in a row (first to
    # id='enviaTips', then to id='zonaPata'); only the last assignment ever
    # took effect, so the dead one has been dropped.
    remove_tags_after = dict(id='zonaPata')

    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'),
             (u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'),
             (u'Colombia', u'http://www.elcolombiano.com/rss/Colombia.xml'),
             (u'Economia', u'http://www.elcolombiano.com/rss/Economia.xml'),
             (u'Internacional', u'http://www.elcolombiano.com/rss/Internacional.xml'),
             (u'Politica', u'http://www.elcolombiano.com/rss/Politica.xml'),
             (u'Cultura', u'http://www.elcolombiano.com/rss/Cultura.xml'),
             (u'Entretenimiento', u'http://www.elcolombiano.com/rss/Farandula.xml'),
             (u'Tecnologia', u'http://www.elcolombiano.com/rss/Tecnologia.xml'),
             (u'Television', u'http://www.elcolombiano.com/rss/Television.xml'),
             (u'Vida y Sociedad', u'http://www.elcolombiano.com/rss/Vida.xml'),
             # Was '.../Turismo.xm'; every other feed on this host ends in '.xml'.
             (u'Turismo', u'http://www.elcolombiano.com/rss/Turismo.xml'),
             (u'Salud', u'http://www.elcolombiano.com/rss/Salud.xml'),
             (u'Ciencia', u'http://www.elcolombiano.com/rss/Ciencia.xml')]

    # 'comentarios' was listed twice; one entry is enough.
    remove_tags = [dict(name='div', attrs={'class':'objetosRelacionados'}),
                   dict(name='div', attrs={'class':'notasRelacionadas contenedor'}),
                   dict(name='div', attrs={'class':'comentarios'}),
                   dict(name='div', attrs={'class':'mapaDelSitio'}),
                   dict(name='div', attrs={'class':'creditos'}),
                   dict(name='div', attrs={'class':'votos'}),
                   dict(name='div', attrs={'class':'divopt2'}),
                   dict(name='div', attrs={'class':'pestanasLateral'}),
                   dict(name='div', attrs={'class':'resumenSeccion'}),
                   dict(name='div', attrs={'class':'zonaComercial'}),
                   dict(name='div', attrs={'id':'zonaPata'})]
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311790237(BasicNewsRecipe):
    """Recipe for the Colombian newspaper El Colombiano, built from its RSS feeds."""

    title = u'Periódico El Colombiano'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    # The site logo doubles as cover and masthead.
    cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
    masthead_url = cover_url
    publication_type = 'newspaper'

    remove_tags_before = dict(id='contenidoArt')
    # This attribute used to be assigned twice in a row (first to
    # id='enviaTips', then to id='zonaPata'); only the last assignment ever
    # took effect, so the dead one has been dropped.
    remove_tags_after = dict(id='zonaPata')

    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'),
             (u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'),
             (u'Colombia', u'http://www.elcolombiano.com/rss/Colombia.xml'),
             (u'Economia', u'http://www.elcolombiano.com/rss/Economia.xml'),
             (u'Internacional', u'http://www.elcolombiano.com/rss/Internacional.xml'),
             (u'Politica', u'http://www.elcolombiano.com/rss/Politica.xml'),
             (u'Cultura', u'http://www.elcolombiano.com/rss/Cultura.xml'),
             (u'Entretenimiento', u'http://www.elcolombiano.com/rss/Farandula.xml'),
             (u'Tecnologia', u'http://www.elcolombiano.com/rss/Tecnologia.xml'),
             (u'Television', u'http://www.elcolombiano.com/rss/Television.xml'),
             (u'Vida y Sociedad', u'http://www.elcolombiano.com/rss/Vida.xml'),
             # Was '.../Turismo.xm'; every other feed on this host ends in '.xml'.
             (u'Turismo', u'http://www.elcolombiano.com/rss/Turismo.xml'),
             (u'Salud', u'http://www.elcolombiano.com/rss/Salud.xml'),
             (u'Ciencia', u'http://www.elcolombiano.com/rss/Ciencia.xml')]

    # 'comentarios' was listed twice; one entry is enough.
    remove_tags = [dict(name='div', attrs={'class':'objetosRelacionados'}),
                   dict(name='div', attrs={'class':'notasRelacionadas contenedor'}),
                   dict(name='div', attrs={'class':'comentarios'}),
                   dict(name='div', attrs={'class':'mapaDelSitio'}),
                   dict(name='div', attrs={'class':'creditos'}),
                   dict(name='div', attrs={'class':'votos'}),
                   dict(name='div', attrs={'class':'divopt2'}),
                   dict(name='div', attrs={'class':'pestanasLateral'}),
                   dict(name='div', attrs={'class':'resumenSeccion'}),
                   dict(name='div', attrs={'class':'zonaComercial'}),
                   dict(name='div', attrs={'id':'zonaPata'})]

View File

@ -1,54 +1,54 @@
# coding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElEspectador(BasicNewsRecipe):
    """Recipe for the Colombian newspaper El Espectador, built from its RSS feeds."""

    title = u'Periódico el Espectador'
    __author__ = 'BIGO-CAVA'
    cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    #remove_tags_before = dict(id='fb-root')
    remove_tags_before = dict(id='content')
    remove_tags_after = [dict(name='div', attrs={'class':'paginacion'})]
    language = 'es_CO'
    #keep_only_tags = [dict(name='div', id='content')]
    remove_tags = [dict(name='div', attrs={'class':'herramientas_nota'}),
                   dict(name='div', attrs={'class':'relpauta'}),
                   dict(name='div', attrs={'class':'recursosrelacionados'}),
                   dict(name='div', attrs={'class':'nav_negocios'})]
    # Selectors that are currently disabled:
    # dict(name='div', attrs={'class':'tags_playerrecurso'}),
    # dict(name='div', attrs={'class':'ico-mail2'}),
    # dict(name='div', attrs={'id':'caja-instapaper'}),
    # dict(name='div', attrs={'class':'modulo herramientas'})]

    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    publication_type = 'newspaper'

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    # The first entry previously carried a trailing space in its title and a
    # leading space inside the URL string; both stray spaces are removed so
    # it matches the other entries.
    feeds = [(u'Política', u'http://www.elespectador.com/noticias/politica/feed'),
             (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
             (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
             (u'Economía', u'http://www.elespectador.com/economia/feed'),
             (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
             (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
             (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
             (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
             (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
             (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
             (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
             (u'Deportes', u'http://www.elespectador.com/deportes/feed'),
             (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
             (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
             (u'Opinión', u'http://www.elespectador.com/opinion/feed'),
             (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed')]
# coding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElEspectador(BasicNewsRecipe):
    """Recipe for the Colombian newspaper El Espectador, built from its RSS feeds."""

    title = u'Periódico el Espectador'
    __author__ = 'BIGO-CAVA'
    cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    #remove_tags_before = dict(id='fb-root')
    remove_tags_before = dict(id='content')
    remove_tags_after = [dict(name='div', attrs={'class':'paginacion'})]
    language = 'es_CO'
    #keep_only_tags = [dict(name='div', id='content')]
    remove_tags = [dict(name='div', attrs={'class':'herramientas_nota'}),
                   dict(name='div', attrs={'class':'relpauta'}),
                   dict(name='div', attrs={'class':'recursosrelacionados'}),
                   dict(name='div', attrs={'class':'nav_negocios'})]
    # Selectors that are currently disabled:
    # dict(name='div', attrs={'class':'tags_playerrecurso'}),
    # dict(name='div', attrs={'class':'ico-mail2'}),
    # dict(name='div', attrs={'id':'caja-instapaper'}),
    # dict(name='div', attrs={'class':'modulo herramientas'})]

    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    publication_type = 'newspaper'

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    # The first entry previously carried a trailing space in its title and a
    # leading space inside the URL string; both stray spaces are removed so
    # it matches the other entries.
    feeds = [(u'Política', u'http://www.elespectador.com/noticias/politica/feed'),
             (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
             (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
             (u'Economía', u'http://www.elespectador.com/economia/feed'),
             (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
             (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
             (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
             (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
             (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
             (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
             (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
             (u'Deportes', u'http://www.elespectador.com/deportes/feed'),
             (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
             (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
             (u'Opinión', u'http://www.elespectador.com/opinion/feed'),
             (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed')]

View File

@ -1,40 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313609361(BasicNewsRecipe):
    """Recipe for El Mostrador, described below as a Chilean online newspaper."""

    # --- Publication metadata ---
    __author__ = 'Alex Mitrani'
    title = u'El Mostrador'
    description = u'Chilean online newspaper'
    publisher = u'La Plaza S.A.'
    category = 'news, rss'
    language = 'es_CL'
    news = True
    masthead_url = 'http://www.elmostrador.cl/assets/img/logo-elmostrador-m.jpg'

    # --- Feed limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    summary_length = 1000
    remove_empty_feeds = True

    # --- Page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'news-heading cf'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'footer-actions cf'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'footer-actions cb cf'}},
        {'name': 'div', 'attrs': {'class': 'news-aside fl'}},
        {'name': 'div', 'attrs': {'class': 'footer-actions cf'}},
        {'name': 'div', 'attrs': {'class': 'user-bar', 'id': 'top'}},
        {'name': 'div', 'attrs': {'class': 'indicators'}},
        {'name': 'div', 'attrs': {'id': 'header'}},
    ]

    # --- Sections, as (title, RSS URL) pairs ---
    feeds = [
        (u'Temas Destacados', u'http://www.elmostrador.cl/destacado/feed/'),
        (u'El D\xeda', u'http://www.elmostrador.cl/dia/feed/'),
        (u'Pa\xeds', u'http://www.elmostrador.cl/noticias/pais/feed/'),
        (u'Mundo', u'http://www.elmostrador.cl/noticias/mundo/feed/'),
        (u'Negocios', u'http://www.elmostrador.cl/noticias/negocios/feed/'),
        (u'Cultura', u'http://www.elmostrador.cl/noticias/cultura/feed/'),
        (u'Vida en L\xednea', u'http://www.elmostrador.cl/vida-en-linea/feed/'),
        (u'Opini\xf3n & Blogs', u'http://www.elmostrador.cl/opinion/feed/'),
    ]
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313609361(BasicNewsRecipe):
    """Recipe for El Mostrador, described below as a Chilean online newspaper."""

    # --- Publication metadata ---
    __author__ = 'Alex Mitrani'
    title = u'El Mostrador'
    description = u'Chilean online newspaper'
    publisher = u'La Plaza S.A.'
    category = 'news, rss'
    language = 'es_CL'
    news = True
    masthead_url = 'http://www.elmostrador.cl/assets/img/logo-elmostrador-m.jpg'

    # --- Feed limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    summary_length = 1000
    remove_empty_feeds = True

    # --- Page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'news-heading cf'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'footer-actions cf'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'footer-actions cb cf'}},
        {'name': 'div', 'attrs': {'class': 'news-aside fl'}},
        {'name': 'div', 'attrs': {'class': 'footer-actions cf'}},
        {'name': 'div', 'attrs': {'class': 'user-bar', 'id': 'top'}},
        {'name': 'div', 'attrs': {'class': 'indicators'}},
        {'name': 'div', 'attrs': {'id': 'header'}},
    ]

    # --- Sections, as (title, RSS URL) pairs ---
    feeds = [
        (u'Temas Destacados', u'http://www.elmostrador.cl/destacado/feed/'),
        (u'El D\xeda', u'http://www.elmostrador.cl/dia/feed/'),
        (u'Pa\xeds', u'http://www.elmostrador.cl/noticias/pais/feed/'),
        (u'Mundo', u'http://www.elmostrador.cl/noticias/mundo/feed/'),
        (u'Negocios', u'http://www.elmostrador.cl/noticias/negocios/feed/'),
        (u'Cultura', u'http://www.elmostrador.cl/noticias/cultura/feed/'),
        (u'Vida en L\xednea', u'http://www.elmostrador.cl/vida-en-linea/feed/'),
        (u'Opini\xf3n & Blogs', u'http://www.elmostrador.cl/opinion/feed/'),
    ]

View File

@ -1,52 +1,52 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElTiempo02(BasicNewsRecipe):
    """Recipe for the Colombian newspaper El Tiempo, built from its RSS feeds."""

    # --- Publication metadata ---
    __author__ = 'BIGO-CAVA'
    title = u'Periódico el Tiempo'
    language = 'es_CO'
    publication_type = 'newspaper'
    cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    masthead_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'

    # --- Feed limits ---
    oldest_article = 2
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    #remove_tags_before = dict(id='fb-root')
    remove_tags_before = {'id': 'contenidoArt'}
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'modulo reporte'}}]
    keep_only_tags = [{'name': 'div', 'id': 'contenidoArt'}]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'social-media'}},
        {'name': 'div', 'attrs': {'class': 'recomend-art'}},
        {'name': 'div', 'attrs': {'class': 'caja-facebook'}},
        {'name': 'div', 'attrs': {'class': 'caja-twitter'}},
        {'name': 'div', 'attrs': {'class': 'caja-buzz'}},
        {'name': 'div', 'attrs': {'class': 'ico-mail2'}},
        {'name': 'div', 'attrs': {'id': 'caja-instapaper'}},
        {'name': 'div', 'attrs': {'class': 'modulo herramientas'}},
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    # --- Sections, as (title, RSS URL) pairs ---
    feeds = [
        (u'Colombia', u'http://www.eltiempo.com/colombia/rss.xml'),
        (u'Medellin', u'http://www.eltiempo.com/colombia/medellin/rss.xml'),
        (u'Economia', u'http://www.eltiempo.com/economia/rss.xml'),
        (u'Deportes', u'http://www.eltiempo.com/deportes/rss.xml'),
        (u'Mundo', u'http://www.eltiempo.com/mundo/rss.xml'),
        (u'Gente', u'http://www.eltiempo.com/gente/rss.xml'),
        (u'Vida de Hoy', u'http://www.eltiempo.com/vida-de-hoy/rss.xml'),
        (u'EEUU', u'http://www.eltiempo.com/mundo/estados-unidos/rss.xml'),
        (u'LatinoAmerica', u'http://www.eltiempo.com/mundo/latinoamerica/rss.xml'),
        (u'Europa', u'http://www.eltiempo.com/mundo/europa/rss.xml'),
        (u'Medio Oriente', u'http://www.eltiempo.com/mundo/medio-oriente/rss.xml'),
        (u'Vive in Medellin', u'http://medellin.vive.in/medellin/rss.xml'),
        (u'Don Juan', u'http://www.revistadonjuan.com/feedrss/'),
        (u'Alo', u'http://www.eltiempo.com/alo/rss.xml'),
    ]
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElTiempo02(BasicNewsRecipe):
    """Recipe for the Colombian newspaper El Tiempo, built from its RSS feeds."""

    # --- Publication metadata ---
    __author__ = 'BIGO-CAVA'
    title = u'Periódico el Tiempo'
    language = 'es_CO'
    publication_type = 'newspaper'
    cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    masthead_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'

    # --- Feed limits ---
    oldest_article = 2
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    #remove_tags_before = dict(id='fb-root')
    remove_tags_before = {'id': 'contenidoArt'}
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'modulo reporte'}}]
    keep_only_tags = [{'name': 'div', 'id': 'contenidoArt'}]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'social-media'}},
        {'name': 'div', 'attrs': {'class': 'recomend-art'}},
        {'name': 'div', 'attrs': {'class': 'caja-facebook'}},
        {'name': 'div', 'attrs': {'class': 'caja-twitter'}},
        {'name': 'div', 'attrs': {'class': 'caja-buzz'}},
        {'name': 'div', 'attrs': {'class': 'ico-mail2'}},
        {'name': 'div', 'attrs': {'id': 'caja-instapaper'}},
        {'name': 'div', 'attrs': {'class': 'modulo herramientas'}},
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    # --- Sections, as (title, RSS URL) pairs ---
    feeds = [
        (u'Colombia', u'http://www.eltiempo.com/colombia/rss.xml'),
        (u'Medellin', u'http://www.eltiempo.com/colombia/medellin/rss.xml'),
        (u'Economia', u'http://www.eltiempo.com/economia/rss.xml'),
        (u'Deportes', u'http://www.eltiempo.com/deportes/rss.xml'),
        (u'Mundo', u'http://www.eltiempo.com/mundo/rss.xml'),
        (u'Gente', u'http://www.eltiempo.com/gente/rss.xml'),
        (u'Vida de Hoy', u'http://www.eltiempo.com/vida-de-hoy/rss.xml'),
        (u'EEUU', u'http://www.eltiempo.com/mundo/estados-unidos/rss.xml'),
        (u'LatinoAmerica', u'http://www.eltiempo.com/mundo/latinoamerica/rss.xml'),
        (u'Europa', u'http://www.eltiempo.com/mundo/europa/rss.xml'),
        (u'Medio Oriente', u'http://www.eltiempo.com/mundo/medio-oriente/rss.xml'),
        (u'Vive in Medellin', u'http://medellin.vive.in/medellin/rss.xml'),
        (u'Don Juan', u'http://www.revistadonjuan.com/feedrss/'),
        (u'Alo', u'http://www.eltiempo.com/alo/rss.xml'),
    ]

View File

@ -1,129 +1,129 @@
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
# Calibre news recipe for Estadão ("Brazilian news from Estadão", per the
# description below). Articles come from the RSS feeds listed in `feeds`.
class Estadao(BasicNewsRecipe):
# API key for thumbalizr.com; get_cover_url() tests it for truthiness first.
THUMBALIZR_API = '' # ----> Get your API key at http://www.thumbalizr.com/ and put it here
# LANGUAGE is passed to conversion_options['language']; LANGHTM is written
# into the Content-Language <meta> tag and the User-agent header.
LANGUAGE = 'pt_br'
language = 'pt'
LANGHTM = 'pt-br'
# Of the next three, only ENCHTM is referenced elsewhere in this class
# (Content-Type <meta> tag and Accept-Charset header).
ENCODING = 'utf'
ENCHTM = 'utf-8'
directionhtm = 'ltr'
requires_version = (0,7,47)
news = True
title = u'Estad\xe3o'
__author__ = 'Euler Alves'
description = u'Brazilian news from Estad\xe3o'
publisher = u'Estad\xe3o'
category = 'news, rss'
oldest_article = 4
max_articles_per_feed = 100
summary_length = 1000
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]'
# Reference date for the cover image URL: two days before now, moved back one
# more day when its hour is before 10. Computed in the class body, not per call.
hoje = datetime.now()-timedelta(days=2)
# NOTE(review): pubdate is formatted before the hour adjustment below and is
# not read again inside this class; confirm whether it is still needed.
pubdate = hoje.strftime('%a, %d %b')
if hoje.hour<10:
hoje = hoje-timedelta(days=1)
# CAPA: URL of the front-page image for the date held in `hoje`.
CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg'
# SCREENSHOT: site URL sent as Referer and as the thumbalizr `url` parameter.
SCREENSHOT = 'http://estadao.com.br/'
cover_margins = (0,0,'white')
masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'
# Keep only <div> elements whose class is one of the two listed values.
keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})]
# Within the kept content, drop the tab bar, tag/discussion/ad containers,
# and every <a>, <iframe>, <link> and <script> element.
remove_tags = [
dict(name='div',
attrs={'id':[
'bb-md-noticia-tabs'
]})
,dict(name='div',
attrs={'class':[
'tags'
,'discussion'
,'bb-gg adsense_container'
]})
,dict(name='a')
,dict(name='iframe')
,dict(name='link')
,dict(name='script')
]
# (section title, RSS URL) pairs.
feeds = [
(u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml')
,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml')
,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml')
,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml')
,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/')
,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml')
,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml')
,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml')
,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
]
# Built from the class attributes defined above.
conversion_options = {
'title' : title
,'comments' : description
,'publisher' : publisher
,'tags' : category
,'language' : LANGUAGE
,'linearize_tables': True
}
# Strips every inline `style` attribute, then inserts Content-Language and
# Content-Type <meta> tags at the start of <head> when the page has none.
# Returns the modified soup.
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1)
return soup
# Visits every <img> that has a `src`, opens that path as an image, and
# rotates it by -90 when it is wider than tall and wider than 590, saving it
# back to the same path. Returns the soup. `first` is not used.
def postprocess_html(self, soup, first):
# Process all the images. Assumes that the new HTML has the correct path.
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
# NOTE(review): this compares the Image object itself with 0, which does not
# look like a meaningful out-of-memory test; confirm the intent.
if img < 0:
raise RuntimeError('Out of memory')
pw = PixelWand()
if( width > height and width > 590) :
print 'Rotate image'
img.rotate(pw, -90)
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# save happens for every image or only for rotated ones; confirm.
img.save(iurl)
return soup
# Chooses the cover URL: requests self.CAPA and, if the response parses to a
# document with a <body>, or the request raises URLError, switches to a
# thumbalizr screenshot URL of SCREENSHOT.
# NOTE(review): the nesting of the statements below cannot be recovered from
# this unindented view. Presumably everything sits inside the
# `if self.THUMBALIZR_API:` guard, in which case the method returns None while
# no API key is configured; confirm against the real file.
def get_cover_url(self):
if self.THUMBALIZR_API:
cover_url = self.CAPA
pedido = Request(self.CAPA)
pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
pedido.add_header('Accept-Charset',self.ENCHTM)
pedido.add_header('Referer',self.SCREENSHOT)
try:
resposta = urlopen(pedido)
soup = BeautifulSoup(resposta)
cover_item = soup.find('body')
if cover_item:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url
except URLError:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
class Estadao(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br'
language = 'pt'
LANGHTM = 'pt-br'
ENCODING = 'utf'
ENCHTM = 'utf-8'
directionhtm = 'ltr'
requires_version = (0,7,47)
news = True
title = u'Estad\xe3o'
__author__ = 'Euler Alves'
description = u'Brazilian news from Estad\xe3o'
publisher = u'Estad\xe3o'
category = 'news, rss'
oldest_article = 4
max_articles_per_feed = 100
summary_length = 1000
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]'
hoje = datetime.now()-timedelta(days=2)
pubdate = hoje.strftime('%a, %d %b')
if hoje.hour<10:
hoje = hoje-timedelta(days=1)
CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg'
SCREENSHOT = 'http://estadao.com.br/'
cover_margins = (0,0,'white')
masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'
keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})]
remove_tags = [
dict(name='div',
attrs={'id':[
'bb-md-noticia-tabs'
]})
,dict(name='div',
attrs={'class':[
'tags'
,'discussion'
,'bb-gg adsense_container'
]})
,dict(name='a')
,dict(name='iframe')
,dict(name='link')
,dict(name='script')
]
feeds = [
(u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml')
,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml')
,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml')
,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml')
,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/')
,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml')
,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml')
,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml')
,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
]
conversion_options = {
'title' : title
,'comments' : description
,'publisher' : publisher
,'tags' : category
,'language' : LANGUAGE
,'linearize_tables': True
}
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1)
return soup
def postprocess_html(self, soup, first):
    """Rotate wide landscape images and save them back in place.

    Each ``<img>`` carrying a ``src`` attribute is opened from the path in
    that attribute (the processed HTML is assumed to reference the correct
    path).  An image that is wider than tall and wider than 590 pixels is
    rotated by -90 degrees and written back to the same path.

    :param soup: parsed article HTML.
    :param first: part of the hook signature; not used here.
    :return: the same ``soup`` object.
    """
    # Background wand for rotations; created only if some image needs rotating.
    wand = None
    for tag in soup.findAll('img', src=True):
        iurl = tag['src']
        img = Image()
        img.open(iurl)
        width, height = img.size
        # Goes through the recipe logger instead of a Python-2-only print statement.
        self.log('img is: ', iurl, 'width is: ', width, 'height is: ', height)
        if width > height and width > 590:
            self.log('Rotate image')
            if wand is None:
                wand = PixelWand()
            img.rotate(wand, -90)
            img.save(iurl)
    return soup
def get_cover_url(self):
    """Return the cover URL for this issue, or None without an API key.

    The dated front-page image (``CAPA``) is requested first.  If the
    request fails with ``URLError``, or the response parses to a document
    with a ``<body>``, a thumbalizr.com screenshot URL of ``SCREENSHOT``
    is returned instead; otherwise ``CAPA`` itself is returned.

    :return: cover URL string, or ``None`` when ``THUMBALIZR_API`` is empty.
    """
    if not self.THUMBALIZR_API:
        return None

    # Fallback used on request failure and when the response looks like a page.
    screenshot_url = 'http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'

    pedido = Request(self.CAPA)
    pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
    pedido.add_header('Accept-Charset',self.ENCHTM)
    pedido.add_header('Referer',self.SCREENSHOT)
    try:
        resposta = urlopen(pedido)
        try:
            soup = BeautifulSoup(resposta)
        finally:
            # Always release the connection once the payload has been parsed.
            resposta.close()
    except URLError:
        return screenshot_url

    # NOTE(review): a <body> presumably means an HTML page was served
    # instead of the image -- confirm against the live site.
    if soup.find('body') is not None:
        return screenshot_url
    return self.CAPA

View File

@ -1,165 +1,165 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Folha de Sao Paulo RSS feeds.

    The cover is the dated front-page image (``CAPA``).  When that request
    fails or returns a document with a ``<body>``, a thumbalizr.com
    screenshot URL of ``SCREENSHOT`` is used instead.
    """

    # thumbalizr.com API key: get your own at http://www.thumbalizr.com/ and put it here.
    THUMBALIZR_API = ''
    LANGUAGE = 'pt_br'
    language = 'pt_BR'
    LANGHTM = 'pt-br'
    ENCODING = 'cp1252'
    ENCHTM = 'iso-8859-1'
    directionhtm = 'ltr'
    requires_version = (0,7,47)
    news = True

    title = u'Folha de S\xE3o Paulo'
    __author__ = 'Euler Alves and Alex Mitrani'
    description = u'Brazilian news from Folha de S\xE3o Paulo'
    publisher = u'Folha de S\xE3o Paulo'
    category = 'news, rss'

    oldest_article = 4
    max_articles_per_feed = 100
    summary_length = 1000

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    timefmt = ' [%d %b %Y (%a)]'

    html2lrf_options = [
        '--comment', description
        ,'--category', category
        ,'--publisher', publisher
    ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    # Reference date for the cover image, computed once when the class body runs.
    # NOTE: pubdate is formatted before the hour-based adjustment is applied.
    hoje = datetime.now()
    pubdate = hoje.strftime('%a, %d %b')
    # Before 06:00 local time, use the previous day's front page.
    if hoje.hour<6:
        hoje = hoje-timedelta(days=1)
    CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg'
    SCREENSHOT = 'http://www1.folha.uol.com.br/'
    cover_margins = (0,0,'white')
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

    keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})]
    remove_tags = [
        dict(name='div',
             attrs={'id':[
                 'articleButton'
                 ,'bookmarklets'
                 ,'ad-180x150-1'
                 ,'contextualAdsArticle'
                 ,'articleEnd'
                 ,'articleComments'
             ]})
        ,dict(name='div',
              attrs={'class':[
                  'openBox adslibraryArticle'
                  ,'toolbar'
              ]})
        ,dict(name='a')
        ,dict(name='iframe')
        ,dict(name='link')
        ,dict(name='script')
        ,dict(name='li')
    ]
    remove_tags_after = dict(name='div',attrs={'id':'articleEnd'})

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
        ,(u'Cotidiano', u'http://feeds.folha.uol.com.br/folha/cotidiano/rss091.xml')
        ,(u'Brasil', u'http://feeds.folha.uol.com.br/folha/brasil/rss091.xml')
        ,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
        ,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
        ,(u'Mercado', u'http://feeds.folha.uol.com.br/folha/dinheiro/rss091.xml')
        ,(u'Saber', u'http://feeds.folha.uol.com.br/folha/educacao/rss091.xml')
        ,(u'Tec', u'http://feeds.folha.uol.com.br/folha/informatica/rss091.xml')
        ,(u'Ilustrada', u'http://feeds.folha.uol.com.br/folha/ilustrada/rss091.xml')
        ,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
        ,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
        ,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
        ,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
        ,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
        ,(u'Esporte', u'http://feeds.folha.uol.com.br/folha/esporte/rss091.xml')
        ,(u'Zapping', u'http://feeds.folha.uol.com.br/colunas/zapping/rss091.xml')
        ,(u'Cida Santos', u'http://feeds.folha.uol.com.br/colunas/cidasantos/rss091.xml')
        ,(u'Clóvis Rossi', u'http://feeds.folha.uol.com.br/colunas/clovisrossi/rss091.xml')
        ,(u'Eliane Cantanhêde', u'http://feeds.folha.uol.com.br/colunas/elianecantanhede/rss091.xml')
        ,(u'Fernando Canzian', u'http://feeds.folha.uol.com.br/colunas/fernandocanzian/rss091.xml')
        ,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/colunas/gilbertodimenstein/rss091.xml')
        ,(u'Hélio Schwartsman', u'http://feeds.folha.uol.com.br/colunas/helioschwartsman/rss091.xml')
        ,(u'Humberto Luiz Peron', u'http://feeds.folha.uol.com.br/colunas/futebolnarede/rss091.xml')
        ,(u'João Pereira Coutinho', u'http://feeds.folha.uol.com.br/colunas/joaopereiracoutinho/rss091.xml')
        ,(u'José Antonio Ramalho', u'http://feeds.folha.uol.com.br/colunas/canalaberto/rss091.xml')
        ,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/colunas/kennedyalencar/rss091.xml')
        ,(u'Luiz Caversan', u'http://feeds.folha.uol.com.br/colunas/luizcaversan/rss091.xml')
        ,(u'Luiz Rivoiro', u'http://feeds.folha.uol.com.br/colunas/paiepai/rss091.xml')
        ,(u'Marcelo Leite', u'http://feeds.folha.uol.com.br/colunas/marceloleite/rss091.xml')
        ,(u'Sérgio Malbergier', u'http://feeds.folha.uol.com.br/colunas/sergiomalbergier/rss091.xml')
        ,(u'Sylvia Colombo', u'http://feeds.folha.uol.com.br/colunas/sylviacolombo/rss091.xml')
        ,(u'Valdo Cruz', u'http://feeds.folha.uol.com.br/colunas/valdocruz/rss091.xml')
    ]

    conversion_options = {
        'title' : title
        ,'comments' : description
        ,'publisher' : publisher
        ,'tags' : category
        ,'language' : LANGUAGE
        ,'linearize_tables': True
    }

    def preprocess_html(self, soup):
        """Strip inline styles and make sure the language/charset meta tags exist.

        :param soup: parsed article HTML.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        if not soup.find(attrs={'http-equiv':'Content-Language'}):
            meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
            soup.head.insert(0,meta0)
        if not soup.find(attrs={'http-equiv':'Content-Type'}):
            meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
            soup.head.insert(0,meta1)
        return soup

    def postprocess_html(self, soup, first):
        """Rotate wide landscape images and save them back in place.

        Each ``<img>`` carrying a ``src`` attribute is opened from the path
        in that attribute (the processed HTML is assumed to reference the
        correct path).  An image that is wider than tall and wider than 590
        pixels is rotated by -90 degrees and written back to the same path.

        :param soup: parsed article HTML.
        :param first: part of the hook signature; not used here.
        :return: the same ``soup`` object.
        """
        # Background wand for rotations; created only if some image needs rotating.
        wand = None
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            width, height = img.size
            # Goes through the recipe logger instead of a Python-2-only print statement.
            self.log('img is: ', iurl, 'width is: ', width, 'height is: ', height)
            if width > height and width > 590:
                self.log('Rotate image')
                if wand is None:
                    wand = PixelWand()
                img.rotate(wand, -90)
                img.save(iurl)
        return soup

    def get_cover_url(self):
        """Return the URL of the cover image for this issue.

        ``CAPA`` is requested first.  If the request fails with
        ``URLError``, or the response parses to a document with a
        ``<body>``, the thumbalizr.com screenshot URL of ``SCREENSHOT`` is
        returned; otherwise ``CAPA`` itself is returned.
        """
        # Fallback used on request failure and when the response looks like a page.
        screenshot_url = 'http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'

        pedido = Request(self.CAPA)
        pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
        pedido.add_header('Accept-Charset',self.ENCHTM)
        pedido.add_header('Referer',self.SCREENSHOT)
        try:
            resposta = urlopen(pedido)
            try:
                soup = BeautifulSoup(resposta)
            finally:
                # Always release the connection once the payload has been parsed.
                resposta.close()
        except URLError:
            return screenshot_url

        # NOTE(review): a <body> presumably means an HTML page was served
        # instead of the image -- confirm against the live site.
        if soup.find('body') is not None:
            return screenshot_url
        return self.CAPA
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Folha de Sao Paulo RSS feeds.

    The cover is the dated front-page image (``CAPA``).  When that request
    fails or returns a document with a ``<body>``, a thumbalizr.com
    screenshot URL of ``SCREENSHOT`` is used instead.
    """

    # thumbalizr.com API key: get your own at http://www.thumbalizr.com/ and put it here.
    THUMBALIZR_API = ''
    LANGUAGE = 'pt_br'
    language = 'pt_BR'
    LANGHTM = 'pt-br'
    ENCODING = 'cp1252'
    ENCHTM = 'iso-8859-1'
    directionhtm = 'ltr'
    requires_version = (0,7,47)
    news = True

    title = u'Folha de S\xE3o Paulo'
    __author__ = 'Euler Alves and Alex Mitrani'
    description = u'Brazilian news from Folha de S\xE3o Paulo'
    publisher = u'Folha de S\xE3o Paulo'
    category = 'news, rss'

    oldest_article = 4
    max_articles_per_feed = 100
    summary_length = 1000

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    timefmt = ' [%d %b %Y (%a)]'

    html2lrf_options = [
        '--comment', description
        ,'--category', category
        ,'--publisher', publisher
    ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    # Reference date for the cover image, computed once when the class body runs.
    # NOTE: pubdate is formatted before the hour-based adjustment is applied.
    hoje = datetime.now()
    pubdate = hoje.strftime('%a, %d %b')
    # Before 06:00 local time, use the previous day's front page.
    if hoje.hour<6:
        hoje = hoje-timedelta(days=1)
    CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg'
    SCREENSHOT = 'http://www1.folha.uol.com.br/'
    cover_margins = (0,0,'white')
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

    keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})]
    remove_tags = [
        dict(name='div',
             attrs={'id':[
                 'articleButton'
                 ,'bookmarklets'
                 ,'ad-180x150-1'
                 ,'contextualAdsArticle'
                 ,'articleEnd'
                 ,'articleComments'
             ]})
        ,dict(name='div',
              attrs={'class':[
                  'openBox adslibraryArticle'
                  ,'toolbar'
              ]})
        ,dict(name='a')
        ,dict(name='iframe')
        ,dict(name='link')
        ,dict(name='script')
        ,dict(name='li')
    ]
    remove_tags_after = dict(name='div',attrs={'id':'articleEnd'})

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
        ,(u'Cotidiano', u'http://feeds.folha.uol.com.br/folha/cotidiano/rss091.xml')
        ,(u'Brasil', u'http://feeds.folha.uol.com.br/folha/brasil/rss091.xml')
        ,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
        ,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
        ,(u'Mercado', u'http://feeds.folha.uol.com.br/folha/dinheiro/rss091.xml')
        ,(u'Saber', u'http://feeds.folha.uol.com.br/folha/educacao/rss091.xml')
        ,(u'Tec', u'http://feeds.folha.uol.com.br/folha/informatica/rss091.xml')
        ,(u'Ilustrada', u'http://feeds.folha.uol.com.br/folha/ilustrada/rss091.xml')
        ,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
        ,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
        ,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
        ,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
        ,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
        ,(u'Esporte', u'http://feeds.folha.uol.com.br/folha/esporte/rss091.xml')
        ,(u'Zapping', u'http://feeds.folha.uol.com.br/colunas/zapping/rss091.xml')
        ,(u'Cida Santos', u'http://feeds.folha.uol.com.br/colunas/cidasantos/rss091.xml')
        ,(u'Clóvis Rossi', u'http://feeds.folha.uol.com.br/colunas/clovisrossi/rss091.xml')
        ,(u'Eliane Cantanhêde', u'http://feeds.folha.uol.com.br/colunas/elianecantanhede/rss091.xml')
        ,(u'Fernando Canzian', u'http://feeds.folha.uol.com.br/colunas/fernandocanzian/rss091.xml')
        ,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/colunas/gilbertodimenstein/rss091.xml')
        ,(u'Hélio Schwartsman', u'http://feeds.folha.uol.com.br/colunas/helioschwartsman/rss091.xml')
        ,(u'Humberto Luiz Peron', u'http://feeds.folha.uol.com.br/colunas/futebolnarede/rss091.xml')
        ,(u'João Pereira Coutinho', u'http://feeds.folha.uol.com.br/colunas/joaopereiracoutinho/rss091.xml')
        ,(u'José Antonio Ramalho', u'http://feeds.folha.uol.com.br/colunas/canalaberto/rss091.xml')
        ,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/colunas/kennedyalencar/rss091.xml')
        ,(u'Luiz Caversan', u'http://feeds.folha.uol.com.br/colunas/luizcaversan/rss091.xml')
        ,(u'Luiz Rivoiro', u'http://feeds.folha.uol.com.br/colunas/paiepai/rss091.xml')
        ,(u'Marcelo Leite', u'http://feeds.folha.uol.com.br/colunas/marceloleite/rss091.xml')
        ,(u'Sérgio Malbergier', u'http://feeds.folha.uol.com.br/colunas/sergiomalbergier/rss091.xml')
        ,(u'Sylvia Colombo', u'http://feeds.folha.uol.com.br/colunas/sylviacolombo/rss091.xml')
        ,(u'Valdo Cruz', u'http://feeds.folha.uol.com.br/colunas/valdocruz/rss091.xml')
    ]

    conversion_options = {
        'title' : title
        ,'comments' : description
        ,'publisher' : publisher
        ,'tags' : category
        ,'language' : LANGUAGE
        ,'linearize_tables': True
    }

    def preprocess_html(self, soup):
        """Strip inline styles and make sure the language/charset meta tags exist.

        :param soup: parsed article HTML.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        if not soup.find(attrs={'http-equiv':'Content-Language'}):
            meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
            soup.head.insert(0,meta0)
        if not soup.find(attrs={'http-equiv':'Content-Type'}):
            meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
            soup.head.insert(0,meta1)
        return soup

    def postprocess_html(self, soup, first):
        """Rotate wide landscape images and save them back in place.

        Each ``<img>`` carrying a ``src`` attribute is opened from the path
        in that attribute (the processed HTML is assumed to reference the
        correct path).  An image that is wider than tall and wider than 590
        pixels is rotated by -90 degrees and written back to the same path.

        :param soup: parsed article HTML.
        :param first: part of the hook signature; not used here.
        :return: the same ``soup`` object.
        """
        # Background wand for rotations; created only if some image needs rotating.
        wand = None
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            width, height = img.size
            # Goes through the recipe logger instead of a Python-2-only print statement.
            self.log('img is: ', iurl, 'width is: ', width, 'height is: ', height)
            if width > height and width > 590:
                self.log('Rotate image')
                if wand is None:
                    wand = PixelWand()
                img.rotate(wand, -90)
                img.save(iurl)
        return soup

    def get_cover_url(self):
        """Return the URL of the cover image for this issue.

        ``CAPA`` is requested first.  If the request fails with
        ``URLError``, or the response parses to a document with a
        ``<body>``, the thumbalizr.com screenshot URL of ``SCREENSHOT`` is
        returned; otherwise ``CAPA`` itself is returned.
        """
        # Fallback used on request failure and when the response looks like a page.
        screenshot_url = 'http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'

        pedido = Request(self.CAPA)
        pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
        pedido.add_header('Accept-Charset',self.ENCHTM)
        pedido.add_header('Referer',self.SCREENSHOT)
        try:
            resposta = urlopen(pedido)
            try:
                soup = BeautifulSoup(resposta)
            finally:
                # Always release the connection once the payload has been parsed.
                resposta.close()
        except URLError:
            return screenshot_url

        # NOTE(review): a <body> presumably means an HTML page was served
        # instead of the image -- confirm against the live site.
        if soup.find('body') is not None:
            return screenshot_url
        return self.CAPA

View File

@ -1,35 +1,35 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPlSzczecin(BasicNewsRecipe):
    """Calibre recipe for the Szczecin edition of Gazeta Wyborcza (Gazeta.pl).

    Feed links are escaped redirect URLs; ``get_article_url`` decodes them
    back to the article address and ``print_version`` maps that address to
    the print-layout page.
    """

    title = u'Gazeta Wyborcza Szczecin'
    description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Agora S.A.'
    category = 'news, szczecin'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags = [ { "name" : "a", "attrs" : { "href" : "http://szczecin.gazeta.pl/szczecin/www.gazeta.pl" }}]
    cover_url = "http://bi.gazeta.pl/i/hp/hp2009/logo.gif"
    feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]

    def get_article_url(self, article):
        """Decode the escaped article address embedded in the feed link.

        :param article: feed entry; only its ``link`` attribute is read.
        :return: ``'http://'`` plus the decoded address, or the base class
            result when the link does not have the expected shape.
        """
        match = re.search(r"/0L(szczecin.*)/story01.htm", article.link)
        if match is None:
            # Unknown link layout: defer to the default lookup instead of
            # failing with AttributeError on None.group().
            return BasicNewsRecipe.get_article_url(self, article)
        decoded = match.group(1)
        # str.replace is used instead of string.replace()/dict.iteritems(),
        # which exist only on Python 2.
        for code, char in (("0B", "."), ("0C", "/"), ("0H", ","), ("0I", "_")):
            decoded = decoded.replace(code, char)
        # "0A" is the escape for a literal "0"; it must be decoded last so the
        # zero it produces is not mistaken for the start of another escape.
        decoded = decoded.replace("0A", "0")
        return "http://"+decoded

    def print_version(self, url):
        """Build the print-layout URL from the second and third numeric ids.

        :param url: article URL as returned by ``get_article_url``.
        :return: print-layout URL, or ``url`` unchanged when it does not
            contain the expected ``/<n>,<n>,<n>,....html`` part.
        """
        match = re.search(r"/(\d*),(\d*),(\d*),.*\.html", url)
        if match is None:
            return url
        no1 = match.group(2)
        no2 = match.group(3)
        return """http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html""" % (no1, no2)
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPlSzczecin(BasicNewsRecipe):
    """Calibre recipe for the Szczecin edition of Gazeta Wyborcza (Gazeta.pl).

    Feed links are escaped redirect URLs; ``get_article_url`` decodes them
    back to the article address and ``print_version`` maps that address to
    the print-layout page.
    """

    title = u'Gazeta Wyborcza Szczecin'
    description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Agora S.A.'
    category = 'news, szczecin'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags = [ { "name" : "a", "attrs" : { "href" : "http://szczecin.gazeta.pl/szczecin/www.gazeta.pl" }}]
    cover_url = "http://bi.gazeta.pl/i/hp/hp2009/logo.gif"
    feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]

    def get_article_url(self, article):
        """Decode the escaped article address embedded in the feed link.

        :param article: feed entry; only its ``link`` attribute is read.
        :return: ``'http://'`` plus the decoded address, or the base class
            result when the link does not have the expected shape.
        """
        match = re.search(r"/0L(szczecin.*)/story01.htm", article.link)
        if match is None:
            # Unknown link layout: defer to the default lookup instead of
            # failing with AttributeError on None.group().
            return BasicNewsRecipe.get_article_url(self, article)
        decoded = match.group(1)
        # str.replace is used instead of string.replace()/dict.iteritems(),
        # which exist only on Python 2.
        for code, char in (("0B", "."), ("0C", "/"), ("0H", ","), ("0I", "_")):
            decoded = decoded.replace(code, char)
        # "0A" is the escape for a literal "0"; it must be decoded last so the
        # zero it produces is not mistaken for the start of another escape.
        decoded = decoded.replace("0A", "0")
        return "http://"+decoded

    def print_version(self, url):
        """Build the print-layout URL from the second and third numeric ids.

        :param url: article URL as returned by ``get_article_url``.
        :return: print-layout URL, or ``url`` unchanged when it does not
            contain the expected ``/<n>,<n>,<n>,....html`` part.
        """
        match = re.search(r"/(\d*),(\d*),(\d*),.*\.html", url)
        if match is None:
            return url
        no1 = match.group(2)
        no2 = match.group(3)
        return """http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html""" % (no1, no2)

View File

@ -1,43 +1,43 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1322322819(BasicNewsRecipe):
    """Calibre recipe for GS24.pl, the web edition of Glos Szczecinski.

    Feed links are escaped redirect URLs; ``get_article_url`` decodes them
    back to the article address and ``print_version`` appends the print
    template parameter.
    """

    title = u'GS24.pl (Głos Szczeciński)'
    description = u'Internetowy serwis Głosu Szczecińskiego'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Media Regionalne sp. z o.o.'
    category = 'news, szczecin'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    cover_url = "http://www.gs24.pl/images/top_logo.png"

    feeds = [
        # (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
        (u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
        (u'Stargard', u'http://www.gs24.pl/stargard.xml'),
        (u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
        (u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
        (u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
        (u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
        (u'Police', u'http://www.gs24.pl/police.xml'),
        (u'Region', u'http://www.gs24.pl/region.xml'),
        (u'Sport', u'http://www.gs24.pl/sport.xml'),
    ]

    def get_article_url(self, article):
        """Decode the escaped article address embedded in the feed link.

        :param article: feed entry; only its ``link`` attribute is read.
        :return: ``'http://'`` plus the decoded address, or the base class
            result when the link does not have the expected shape.
        """
        match = re.search(r"/0L0S(gs24.*)/story01.htm", article.link)
        if match is None:
            # Unknown link layout: defer to the default lookup instead of
            # failing with AttributeError on None.group().
            return BasicNewsRecipe.get_article_url(self, article)
        decoded = match.group(1)
        # str.replace is used instead of string.replace()/dict.iteritems(),
        # which exist only on Python 2.
        escapes = (("0B", "."), ("0C", "/"), ("0H", ","), ("0I", "_"), ("0D", "?"), ("0F", "="))
        for code, char in escapes:
            decoded = decoded.replace(code, char)
        # "0A" is the escape for a literal "0"; it must be decoded last so the
        # zero it produces is not mistaken for the start of another escape.
        decoded = decoded.replace("0A", "0")
        return "http://"+decoded

    def print_version(self, url):
        """Return ``url`` with the print template query parameter appended."""
        return url + "&Template=printpicart"
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1322322819(BasicNewsRecipe):
    """Calibre recipe for GS24.pl, the web edition of Glos Szczecinski.

    Feed links are escaped redirect URLs; ``get_article_url`` decodes them
    back to the article address and ``print_version`` appends the print
    template parameter.
    """

    title = u'GS24.pl (Głos Szczeciński)'
    description = u'Internetowy serwis Głosu Szczecińskiego'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Media Regionalne sp. z o.o.'
    category = 'news, szczecin'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    cover_url = "http://www.gs24.pl/images/top_logo.png"

    feeds = [
        # (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
        (u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
        (u'Stargard', u'http://www.gs24.pl/stargard.xml'),
        (u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
        (u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
        (u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
        (u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
        (u'Police', u'http://www.gs24.pl/police.xml'),
        (u'Region', u'http://www.gs24.pl/region.xml'),
        (u'Sport', u'http://www.gs24.pl/sport.xml'),
    ]

    def get_article_url(self, article):
        """Decode the escaped article address embedded in the feed link.

        :param article: feed entry; only its ``link`` attribute is read.
        :return: ``'http://'`` plus the decoded address, or the base class
            result when the link does not have the expected shape.
        """
        match = re.search(r"/0L0S(gs24.*)/story01.htm", article.link)
        if match is None:
            # Unknown link layout: defer to the default lookup instead of
            # failing with AttributeError on None.group().
            return BasicNewsRecipe.get_article_url(self, article)
        decoded = match.group(1)
        # str.replace is used instead of string.replace()/dict.iteritems(),
        # which exist only on Python 2.
        escapes = (("0B", "."), ("0C", "/"), ("0H", ","), ("0I", "_"), ("0D", "?"), ("0F", "="))
        for code, char in escapes:
            decoded = decoded.replace(code, char)
        # "0A" is the escape for a literal "0"; it must be decoded last so the
        # zero it produces is not mistaken for the start of another escape.
        decoded = decoded.replace("0A", "0")
        return "http://"+decoded

    def print_version(self, url):
        """Return ``url`` with the print template query parameter appended."""
        return url + "&Template=printpicart"

View File

@ -1,47 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh(BasicNewsRecipe):
    # Recipe settings for The Hankyoreh; the class is configuration only.
    title = u'Hankyoreh'
    language = 'ko'
    description = u'The Hankyoreh News articles'
    __author__ = 'Seongkyoun Yoo'

    oldest_article = 5
    recursions = 1
    max_articles_per_feed = 5
    no_stylesheets = True

    # Tag filters applied to each downloaded page.
    keep_only_tags = [
        dict(name='tr', attrs={'height':['60px']}),
        dict(id=['fontSzArea']),
    ]
    remove_tags = [
        dict(target='_blank'),
        dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
        dict(name='iframe', attrs={'width':['590']}),
    ]
    remove_tags_after = [dict(target='_top')]

    # (section title, RSS URL) pairs.
    feeds = [
        ('All News',        'http://www.hani.co.kr/rss/'),
        ('Politics',        'http://www.hani.co.kr/rss/politics/'),
        ('Economy',         'http://www.hani.co.kr/rss/economy/'),
        ('Society',         'http://www.hani.co.kr/rss/society/'),
        ('International',   'http://www.hani.co.kr/rss/international/'),
        ('Culture',         'http://www.hani.co.kr/rss/culture/'),
        ('Sports',          'http://www.hani.co.kr/rss/sports/'),
        ('Science',         'http://www.hani.co.kr/rss/science/'),
        ('Opinion',         'http://www.hani.co.kr/rss/opinion/'),
        ('Cartoon',         'http://www.hani.co.kr/rss/cartoon/'),
        ('English Edition', 'http://www.hani.co.kr/rss/english_edition/'),
        ('Specialsection',  'http://www.hani.co.kr/rss/specialsection/'),
        ('Hanionly',        'http://www.hani.co.kr/rss/hanionly/'),
        ('Hkronly',         'http://www.hani.co.kr/rss/hkronly/'),
        ('Multihani',       'http://www.hani.co.kr/rss/multihani/'),
        ('Lead',            'http://www.hani.co.kr/rss/lead/'),
        ('Newsrank',        'http://www.hani.co.kr/rss/newsrank/'),
    ]
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh(BasicNewsRecipe):
    # Recipe settings for The Hankyoreh; the class is configuration only.
    title = u'Hankyoreh'
    language = 'ko'
    description = u'The Hankyoreh News articles'
    __author__ = 'Seongkyoun Yoo'

    oldest_article = 5
    recursions = 1
    max_articles_per_feed = 5
    no_stylesheets = True

    # Tag filters applied to each downloaded page.
    keep_only_tags = [
        dict(name='tr', attrs={'height':['60px']}),
        dict(id=['fontSzArea']),
    ]
    remove_tags = [
        dict(target='_blank'),
        dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
        dict(name='iframe', attrs={'width':['590']}),
    ]
    remove_tags_after = [dict(target='_top')]

    # (section title, RSS URL) pairs.
    feeds = [
        ('All News',        'http://www.hani.co.kr/rss/'),
        ('Politics',        'http://www.hani.co.kr/rss/politics/'),
        ('Economy',         'http://www.hani.co.kr/rss/economy/'),
        ('Society',         'http://www.hani.co.kr/rss/society/'),
        ('International',   'http://www.hani.co.kr/rss/international/'),
        ('Culture',         'http://www.hani.co.kr/rss/culture/'),
        ('Sports',          'http://www.hani.co.kr/rss/sports/'),
        ('Science',         'http://www.hani.co.kr/rss/science/'),
        ('Opinion',         'http://www.hani.co.kr/rss/opinion/'),
        ('Cartoon',         'http://www.hani.co.kr/rss/cartoon/'),
        ('English Edition', 'http://www.hani.co.kr/rss/english_edition/'),
        ('Specialsection',  'http://www.hani.co.kr/rss/specialsection/'),
        ('Hanionly',        'http://www.hani.co.kr/rss/hanionly/'),
        ('Hkronly',         'http://www.hani.co.kr/rss/hkronly/'),
        ('Multihani',       'http://www.hani.co.kr/rss/multihani/'),
        ('Lead',            'http://www.hani.co.kr/rss/lead/'),
        ('Newsrank',        'http://www.hani.co.kr/rss/newsrank/'),
    ]

View File

@ -1,25 +1,25 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh21(BasicNewsRecipe):
    """Calibre recipe for the Hankyoreh21 magazine (single RSS feed)."""

    title = u'Hankyoreh21'
    language = 'ko'
    description = u'The Hankyoreh21 Magazine articles'
    __author__ = 'Seongkyoun Yoo'

    oldest_article = 20
    recursions = 1
    max_articles_per_feed = 120
    no_stylesheets = True
    remove_javascript = True

    # Tag filters applied to each downloaded page.
    keep_only_tags = [
        dict(name='font', attrs={'class':'t18bk'}),
        dict(id=['fontSzArea'])
    ]

    # The feed URL previously ended in a stray space, which is not part of
    # the address and would be sent as part of the request path.
    feeds = [
        ('Hani21','http://h21.hani.co.kr/rss/'),
    ]
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh21(BasicNewsRecipe):
    """Calibre recipe for the Hankyoreh21 magazine (single RSS feed)."""

    title = u'Hankyoreh21'
    language = 'ko'
    description = u'The Hankyoreh21 Magazine articles'
    __author__ = 'Seongkyoun Yoo'

    oldest_article = 20
    recursions = 1
    max_articles_per_feed = 120
    no_stylesheets = True
    remove_javascript = True

    # Tag filters applied to each downloaded page.
    keep_only_tags = [
        dict(name='font', attrs={'class':'t18bk'}),
        dict(id=['fontSzArea'])
    ]

    # The feed URL previously ended in a stray space, which is not part of
    # the address and would be sent as part of the request path.
    feeds = [
        ('Hani21','http://h21.hani.co.kr/rss/'),
    ]

View File

@ -1,47 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe
class HindustanTimes(BasicNewsRecipe):
    # Despite the class name, the title and every feed below are Huffington Post UK.
    title = u'Huffington Post UK'
    language = 'en_GB'
    __author__ = 'Krittika Goyal'
    oldest_article = 2  # days
    max_articles_per_feed = 25
    #encoding = 'cp1252'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True
    auto_cleanup_keep = '//div[@class="articleBody"]'

    # (section title, RSS URL) pairs.
    feeds = [
        ('UK Politics', 'http://www.huffingtonpost.com/feeds/verticals/uk-politics/news.xml'),
        ('UK Entertainment', 'http://www.huffingtonpost.com/feeds/verticals/uk-entertainment/news.xml'),
        ('UK Style', 'http://www.huffingtonpost.com/feeds/verticals/uk-style/news.xml'),
        ('UK Fashion:', 'http://www.huffingtonpost.com/feeds/verticals/uk-fashion/news.xml'),
        ('UK Universities:', 'http://www.huffingtonpost.com/feeds/verticals/uk-universities-education/news.xml'),
        ('UK World', 'http://www.huffingtonpost.com/feeds/verticals/uk-world/news.xml'),
        ('UK Lifestyle', 'http://www.huffingtonpost.com/feeds/verticals/uk-lifestyle/news.xml'),
        ('UK Comedy', 'http://www.huffingtonpost.com/feeds/verticals/uk-comedy/news.xml'),
        ('UK Celebrity', 'http://www.huffingtonpost.com/feeds/verticals/uk-celebrity/news.xml'),
        ('UK Culture', 'http://www.huffingtonpost.com/feeds/verticals/uk-culture/news.xml'),
        ('UK News', 'http://www.huffingtonpost.com/feeds/verticals/uk/news.xml'),
        ('UK Tech', 'http://www.huffingtonpost.com/feeds/verticals/uk-tech/news.xml'),
        ('UK Sport', 'http://www.huffingtonpost.com/feeds/verticals/uk-sport/news.xml'),
    ]

    def get_article_url(self, entry):
        # Use the first link of the entry when it has any; otherwise fall
        # back to the base class lookup.
        links = entry.links
        if not links:
            return BasicNewsRecipe.get_article_url(self, entry)
        return links[0]['href']
from calibre.web.feeds.news import BasicNewsRecipe
class HindustanTimes(BasicNewsRecipe):
    # Despite the class name, the title and every feed below are Huffington Post UK.
    title = u'Huffington Post UK'
    language = 'en_GB'
    __author__ = 'Krittika Goyal'
    oldest_article = 2  # days
    max_articles_per_feed = 25
    #encoding = 'cp1252'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True
    auto_cleanup_keep = '//div[@class="articleBody"]'

    # (section title, RSS URL) pairs.
    feeds = [
        ('UK Politics', 'http://www.huffingtonpost.com/feeds/verticals/uk-politics/news.xml'),
        ('UK Entertainment', 'http://www.huffingtonpost.com/feeds/verticals/uk-entertainment/news.xml'),
        ('UK Style', 'http://www.huffingtonpost.com/feeds/verticals/uk-style/news.xml'),
        ('UK Fashion:', 'http://www.huffingtonpost.com/feeds/verticals/uk-fashion/news.xml'),
        ('UK Universities:', 'http://www.huffingtonpost.com/feeds/verticals/uk-universities-education/news.xml'),
        ('UK World', 'http://www.huffingtonpost.com/feeds/verticals/uk-world/news.xml'),
        ('UK Lifestyle', 'http://www.huffingtonpost.com/feeds/verticals/uk-lifestyle/news.xml'),
        ('UK Comedy', 'http://www.huffingtonpost.com/feeds/verticals/uk-comedy/news.xml'),
        ('UK Celebrity', 'http://www.huffingtonpost.com/feeds/verticals/uk-celebrity/news.xml'),
        ('UK Culture', 'http://www.huffingtonpost.com/feeds/verticals/uk-culture/news.xml'),
        ('UK News', 'http://www.huffingtonpost.com/feeds/verticals/uk/news.xml'),
        ('UK Tech', 'http://www.huffingtonpost.com/feeds/verticals/uk-tech/news.xml'),
        ('UK Sport', 'http://www.huffingtonpost.com/feeds/verticals/uk-sport/news.xml'),
    ]

    def get_article_url(self, entry):
        # Use the first link of the entry when it has any; otherwise fall
        # back to the base class lookup.
        links = entry.links
        if not links:
            return BasicNewsRecipe.get_article_url(self, entry)
        return links[0]['href']

View File

@ -1,110 +1,110 @@
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/'
class IlManifesto(BasicNewsRecipe):
    """Calibre recipe that downloads the latest HTML edition of Il Manifesto.

    The edition index is discovered once from the "in edicola" page and
    cached on the instance (``manifesto_index`` / ``manifesto_datestr``);
    ``parse_index`` then walks every section linked from that index.
    """

    title = 'Il Manifesto'
    __author__ = 'Giacomo Lacava'
    description = 'quotidiano comunista - ultima edizione html disponibile'
    publication_type = 'newspaper'
    publisher = 'il manifesto coop. editrice a r.l.'
    language = 'it'

    oldest_article = 2
    max_articles_per_feed = 100
    delay = 1
    no_stylesheets = True
    simultaneous_downloads = 5
    timeout = 30
    auto_cleanup = True
    remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})]
    remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'})
    remove_tags_after = dict(id='myPrintArea')

    # Filled in lazily by _set_manifesto_index().
    manifesto_index = None
    manifesto_datestr = None

    def _set_manifesto_index(self):
        """Locate the latest edition and cache its index URL and date string.

        Does nothing when the index has already been resolved.
        """
        if self.manifesto_index is not None:
            return
        startUrl = MANIFESTO_BASEURL + 'area-abbonati/in-edicola/'
        startSoup = self.index_to_soup(startUrl)
        # The second 'accordion_inedicola' div holds the link to the last edition.
        lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href']
        self.manifesto_index = MANIFESTO_BASEURL + lastEdition
        # The date string is the last path segment, or the one before it
        # when the href ends with a slash.
        urlsplit = lastEdition.split('/')
        self.manifesto_datestr = urlsplit[-1]
        if urlsplit[-1] == '':
            self.manifesto_datestr = urlsplit[-2]

    def get_cover_url(self):
        """Return the URL of the front-page image of the cached edition."""
        self._set_manifesto_index()
        url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr
        return url

    def parse_index(self):
        """Build the list of ``(section name, articles)`` for the edition.

        Each article is a dict with the keys ``title``, ``url``, ``date``,
        ``description``, ``content`` and ``author``.  Sections whose link
        has no ``<h2>`` heading or whose page lacks the ``column1`` div are
        skipped instead of aborting the whole download.
        """
        self._set_manifesto_index()
        soup = self.index_to_soup(self.manifesto_index)
        feedLinks = soup.find('div',id='accordion_inedicola').findAll('a')
        # Same date for every article; computed once instead of per article.
        today = strftime('%d %B %Y')
        result = []
        for feed in feedLinks:
            heading = feed.find('h2')
            if heading is None:
                continue
            feedName = heading.string
            feedUrl = MANIFESTO_BASEURL + feed['href']
            feedSoup = self.index_to_soup(feedUrl)
            indexRoot = feedSoup.find('div',attrs={'class':'column1'})
            if indexRoot is None:
                self.log('Skipping section without article index: ', feedUrl)
                continue
            articles = []
            for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}):
                artLink = div.find('a')
                if artLink is None:
                    continue  # empty div
                description = ''
                descNode = div.find('div',attrs={'class':'text_12'})
                if descNode is not None:
                    description = descNode.string
                author = ''
                authNode = div.find('div',attrs={'class':'firma'})
                if authNode is not None:
                    author = authNode.string
                articles.append({
                    'title': artLink.string,
                    'url': MANIFESTO_BASEURL + artLink['href'],
                    'date': today,
                    'description': description,
                    'content': '',
                    'author': author
                })
            result.append((feedName,articles))
        return result

    def extract_readable_article(self, html, url):
        """Rebuild a minimal HTML page from the parts of an article page.

        :param html: raw article HTML.
        :param url: article URL; only passed on to the base class fallback.
        :return: HTML string with title, subtitle, author, summary and body.
            Falls back to the base class extraction when the expected
            ``column1`` / ``bodytext`` containers are missing.
        """
        bs = BeautifulSoup(html)
        col1 = bs.find('div',attrs={'class':'column1'})
        content = None
        if col1 is not None:
            content = col1.find('div',attrs={'class':'bodytext'})
        if content is None:
            # Unknown page layout: let the default extraction handle it
            # rather than crashing or emitting the text "None".
            return BasicNewsRecipe.extract_readable_article(self, html, url)

        # Missing optional parts become empty strings so that the literal
        # text "None" never ends up in the generated page.
        titleNode = bs.find(id='titolo_articolo')
        title = ''
        if titleNode is not None and titleNode.string is not None:
            title = titleNode.string
        author = col1.find('span',attrs={'class':'firma'})
        if author is None:
            author = ''
        subtitle = ''
        subNode = col1.findPrevious('div',attrs={'class':'occhiello_rosso'})
        if subNode is not None:
            subtitle = subNode
        summary = ''
        sommNode = bs.find('div',attrs={'class':'sommario'})
        if sommNode is not None:
            summary = sommNode
        template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>"
        return template % dict(title=title,subtitle=subtitle,author=author,summary=summary,content=content)
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/'
class IlManifesto(BasicNewsRecipe):
    """Calibre recipe that downloads the latest HTML edition of Il Manifesto.

    The edition index is discovered once from the "in edicola" page and
    cached on the instance (``manifesto_index`` / ``manifesto_datestr``);
    ``parse_index`` then walks every section linked from that index.
    """

    title = 'Il Manifesto'
    __author__ = 'Giacomo Lacava'
    description = 'quotidiano comunista - ultima edizione html disponibile'
    publication_type = 'newspaper'
    publisher = 'il manifesto coop. editrice a r.l.'
    language = 'it'

    oldest_article = 2
    max_articles_per_feed = 100
    delay = 1
    no_stylesheets = True
    simultaneous_downloads = 5
    timeout = 30
    auto_cleanup = True
    remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})]
    remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'})
    remove_tags_after = dict(id='myPrintArea')

    # Filled in lazily by _set_manifesto_index().
    manifesto_index = None
    manifesto_datestr = None

    def _set_manifesto_index(self):
        """Locate the latest edition and cache its index URL and date string.

        Does nothing when the index has already been resolved.
        """
        if self.manifesto_index is not None:
            return
        startUrl = MANIFESTO_BASEURL + 'area-abbonati/in-edicola/'
        startSoup = self.index_to_soup(startUrl)
        # The second 'accordion_inedicola' div holds the link to the last edition.
        lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href']
        self.manifesto_index = MANIFESTO_BASEURL + lastEdition
        # The date string is the last path segment, or the one before it
        # when the href ends with a slash.
        urlsplit = lastEdition.split('/')
        self.manifesto_datestr = urlsplit[-1]
        if urlsplit[-1] == '':
            self.manifesto_datestr = urlsplit[-2]

    def get_cover_url(self):
        """Return the URL of the front-page image of the cached edition."""
        self._set_manifesto_index()
        url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr
        return url

    def parse_index(self):
        """Build the list of ``(section name, articles)`` for the edition.

        Each article is a dict with the keys ``title``, ``url``, ``date``,
        ``description``, ``content`` and ``author``.  Sections whose link
        has no ``<h2>`` heading or whose page lacks the ``column1`` div are
        skipped instead of aborting the whole download.
        """
        self._set_manifesto_index()
        soup = self.index_to_soup(self.manifesto_index)
        feedLinks = soup.find('div',id='accordion_inedicola').findAll('a')
        # Same date for every article; computed once instead of per article.
        today = strftime('%d %B %Y')
        result = []
        for feed in feedLinks:
            heading = feed.find('h2')
            if heading is None:
                continue
            feedName = heading.string
            feedUrl = MANIFESTO_BASEURL + feed['href']
            feedSoup = self.index_to_soup(feedUrl)
            indexRoot = feedSoup.find('div',attrs={'class':'column1'})
            if indexRoot is None:
                self.log('Skipping section without article index: ', feedUrl)
                continue
            articles = []
            for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}):
                artLink = div.find('a')
                if artLink is None:
                    continue  # empty div
                description = ''
                descNode = div.find('div',attrs={'class':'text_12'})
                if descNode is not None:
                    description = descNode.string
                author = ''
                authNode = div.find('div',attrs={'class':'firma'})
                if authNode is not None:
                    author = authNode.string
                articles.append({
                    'title': artLink.string,
                    'url': MANIFESTO_BASEURL + artLink['href'],
                    'date': today,
                    'description': description,
                    'content': '',
                    'author': author
                })
            result.append((feedName,articles))
        return result

    def extract_readable_article(self, html, url):
        """Rebuild a minimal HTML page from the parts of an article page.

        :param html: raw article HTML.
        :param url: article URL; only passed on to the base class fallback.
        :return: HTML string with title, subtitle, author, summary and body.
            Falls back to the base class extraction when the expected
            ``column1`` / ``bodytext`` containers are missing.
        """
        bs = BeautifulSoup(html)
        col1 = bs.find('div',attrs={'class':'column1'})
        content = None
        if col1 is not None:
            content = col1.find('div',attrs={'class':'bodytext'})
        if content is None:
            # Unknown page layout: let the default extraction handle it
            # rather than crashing or emitting the text "None".
            return BasicNewsRecipe.extract_readable_article(self, html, url)

        # Missing optional parts become empty strings so that the literal
        # text "None" never ends up in the generated page.
        titleNode = bs.find(id='titolo_articolo')
        title = ''
        if titleNode is not None and titleNode.string is not None:
            title = titleNode.string
        author = col1.find('span',attrs={'class':'firma'})
        if author is None:
            author = ''
        subtitle = ''
        subNode = col1.findPrevious('div',attrs={'class':'occhiello_rosso'})
        if subNode is not None:
            subtitle = subNode
        summary = ''
        sommNode = bs.find('div',attrs={'class':'sommario'})
        if sommNode is not None:
            summary = sommNode
        template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>"
        return template % dict(title=title,subtitle=subtitle,author=author,summary=summary,content=content)

View File

@ -1,34 +1,34 @@
from calibre.web.feeds.news import BasicNewsRecipe
class JakartaGlobe(BasicNewsRecipe):
title = u'Jakarta Globe'
oldest_article = 3
max_articles_per_feed = 100
feeds = [
(u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
(u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
(u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
(u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
(u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
(u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
]
__author__ = 'rty'
pubisher = 'JakartaGlobe.com'
description = 'JakartaGlobe, Indonesia, Newspaper'
category = 'News, Indonesia'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_ID'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'
keep_only_tags = [
dict(name='div', attrs={'class':'story'}),
dict(name='span', attrs={'class':'headline'}),
dict(name='div', attrs={'class':'story'}),
dict(name='p', attrs={'id':'bodytext'})
]
from calibre.web.feeds.news import BasicNewsRecipe
class JakartaGlobe(BasicNewsRecipe):
    """Recipe for the RSS feeds of the Jakarta Globe (English, Indonesia)."""

    title = u'Jakarta Globe'
    oldest_article = 3
    max_articles_per_feed = 100

    # (section title, RSS URL) pairs
    feeds = [
        (u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
        (u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
        (u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
        (u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
        (u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
        (u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
    ]

    __author__ = 'rty'
    # Was misspelled "pubisher"; the other recipes name this attribute
    # "publisher".
    publisher = 'JakartaGlobe.com'
    description = 'JakartaGlobe, Indonesia, Newspaper'
    category = 'News, Indonesia'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en_ID'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'

    # NOTE(review): the 'story' div selector is listed twice; left as-is
    # because the effect of removing the repeat was not verified.
    keep_only_tags = [
        dict(name='div', attrs={'class':'story'}),
        dict(name='span', attrs={'class':'headline'}),
        dict(name='div', attrs={'class':'story'}),
        dict(name='p', attrs={'id':'bodytext'})
    ]

View File

@ -1,37 +1,37 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
__author__ = 'Vadim Dyadkin'
from calibre.web.feeds.news import BasicNewsRecipe
class Computerra(BasicNewsRecipe):
title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
oldest_article = 100
__author__ = 'Vadim Dyadkin (edited by A. Chewi)'
max_articles_per_feed = 50
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
conversion_options = {'linearize_tables' : True}
simultaneous_downloads = 5
language = 'ru'
description = u'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии'
keep_only_tags = [dict(name='div', attrs={'id': 'content'}),]
feeds = [(u'Компьютерра-Онлайн', 'http://feeds.feedburner.com/ct_news/'),]
remove_tags = [
dict(name='div', attrs={'id': ['fin', 'idc-container', 'idc-noscript',]}),
dict(name='ul', attrs={'class': "related_post"}),
dict(name='p', attrs={'class': 'info'}),
dict(name='a', attrs={'class': 'twitter-share-button'}),
dict(name='a', attrs={'type': 'button_count'}),
dict(name='h2', attrs={})
]
def print_version(self, url):
return url + '?print=true'
#!/usr/bin/python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
__author__ = 'Vadim Dyadkin'
from calibre.web.feeds.news import BasicNewsRecipe
class Computerra(BasicNewsRecipe):
    """Recipe for the Computerra-Online news feed (Russian)."""

    # --- metadata ---
    title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
    __author__ = 'Vadim Dyadkin (edited by A. Chewi)'
    description = u'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии'
    language = 'ru'

    # --- download limits ---
    oldest_article = 100
    max_articles_per_feed = 50
    simultaneous_downloads = 5

    # --- clean-up switches ---
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    feeds = [
        (u'Компьютерра-Онлайн', 'http://feeds.feedburner.com/ct_news/'),
    ]

    # Only the element with id "content" is kept ...
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]

    # ... and the following elements are removed.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['fin', 'idc-container', 'idc-noscript']}},
        {'name': 'ul', 'attrs': {'class': "related_post"}},
        {'name': 'p', 'attrs': {'class': 'info'}},
        {'name': 'a', 'attrs': {'class': 'twitter-share-button'}},
        {'name': 'a', 'attrs': {'type': 'button_count'}},
        {'name': 'h2', 'attrs': {}},
    ]

    def print_version(self, url):
        """Return ``url`` with the ``?print=true`` suffix appended."""
        suffix = '?print=true'
        return url + suffix

View File

@ -1,34 +1,34 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''
class SportsIllustratedRecipe(BasicNewsRecipe) :
__author__ = 'a.peter'
__copyright__ = 'a.peter'
__license__ = 'GPL v3'
language = 'de'
description = 'Leipziger Volkszeitung Online RSS'
version = 1
title = u'Leipziger Volkszeitung Online RSS'
timefmt = ' [%d.%m.%Y]'
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
publication_type = 'newspaper'
keep_only_tags = [dict(name='div', attrs={'class':'article'})]
remove_tags = [dict(name='div', attrs={'class':['ARTICLE_MORE', 'clearfloat']})]
feeds = [(u'Leipzig', u'http://nachrichten.lvz-online.de/rss/leipzig-rss.xml'),
(u'Mitteldeutschland', u'http://nachrichten.lvz-online.de/rss/mitteldeutschland-rss.xml'),
(u'Brennpunkte', u'http://nachrichten.lvz-online.de/rss/brennpunkte-rss.xml'),
(u'Polizeiticker', u'http://nachrichten.lvz-online.de/rss/polizeiticker-rss.xml'),
(u'Boulevard', u'http://nachrichten.lvz-online.de/rss/boulevard-rss.xml'),
(u'Kultur', u'http://nachrichten.lvz-online.de/rss/kultur-rss.xml'),
(u'Sport', u'http://nachrichten.lvz-online.de/rss/sport-rss.xml'),
(u'Regionalsport', u'http://nachrichten.lvz-online.de/rss/regionalsport-rss.xml'),
(u'Knipser', u'http://nachrichten.lvz-online.de/rss/knipser-rss.xml')]
def get_masthead_url(self):
return 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'
from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''
class SportsIllustratedRecipe(BasicNewsRecipe):
    """Recipe for the RSS feeds of the Leipziger Volkszeitung (German).

    The class name does not match the publication; it is kept unchanged.
    """

    # --- metadata ---
    title = u'Leipziger Volkszeitung Online RSS'
    description = 'Leipziger Volkszeitung Online RSS'
    language = 'de'
    publication_type = 'newspaper'
    version = 1
    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'

    # --- behaviour ---
    timefmt = ' [%d.%m.%Y]'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'article'}}]
    remove_tags = [{'name': 'div', 'attrs': {'class': ['ARTICLE_MORE', 'clearfloat']}}]

    # (section title, RSS URL) pairs
    feeds = [
        (u'Leipzig', u'http://nachrichten.lvz-online.de/rss/leipzig-rss.xml'),
        (u'Mitteldeutschland', u'http://nachrichten.lvz-online.de/rss/mitteldeutschland-rss.xml'),
        (u'Brennpunkte', u'http://nachrichten.lvz-online.de/rss/brennpunkte-rss.xml'),
        (u'Polizeiticker', u'http://nachrichten.lvz-online.de/rss/polizeiticker-rss.xml'),
        (u'Boulevard', u'http://nachrichten.lvz-online.de/rss/boulevard-rss.xml'),
        (u'Kultur', u'http://nachrichten.lvz-online.de/rss/kultur-rss.xml'),
        (u'Sport', u'http://nachrichten.lvz-online.de/rss/sport-rss.xml'),
        (u'Regionalsport', u'http://nachrichten.lvz-online.de/rss/regionalsport-rss.xml'),
        (u'Knipser', u'http://nachrichten.lvz-online.de/rss/knipser-rss.xml'),
    ]

    def get_masthead_url(self):
        """Return the fixed URL of the masthead logo."""
        logo = 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'
        return logo

View File

@ -1,100 +1,100 @@
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime
from calibre.ebooks.BeautifulSoup import Tag
from calibre.utils.magick import Image, PixelWand
class LifeHacker(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'en'
LANGHTM = 'en'
language = 'en'
ENCODING = 'utf'
ENCHTM = 'utf-8'
requires_version = (0,7,47)
news = True
title = u'LifeHacker'
__author__ = 'Euler Alves'
description = u'Tips, tricks, and downloads for getting things done.'
publisher = u'lifehacker.com'
author = u'Adam Pash & Kevin Purdy & Adam Dachis & Whitson Gordon & Gina Trapani'
category = 'news, rss'
oldest_article = 4
max_articles_per_feed = 20
summary_length = 1000
remove_javascript = True
no_stylesheets = True
use_embedded_content = True
remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]'
hoje = datetime.now()
pubdate = hoje.strftime('%a, %d %b')
cover_url = 'http://api.thumbalizr.com/?api_key='+THUMBALIZR_API+'&url=http://lifehacker.com&width=600&quality=90'
cover_margins = (0,0,'white')
masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
remove_tags = [
{'class': 'feedflare'},
dict(name='div',
attrs={'class':[
'ad_container'
,'ad_300x250'
,'ad_interstitial'
,'share-wrap'
,'ad_300x600'
,'ad_perma-footer-adsense'
,'ad_perma-panorama'
,'ad panorama'
,'ad_container'
]})
,dict(name='div',
attrs={'id':[
'agegate_container'
,'agegate_container_rejected'
,'sharemenu-wrap'
]})
]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]
conversion_options = {
'title' : title
,'comments' : description
,'publisher' : publisher
,'tags' : category
,'language' : LANGUAGE
,'linearize_tables': True
}
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1)
return soup
def postprocess_html(self, soup, first):
#process all the images. assumes that the new html has the correct path
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
if img < 0:
raise RuntimeError('Out of memory')
pw = PixelWand()
if( width > height and width > 590) :
print 'Rotate image'
img.rotate(pw, -90)
img.save(iurl)
return soup
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime
from calibre.ebooks.BeautifulSoup import Tag
from calibre.utils.magick import Image, PixelWand
class LifeHacker(BasicNewsRecipe):
    """Recipe for the LifeHacker "vip" feed, using the embedded feed content.

    ``preprocess_html`` strips inline styles and adds language / content-type
    meta tags; ``postprocess_html`` rotates wide landscape images.
    """

    # Get your API key at http://www.thumbalizr.com/ and put it here
    THUMBALIZR_API        = ''
    LANGUAGE              = 'en'
    LANGHTM               = 'en'
    language              = 'en'
    ENCODING              = 'utf'
    ENCHTM                = 'utf-8'
    requires_version      = (0,7,47)
    news                  = True

    title                 = u'LifeHacker'
    __author__            = 'Euler Alves'
    description           = u'Tips, tricks, and downloads for getting things done.'
    publisher             = u'lifehacker.com'
    author                = u'Adam Pash & Kevin Purdy & Adam Dachis & Whitson Gordon & Gina Trapani'
    category              = 'news, rss'

    oldest_article        = 4
    max_articles_per_feed = 20
    summary_length        = 1000

    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = True
    remove_empty_feeds    = True
    timefmt               = ' [%d %b %Y (%a)]'

    # Evaluated once, when the class body runs.
    hoje                  = datetime.now()
    pubdate               = hoje.strftime('%a, %d %b')
    cover_url             = 'http://api.thumbalizr.com/?api_key='+THUMBALIZR_API+'&url=http://lifehacker.com&width=600&quality=90'
    cover_margins         = (0,0,'white')
    masthead_url          = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'

    remove_tags = [
        {'class': 'feedflare'},
        dict(name='div',
            attrs={'class':[
                'ad_container'
                ,'ad_300x250'
                ,'ad_interstitial'
                ,'share-wrap'
                ,'ad_300x600'
                ,'ad_perma-footer-adsense'
                ,'ad_perma-panorama'
                ,'ad panorama'
            ]})
        ,dict(name='div',
            attrs={'id':[
                'agegate_container'
                ,'agegate_container_rejected'
                ,'sharemenu-wrap'
            ]})
    ]

    feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]

    conversion_options = {
        'title'            : title
        ,'comments'        : description
        ,'publisher'       : publisher
        ,'tags'            : category
        ,'language'        : LANGUAGE
        ,'linearize_tables': True
    }

    def preprocess_html(self, soup):
        """Drop inline ``style`` attributes and ensure meta tags exist.

        A Content-Language and a Content-Type ``<meta http-equiv>`` tag are
        inserted at the start of ``<head>`` when the document has none.
        Returns the modified ``soup``.
        """
        for item in soup.findAll(style=True):
            del item['style']
        if not soup.find(attrs={'http-equiv':'Content-Language'}):
            meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
            soup.head.insert(0,meta0)
        if not soup.find(attrs={'http-equiv':'Content-Type'}):
            meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
            soup.head.insert(0,meta1)
        return soup

    def postprocess_html(self, soup, first):
        """Rotate landscape images wider than 590 px by -90 degrees, in place.

        Assumes every ``<img src>`` in the processed HTML is a path that
        ``Image.open`` can read and ``Image.save`` can overwrite.
        Returns ``soup`` unchanged.
        """
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            width, height = img.size
            # Logged through the recipe logger rather than Python-2-only
            # print statements.
            self.log('img is: ', iurl, 'width is: ', width, 'height is: ', height)
            # The former "if img < 0" out-of-memory test compared an Image
            # object with an int and has been removed.
            if width > height and width > 590:
                self.log('Rotate image')
                img.rotate(PixelWand(), -90)
                img.save(iurl)
        return soup

View File

@ -1,85 +1,85 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Matthew Briggs'
__docformat__ = 'restructuredtext en'
'''
http://www.herald sun.com.au/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DailyTelegraph(BasicNewsRecipe):
title = u'Melbourne Herald Sun'
__author__ = u'Ray Hartley'
description = (u'Victorian and National News'
'. You will need to have a subscription to '
'http://www.heraldsun.com.au to get full articles.')
language = 'en_AU'
oldest_article = 2
needs_subscription = 'optional'
max_articles_per_feed = 30
remove_javascript = True
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en_AU'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://resources2.news.com.au/cs/heraldsun/images/header-and-footer/logo.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
.caption{display: inline; font-size: x-small}
"""
conversion_options = {
'comment' : description
, 'language' : language
}
keep_only_tags = [dict(attrs={'id':'story'})]
remove_tags_before=dict(attrs={'class':'story-header'})
remove_tags_after=dict(attrs={'class':'story-footer'})
remove_tags = [
dict(name=['meta','link','base','iframe','embed','object','media-metadata','media-reference','media-producer'])
,dict(attrs={'class':['story-header-tools','story-sidebar','story-footer','story-summary-list']})
]
remove_attributes=['lang']
feeds = [(u'Breaking News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_breakingnews_206.xml' )
,(u'Business' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_business_207.xml' )
,(u'Entertainment' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_entertainment_208.xml' )
,(u'Health Science' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_health_212.xml' )
,(u'Music' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_music_449.xml' )
,(u'National News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_national_209.xml' )
,(u'Sport News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_sport_213.xml' )
,(u'AFL News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_afl_205.xml' )
,(u'State News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_vic_214.xml' )
,(u'Technology' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_tech_215.xml' )
,(u'World News' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_world_216.xml' )
,(u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_opinion_210.xml' )
,(u'Andrew Bolt' , u'http://blogs.news.com.au/heraldsun/andrewbolt/index.php/xml/rss_2.0/heraldsun/hs_andrewbolt/')
,(u'Afl - St Kilda' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_afl_stkilda_565.xml')
,(u'Terry McCrann' ,u'http://feeds.news.com.au/public/rss/2.0/heraldsun_tmccrann_224.xml' )
,(u'The Other side' ,u'http://feeds.news.com.au/public/rss/2.0/heraldsun_otherside_211.xml')]
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
if self.username and self.password:
br.open('http://www.heraldsun.com.au')
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
raw = br.submit().read()
if '>log out' not in raw.lower():
raise ValueError('Failed to log in to www.heralsun'
' are your username and password correct?')
return br
def get_article_url(self, article):
return article.id
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Matthew Briggs'
__docformat__ = 'restructuredtext en'
'''
http://www.heraldsun.com.au/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DailyTelegraph(BasicNewsRecipe):
    """Recipe for the Melbourne Herald Sun RSS feeds.

    Login is optional: when a username and password are configured,
    ``get_browser`` submits them through the first form on the home page.
    The class name does not match the publication; it is kept unchanged.
    """

    title          = u'Melbourne Herald Sun'
    __author__     = u'Ray Hartley'
    description    = (u'Victorian and National News'
            '. You will need to have a subscription to '
            'http://www.heraldsun.com.au to get full articles.')
    language = 'en_AU'   # was assigned twice with the same value

    oldest_article = 2
    needs_subscription = 'optional'
    max_articles_per_feed = 30
    remove_javascript      = True
    no_stylesheets         = True
    encoding               = 'utf8'
    use_embedded_content   = False
    remove_empty_feeds     = True
    publication_type       = 'newspaper'
    masthead_url           = 'http://resources2.news.com.au/cs/heraldsun/images/header-and-footer/logo.gif'
    extra_css              = """
                                 body{font-family: Arial,Helvetica,sans-serif }
                                 img{margin-bottom: 0.4em; display:block}
                                 .caption{display: inline; font-size: x-small}
                             """

    # Key was 'comment'; the LifeHacker recipe spells this option
    # 'comments'.
    conversion_options = {
                          'comments' : description
                        , 'language' : language
                        }

    keep_only_tags = [dict(attrs={'id':'story'})]
    remove_tags_before=dict(attrs={'class':'story-header'})
    remove_tags_after=dict(attrs={'class':'story-footer'})
    remove_tags = [
                     dict(name=['meta','link','base','iframe','embed','object','media-metadata','media-reference','media-producer'])
                    ,dict(attrs={'class':['story-header-tools','story-sidebar','story-footer','story-summary-list']})
                  ]
    remove_attributes=['lang']

    # (section title, RSS URL) pairs
    feeds = [(u'Breaking News'  , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_breakingnews_206.xml'   )
            ,(u'Business'       , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_business_207.xml'       )
            ,(u'Entertainment'  , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_entertainment_208.xml'  )
            ,(u'Health Science' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_health_212.xml'         )
            ,(u'Music'          , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_music_449.xml'          )
            ,(u'National News'  , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_national_209.xml'       )
            ,(u'Sport News'     , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_sport_213.xml'          )
            ,(u'AFL News'       , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_afl_205.xml'            )
            ,(u'State News'     , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_vic_214.xml'            )
            ,(u'Technology'     , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_tech_215.xml'           )
            ,(u'World News'     , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_world_216.xml'          )
            ,(u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_opinion_210.xml'        )
            ,(u'Andrew Bolt'    , u'http://blogs.news.com.au/heraldsun/andrewbolt/index.php/xml/rss_2.0/heraldsun/hs_andrewbolt/')
            ,(u'Afl - St Kilda' , u'http://feeds.news.com.au/public/rss/2.0/heraldsun_afl_stkilda_565.xml')
            ,(u'Terry McCrann'  ,u'http://feeds.news.com.au/public/rss/2.0/heraldsun_tmccrann_224.xml'    )
            ,(u'The Other side' ,u'http://feeds.news.com.au/public/rss/2.0/heraldsun_otherside_211.xml')]

    def get_browser(self):
        """Return a browser, logged in when credentials are configured.

        Raises:
            ValueError: if the page returned after submitting the login
                form does not contain ">log out" (case-insensitive).
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username and self.password:
            br.open('http://www.heraldsun.com.au')
            br.select_form(nr=0)  # first form on the page
            br['username'] = self.username
            br['password'] = self.password
            raw = br.submit().read()
            if '>log out' not in raw.lower():
                raise ValueError('Failed to log in to www.heraldsun.com.au;'
                        ' are your username and password correct?')
        return br

    def get_article_url(self, article):
        """Use the feed entry's ``id`` as the article URL."""
        return article.id

View File

@ -1,138 +1,138 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
class Menorca(BasicNewsRecipe):
title = 'Menorca'
publisher = 'Editorial Menorca S.A. '
__author__ = 'M. Sintes'
description = u'Peri\xf3dico con informaci\xf3n de Menorca, Espa\xf1a'
category = 'news, politics, economy, culture, Menorca, Spain '
language = 'es'
enconding = 'cp1252'
no_stylesheets = True
oldest_article = 5
max_articles_per_feed = 25
feeds = [ (u'Principal',u'http://www.menorca.info/rss'),
(u'Opini\xf3n',u'http://www.menorca.info/rss?seccion=opinion'),
(u'Menorca',u'http://www.menorca.info/rss?seccion=menorca'),
(u'Alaior',u'http://www.menorca.info/rss?seccion=pueblos/alaior'),
(u'Ciutadella', u'http://www.menorca.info/rss?seccion=pueblos/ciutadella'),
(u'Es Castell', u'http://www.menorca.info/rss?seccion=pueblos/escastell'),
(u'Es Mercadal', u'http://www.menorca.info/rss?seccion=pueblos/esmercadal'),
(u'Es Migjorn', u'http://www.menorca.info/rss?seccion=pueblos/esmigjorn'),
(u'Ferreries', u'http://www.menorca.info/rss?seccion=pueblos/ferreries'),
(u'Fornells', u'http://www.menorca.info/rss?seccion=pueblos/fornells'),
(u'Llucma\xe7anes', u'http://www.menorca.info/rss?seccion=pueblos/llucmaanes'),
(u'Ma\xf3', u'http://www.menorca.info/rss?seccion=pueblos/mao'),
(u'Sant Climent', u'http://www.menorca.info/rss?seccion=pueblos/santcliment'),
(u'Sant Llu\xeds', u'http://www.menorca.info/rss?seccion=pueblos/santlluis'),
(u'Deportes',u'http://www.menorca.info/rss?seccion=deportes'),
(u'Balears', u'http://www.menorca.info/rss?seccion=balears')]
#Seccions amb link rss erroni. Es recupera directament de la pagina web
seccions_web = [(u'Mundo',u'http://www.menorca.info/actualidad/mundo'),
(u'Econom\xeda',u'http://www.menorca.info/actualidad/economia'),
(u'Espa\xf1a',u'http://www.menorca.info/actualidad/espana')]
remove_tags_before = dict(name='div', attrs={'class':'bloqueTitulosNoticia'})
remove_tags_after = dict(name='div', attrs={'class':'compartir'})
remove_tags = [dict(id = 'utilidades'),
dict(name='div', attrs={'class': 'totalComentarios'}),
dict(name='div', attrs={'class': 'compartir'}),
dict(name='div', attrs={'class': re.compile("img_noticia*")})
]
def print_version(self, url):
url_imprimir = url + '?d=print'
return url.replace(url, url_imprimir)
def feed_to_index_append(self, feedObject, masterFeed):
# Loop thru the feed object and build the correct type of article list
for feed in feedObject:
newArticles = []
for article in feed.articles:
newArt = {
'title' : article.title,
'url' : article.url,
'date' : article.date,
'description' : article.text_summary
}
newArticles.append(newArt)
# append the newly-built list object to the index object # passed in as masterFeed.
masterFeed.append((feed.title,newArticles))
def parse_index(self):
rssFeeds = Feed()
rssFeeds = BasicNewsRecipe.parse_feeds(self)
articles = []
feeds = []
self.feed_to_index_append(rssFeeds,feeds)
for (nom_seccio, url_seccio) in self.seccions_web:
articles = []
soup = self.index_to_soup(url_seccio)
for article in soup.findAll('div', attrs={'class':re.compile("articulo noticia|cajaNoticiaPortada")}):
h = article.find(['h2','h3'])
titol = self.tag_to_string(h)
a = article.find('a', href=True)
url = 'http://www.menorca.info' + a['href']
desc = None
autor = ''
dt = ''
soup_art = self.index_to_soup(url)
aut = soup_art.find('div', attrs={'class':'autor'})
tx = self.tag_to_string(aut)
ls = re.split('[,;]',tx)
t = len(ls)
if t >= 1:
autor = ls[0]
if t > 1:
d = ls[t-1]
if len(d) >= 10:
lt = len(d) - 10
dt = d[lt:]
self.log('\tTrobat article: ', titol, 'a', url, 'Seccio: ', nom_seccio, 'Autor: ', autor, 'Data: ', dt)
articles.append({'title': titol, 'url': url, 'description': desc, 'date':dt, 'author': autor})
if articles:
feeds.append((nom_seccio, articles))
return feeds
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
class Menorca(BasicNewsRecipe):
    """Recipe for menorca.info (Spanish).

    Most sections come from RSS feeds. The sections in ``seccions_web``
    have a broken RSS link and are scraped from their web pages instead;
    ``parse_index`` merges both sources into one index.
    """

    title = 'Menorca'
    publisher = 'Editorial Menorca S.A. '
    __author__ = 'M. Sintes'
    description = u'Peri\xf3dico con informaci\xf3n de Menorca, Espa\xf1a'
    category = 'news, politics, economy, culture, Menorca, Spain '
    language = 'es'
    # NOTE(review): spelled "enconding", not "encoding" as in the other
    # recipes, so it is presumably ignored. Left unchanged because fixing
    # the name would change how pages are decoded -- confirm against the
    # live site first.
    enconding = 'cp1252'

    no_stylesheets = True
    oldest_article = 5
    max_articles_per_feed = 25

    feeds = [ (u'Principal',u'http://www.menorca.info/rss'),
              (u'Opini\xf3n',u'http://www.menorca.info/rss?seccion=opinion'),
              (u'Menorca',u'http://www.menorca.info/rss?seccion=menorca'),
              (u'Alaior',u'http://www.menorca.info/rss?seccion=pueblos/alaior'),
              (u'Ciutadella', u'http://www.menorca.info/rss?seccion=pueblos/ciutadella'),
              (u'Es Castell', u'http://www.menorca.info/rss?seccion=pueblos/escastell'),
              (u'Es Mercadal', u'http://www.menorca.info/rss?seccion=pueblos/esmercadal'),
              (u'Es Migjorn', u'http://www.menorca.info/rss?seccion=pueblos/esmigjorn'),
              (u'Ferreries', u'http://www.menorca.info/rss?seccion=pueblos/ferreries'),
              (u'Fornells', u'http://www.menorca.info/rss?seccion=pueblos/fornells'),
              (u'Llucma\xe7anes', u'http://www.menorca.info/rss?seccion=pueblos/llucmaanes'),
              (u'Ma\xf3', u'http://www.menorca.info/rss?seccion=pueblos/mao'),
              (u'Sant Climent', u'http://www.menorca.info/rss?seccion=pueblos/santcliment'),
              (u'Sant Llu\xeds', u'http://www.menorca.info/rss?seccion=pueblos/santlluis'),
              (u'Deportes',u'http://www.menorca.info/rss?seccion=deportes'),
              (u'Balears', u'http://www.menorca.info/rss?seccion=balears')]

    # Sections whose RSS link is broken; they are read directly from the
    # web page.
    seccions_web = [(u'Mundo',u'http://www.menorca.info/actualidad/mundo'),
              (u'Econom\xeda',u'http://www.menorca.info/actualidad/economia'),
              (u'Espa\xf1a',u'http://www.menorca.info/actualidad/espana')]

    remove_tags_before = dict(name='div', attrs={'class':'bloqueTitulosNoticia'})
    remove_tags_after = dict(name='div', attrs={'class':'compartir'})
    remove_tags = [dict(id = 'utilidades'),
                   dict(name='div', attrs={'class': 'totalComentarios'}),
                   dict(name='div', attrs={'class': 'compartir'}),
                   # NOTE(review): "a*" means "zero or more 'a'", so this
                   # pattern matches anything containing "img_notici".
                   dict(name='div', attrs={'class': re.compile("img_noticia*")})
                   ]

    def print_version(self, url):
        """Return ``url`` with ``?d=print`` appended."""
        # The former url.replace(url, ...) replaced the whole string with
        # itself plus the suffix, i.e. plain concatenation.
        return url + '?d=print'

    def feed_to_index_append(self, feedObject, masterFeed):
        """Append parsed feeds to ``masterFeed`` in index form.

        For every feed in ``feedObject`` a ``(feed.title, [article dict])``
        tuple is appended to the ``masterFeed`` list, which is modified in
        place.
        """
        for feed in feedObject:
            newArticles = [
                {
                    'title'       : article.title,
                    'url'         : article.url,
                    'date'        : article.date,
                    'description' : article.text_summary
                }
                for article in feed.articles
            ]
            masterFeed.append((feed.title,newArticles))

    @staticmethod
    def _parse_byline(text):
        """Split byline ``text`` on ',' or ';' into ``(author, date)``.

        The author is the first field. The date is the last ten characters
        of the last field when there is more than one field and the last
        one is at least ten characters long; otherwise ''.
        """
        fields = re.split('[,;]', text)
        autor = fields[0]
        dt = ''
        if len(fields) > 1 and len(fields[-1]) >= 10:
            dt = fields[-1][-10:]
        return autor, dt

    def parse_index(self):
        """Return the index: RSS sections first, then the scraped ones.

        Each scraped section page is searched for article teasers; every
        article page is fetched to read its byline. Teasers without a link
        are skipped. A scraped section is added only if it yielded
        articles.
        """
        feeds = []
        self.feed_to_index_append(BasicNewsRecipe.parse_feeds(self), feeds)

        for (nom_seccio, url_seccio) in self.seccions_web:
            articles = []
            soup = self.index_to_soup(url_seccio)
            for article in soup.findAll('div', attrs={'class':re.compile("articulo noticia|cajaNoticiaPortada")}):
                a = article.find('a', href=True)
                if a is None:
                    continue  # teaser without a link: nothing to download
                titol = self.tag_to_string(article.find(['h2','h3']))
                url = 'http://www.menorca.info' + a['href']

                soup_art = self.index_to_soup(url)
                aut = soup_art.find('div', attrs={'class':'autor'})
                autor, dt = self._parse_byline(self.tag_to_string(aut))

                self.log('\tTrobat article: ', titol, 'a', url, 'Seccio: ', nom_seccio, 'Autor: ', autor, 'Data: ', dt)
                articles.append({'title': titol, 'url': url, 'description': None, 'date':dt, 'author': autor})

            if articles:
                feeds.append((nom_seccio, articles))
        return feeds

View File

@ -1,27 +1,27 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1329123365(BasicNewsRecipe):
title = u'Mobilebulgaria.com'
__author__ = 'M3 Web'
description = 'The biggest Bulgarian site covering mobile consumer electronics. Offers detailed reviews, popular discussion forum, shop and platform for selling new and second hand phones and gadgets.'
category = 'News, Reviews, Offers, Forum'
oldest_article = 45
max_articles_per_feed = 10
language = 'bg'
encoding = 'windows-1251'
no_stylesheets = False
remove_javascript = True
keep_only_tags = [dict(name='div', attrs={'class':'bigblock'}),
dict(name='div', attrs={'class':'verybigblock'}),
dict(name='table', attrs={'class':'obiaviresults'}),
dict(name='div', attrs={'class':'forumblock'}),
dict(name='div', attrs={'class':'forumblock_b1'}),
dict(name='div', attrs={'class':'block2_2colswrap'})]
feeds = [(u'News', u'http://www.mobilebulgaria.com/rss_full.php'),
(u'Reviews', u'http://www.mobilebulgaria.com/rss_reviews.php'),
(u'Offers', u'http://www.mobilebulgaria.com/obiavi/rss.php'),
(u'Forum', u'http://www.mobilebulgaria.com/rss_forum_last10.php')]
extra_css = '''
#gallery1 div{display: block; float: left; margin: 0 10px 10px 0;} '''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1329123365(BasicNewsRecipe):
    """Recipe for the RSS feeds of Mobilebulgaria.com (Bulgarian)."""

    # --- metadata ---
    title = u'Mobilebulgaria.com'
    __author__ = 'M3 Web'
    description = 'The biggest Bulgarian site covering mobile consumer electronics. Offers detailed reviews, popular discussion forum, shop and platform for selling new and second hand phones and gadgets.'
    category = 'News, Reviews, Offers, Forum'
    language = 'bg'
    encoding = 'windows-1251'

    # --- download limits ---
    oldest_article = 45
    max_articles_per_feed = 10

    # --- clean-up switches ---
    no_stylesheets = False
    remove_javascript = True

    # Elements kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'bigblock'}},
        {'name': 'div', 'attrs': {'class': 'verybigblock'}},
        {'name': 'table', 'attrs': {'class': 'obiaviresults'}},
        {'name': 'div', 'attrs': {'class': 'forumblock'}},
        {'name': 'div', 'attrs': {'class': 'forumblock_b1'}},
        {'name': 'div', 'attrs': {'class': 'block2_2colswrap'}},
    ]

    # (section title, RSS URL) pairs
    feeds = [
        (u'News', u'http://www.mobilebulgaria.com/rss_full.php'),
        (u'Reviews', u'http://www.mobilebulgaria.com/rss_reviews.php'),
        (u'Offers', u'http://www.mobilebulgaria.com/obiavi/rss.php'),
        (u'Forum', u'http://www.mobilebulgaria.com/rss_forum_last10.php'),
    ]

    extra_css = '''
                #gallery1 div{display: block; float: left; margin: 0 10px 10px 0;} '''

View File

@ -15,7 +15,7 @@ class mojegotowanie(BasicNewsRecipe):
language = 'pl'
description =u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.'
masthead_url='http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif'
cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif'
cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif'
remove_empty_feeds= True
oldest_article = 7
max_articles_per_feed = 100

View File

@ -1,35 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1308572538(BasicNewsRecipe):
    """Calibre news recipe for Novinite.com (Bulgarian news in English).

    Declarative only: one RDF feed per site category, all served by the same
    ``news_rdf.php`` endpoint and told apart by ``category_id``.
    """

    # --- Metadata -------------------------------------------------------
    title = u'Novinite.com'
    __author__ = 'Martin Tsanchev'
    description = 'Real time provider of the latest Bulgarian news in English'
    category = 'Business, Politics, Society, Sports, Crime, Lifestyle, World, People'
    language = 'en_BG'
    encoding = 'utf-8'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 10

    # --- Page clean-up --------------------------------------------------
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
    remove_tags = [dict(name='a', attrs={'class': 'twitter-share-button'})]
    remove_tags_after = dict(id='textsize')

    # --- Sources (feed title, feed URL) ---------------------------------
    feeds = [
        (u'Business', u'http://www.novinite.com/services/news_rdf.php?category_id=1'),
        (u'Finance', u'http://www.novinite.com/services/news_rdf.php?category_id=15'),
        (u'Energy', u'http://www.novinite.com/services/news_rdf.php?category_id=16'),
        (u'Industry', u'http://www.novinite.com/services/news_rdf.php?category_id=17'),
        (u'Properties', u'http://www.novinite.com/services/news_rdf.php?category_id=18'),
        (u'Politics', u'http://www.novinite.com/services/news_rdf.php?category_id=2'),
        (u'Diplomacy', u'http://www.novinite.com/services/news_rdf.php?category_id=20'),
        (u'Defense', u'http://www.novinite.com/services/news_rdf.php?category_id=21'),
        (u'Bulgaria in EU', u'http://www.novinite.com/services/news_rdf.php?category_id=22'),
        (u'Domestic', u'http://www.novinite.com/services/news_rdf.php?category_id=23'),
        (u'Society', u'http://www.novinite.com/services/news_rdf.php?category_id=3'),
        (u'Environment', u'http://www.novinite.com/services/news_rdf.php?category_id=24'),
        (u'Education', u'http://www.novinite.com/services/news_rdf.php?category_id=25'),
        (u'Culture', u'http://www.novinite.com/services/news_rdf.php?category_id=26'),
        (u'Archaeology', u'http://www.novinite.com/services/news_rdf.php?category_id=34'),
        (u'Health', u'http://www.novinite.com/services/news_rdf.php?category_id=62'),
        (u'Sports', u'http://www.novinite.com/services/news_rdf.php?category_id=4'),
        (u'Crime', u'http://www.novinite.com/services/news_rdf.php?category_id=5'),
        (u'Lifestyle', u'http://www.novinite.com/services/news_rdf.php?category_id=6'),
        (u'World', u'http://www.novinite.com/services/news_rdf.php?category_id=30'),
    ]
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1308572538(BasicNewsRecipe):
    """Calibre news recipe for Novinite.com (Bulgarian news in English).

    Declarative only: one RDF feed per site category, all served by the same
    ``news_rdf.php`` endpoint and told apart by ``category_id``.
    """

    # --- Metadata -------------------------------------------------------
    title = u'Novinite.com'
    __author__ = 'Martin Tsanchev'
    description = 'Real time provider of the latest Bulgarian news in English'
    category = 'Business, Politics, Society, Sports, Crime, Lifestyle, World, People'
    language = 'en_BG'
    encoding = 'utf-8'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 10

    # --- Page clean-up --------------------------------------------------
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
    remove_tags = [dict(name='a', attrs={'class': 'twitter-share-button'})]
    remove_tags_after = dict(id='textsize')

    # --- Sources (feed title, feed URL) ---------------------------------
    feeds = [
        (u'Business', u'http://www.novinite.com/services/news_rdf.php?category_id=1'),
        (u'Finance', u'http://www.novinite.com/services/news_rdf.php?category_id=15'),
        (u'Energy', u'http://www.novinite.com/services/news_rdf.php?category_id=16'),
        (u'Industry', u'http://www.novinite.com/services/news_rdf.php?category_id=17'),
        (u'Properties', u'http://www.novinite.com/services/news_rdf.php?category_id=18'),
        (u'Politics', u'http://www.novinite.com/services/news_rdf.php?category_id=2'),
        (u'Diplomacy', u'http://www.novinite.com/services/news_rdf.php?category_id=20'),
        (u'Defense', u'http://www.novinite.com/services/news_rdf.php?category_id=21'),
        (u'Bulgaria in EU', u'http://www.novinite.com/services/news_rdf.php?category_id=22'),
        (u'Domestic', u'http://www.novinite.com/services/news_rdf.php?category_id=23'),
        (u'Society', u'http://www.novinite.com/services/news_rdf.php?category_id=3'),
        (u'Environment', u'http://www.novinite.com/services/news_rdf.php?category_id=24'),
        (u'Education', u'http://www.novinite.com/services/news_rdf.php?category_id=25'),
        (u'Culture', u'http://www.novinite.com/services/news_rdf.php?category_id=26'),
        (u'Archaeology', u'http://www.novinite.com/services/news_rdf.php?category_id=34'),
        (u'Health', u'http://www.novinite.com/services/news_rdf.php?category_id=62'),
        (u'Sports', u'http://www.novinite.com/services/news_rdf.php?category_id=4'),
        (u'Crime', u'http://www.novinite.com/services/news_rdf.php?category_id=5'),
        (u'Lifestyle', u'http://www.novinite.com/services/news_rdf.php?category_id=6'),
        (u'World', u'http://www.novinite.com/services/news_rdf.php?category_id=30'),
    ]

View File

@ -1,36 +1,36 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311799898(BasicNewsRecipe):
    """Calibre news recipe for the Colombian newspaper Portafolio.

    Declarative only: seven section feeds from portafolio.co; article pages
    are reduced to the ``contenidoArt`` container.
    """

    # --- Metadata -------------------------------------------------------
    title = u'Periódico Portafolio Colombia'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'
    # The site logo doubles as both cover and masthead.
    cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
    masthead_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    keep_only_tags = [dict(name='div', id='contenidoArt')]
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after = [dict(name='div', attrs={'class': 'articulo-mas'})]

    extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
"""

    # --- Sources (feed title, feed URL) ---------------------------------
    feeds = [
        (u'Negocios', u'http://www.portafolio.co/negocios/feed'),
        (u'Economia', u'http://www.portafolio.co/economia/feed'),
        (u'Internacional', u'http://www.portafolio.co/internacional/feed'),
        (u'Indicadores', u'http://www.portafolio.co/indicadores/feed'),
        (u'Opinion', u'http://www.portafolio.co/opinion/feed'),
        (u'Finanzas Personales', u'http://www.portafolio.co/finanzas-personales/feed'),
        (u'Herramientas', u'http://www.portafolio.co/herramientas/feed'),
    ]
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311799898(BasicNewsRecipe):
    """Calibre news recipe for the Colombian newspaper Portafolio.

    Declarative only: seven section feeds from portafolio.co; article pages
    are reduced to the ``contenidoArt`` container.
    """

    # --- Metadata -------------------------------------------------------
    title = u'Periódico Portafolio Colombia'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'
    # The site logo doubles as both cover and masthead.
    cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
    masthead_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    keep_only_tags = [dict(name='div', id='contenidoArt')]
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after = [dict(name='div', attrs={'class': 'articulo-mas'})]

    extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
"""

    # --- Sources (feed title, feed URL) ---------------------------------
    feeds = [
        (u'Negocios', u'http://www.portafolio.co/negocios/feed'),
        (u'Economia', u'http://www.portafolio.co/economia/feed'),
        (u'Internacional', u'http://www.portafolio.co/internacional/feed'),
        (u'Indicadores', u'http://www.portafolio.co/indicadores/feed'),
        (u'Opinion', u'http://www.portafolio.co/opinion/feed'),
        (u'Finanzas Personales', u'http://www.portafolio.co/finanzas-personales/feed'),
        (u'Herramientas', u'http://www.portafolio.co/herramientas/feed'),
    ]

View File

@ -1,11 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341570(BasicNewsRecipe):
    """Calibre news recipe for Revista Semana: a single RSS feed, no overrides."""

    title = u'Revista Semana'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'

    # Fetch limits.
    max_articles_per_feed = 100
    oldest_article = 7

    feeds = [
        (u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml'),
    ]
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341570(BasicNewsRecipe):
    """Calibre news recipe for Revista Semana: a single RSS feed, no overrides."""

    title = u'Revista Semana'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'

    # Fetch limits.
    max_articles_per_feed = 100
    oldest_article = 7

    feeds = [
        (u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml'),
    ]

View File

@ -1,28 +1,28 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>'
'''
www.rushisaband.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class rushisaband(BasicNewsRecipe):
    """Calibre news recipe for the "Rush is a Band" blog (one Feedburner feed)."""

    # --- Metadata -------------------------------------------------------
    title = u'Rushisaband'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    description = u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson'
    language = 'en_GB'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    # Only h4, h5 and p elements are kept (same order as before).
    keep_only_tags = [
        dict(name='h4'),
        dict(name='h5'),
        dict(name='p'),
    ]

    feeds = [
        (u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog'),
    ]
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>'
'''
www.rushisaband.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class rushisaband(BasicNewsRecipe):
    """Calibre news recipe for the "Rush is a Band" blog (one Feedburner feed)."""

    # --- Metadata -------------------------------------------------------
    title = u'Rushisaband'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    description = u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson'
    language = 'en_GB'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    # Only h4, h5 and p elements are kept (same order as before).
    keep_only_tags = [
        dict(name='h4'),
        dict(name='h5'),
        dict(name='p'),
    ]

    feeds = [
        (u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog'),
    ]

View File

@ -1,29 +1,29 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2012, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rybinski.eu
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Rybinski(BasicNewsRecipe):
    """Calibre news recipe for the rybinski.eu blog (Polish-language RSS feed).

    Keeps the ``post`` container of each page and drops its two meta blocks
    and the comments block.
    """

    # --- Metadata -------------------------------------------------------
    title = u'Rybinski.eu - economy of the XXI century'
    __author__ = u'Tomasz D\u0142ugosz'
    description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego'
    language = 'pl'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Page clean-up --------------------------------------------------
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'post-meta-1'}),
        dict(name='div', attrs={'class': 'post-meta-2'}),
        dict(name='div', attrs={'class': 'post-comments'}),
    ]

    feeds = [(u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl')]
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2012, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rybinski.eu
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Rybinski(BasicNewsRecipe):
    """Calibre news recipe for the rybinski.eu blog (Polish-language RSS feed).

    Keeps the ``post`` container of each page and drops its two meta blocks
    and the comments block.
    """

    # --- Metadata -------------------------------------------------------
    title = u'Rybinski.eu - economy of the XXI century'
    __author__ = u'Tomasz D\u0142ugosz'
    description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego'
    language = 'pl'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Page clean-up --------------------------------------------------
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'post-meta-1'}),
        dict(name='div', attrs={'class': 'post-meta-2'}),
        dict(name='div', attrs={'class': 'post-comments'}),
    ]

    feeds = [(u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl')]

View File

@ -1,22 +1,22 @@
__license__ = 'GPL v3'
__copyright__ = '2011 Neil Grogan'
#
# Silicon Republic Recipe
#
from calibre.web.feeds.news import BasicNewsRecipe
class SiliconRepublic(BasicNewsRecipe):
    """Calibre news recipe for Silicon Republic (single news feed).

    Clean-up is by exclusion: elements are removed by CSS class, by id, and
    by tag name (script/style).
    """

    title = u'Silicon Republic'
    __author__ = u'Neil Grogan'
    language = 'en_IE'

    oldest_article = 7
    max_articles_per_feed = 100

    remove_tags = [
        # Matched on the class attribute.
        dict(attrs={'class': [
            'thumb', 'txt', 'compactbox', 'icons', 'catlist', 'catlistinner',
            'taglist', 'taglistinner', 'social', 'also-in', 'also-in-inner',
            'also-in-footer', 'zonek-dfp', 'paneladvert', 'rcadvert', 'panel',
            'h2b',
        ]}),
        # Matched on the id attribute.
        dict(id=[
            'header', 'logo', 'header-right', 'sitesearch', 'rsslinks',
            'topnav', 'topvideos', 'topvideos-list', 'topnews',
            'topnews-list', 'slideshow', 'slides', 'compactheader',
            'compactnews', 'compactfeatures', 'article-type',
            'contactlinks-header', 'banner-zone-k-dfp', 'footer-related',
            'directory-services', 'also-in-section', 'featuredrelated1',
            'featuredrelated2', 'featuredrelated3', 'featuredrelated4',
            'advert2-dfp',
        ]),
        # Matched on the tag name.
        dict(name=['script', 'style']),
    ]

    feeds = [(u'News', u'http://www.siliconrepublic.com/feeds/')]
__license__ = 'GPL v3'
__copyright__ = '2011 Neil Grogan'
#
# Silicon Republic Recipe
#
from calibre.web.feeds.news import BasicNewsRecipe
class SiliconRepublic(BasicNewsRecipe):
    """Calibre news recipe for Silicon Republic (single news feed).

    Clean-up is by exclusion: elements are removed by CSS class, by id, and
    by tag name (script/style).
    """

    title = u'Silicon Republic'
    __author__ = u'Neil Grogan'
    language = 'en_IE'

    oldest_article = 7
    max_articles_per_feed = 100

    remove_tags = [
        # Matched on the class attribute.
        dict(attrs={'class': [
            'thumb', 'txt', 'compactbox', 'icons', 'catlist', 'catlistinner',
            'taglist', 'taglistinner', 'social', 'also-in', 'also-in-inner',
            'also-in-footer', 'zonek-dfp', 'paneladvert', 'rcadvert', 'panel',
            'h2b',
        ]}),
        # Matched on the id attribute.
        dict(id=[
            'header', 'logo', 'header-right', 'sitesearch', 'rsslinks',
            'topnav', 'topvideos', 'topvideos-list', 'topnews',
            'topnews-list', 'slideshow', 'slides', 'compactheader',
            'compactnews', 'compactfeatures', 'article-type',
            'contactlinks-header', 'banner-zone-k-dfp', 'footer-related',
            'directory-services', 'also-in-section', 'featuredrelated1',
            'featuredrelated2', 'featuredrelated3', 'featuredrelated4',
            'advert2-dfp',
        ]),
        # Matched on the tag name.
        dict(name=['script', 'style']),
    ]

    feeds = [(u'News', u'http://www.siliconrepublic.com/feeds/')]

View File

@ -1,15 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1296179411(BasicNewsRecipe):
    """Calibre news recipe for SPIN magazine: three spin.com RSS feeds."""

    # Fixed spelling: the title previously read 'SPIN Magzine'.
    title = u'SPIN Magazine'
    __author__ = 'Quistopher'
    language = 'en'

    # Fetch limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # (feed title, feed URL)
    feeds = [
        (u'Daily Noise Blog | SPIN.com', u'http://www.spin.com/blog/feed'),
        (u'It Happened Last Night | SPIN.com', u'http://www.spin.com/it-happened-last-night/feed'),
        (u'Album Reviews | SPIN.com', u'http://www.spin.com/album-reviews/feed')
    ]
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1296179411(BasicNewsRecipe):
    """Calibre news recipe for SPIN magazine: three spin.com RSS feeds."""

    # Fixed spelling: the title previously read 'SPIN Magzine'.
    title = u'SPIN Magazine'
    __author__ = 'Quistopher'
    language = 'en'

    # Fetch limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # (feed title, feed URL)
    feeds = [
        (u'Daily Noise Blog | SPIN.com', u'http://www.spin.com/blog/feed'),
        (u'It Happened Last Night | SPIN.com', u'http://www.spin.com/it-happened-last-night/feed'),
        (u'Album Reviews | SPIN.com', u'http://www.spin.com/album-reviews/feed')
    ]

View File

@ -1,18 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299054026(BasicNewsRecipe):
    """Calibre news recipe for Thai Post Daily.

    One feed per thaipost.net taxonomy term; every article URL is mapped to
    the site's ``/print/`` page by ``print_version``.
    """

    title = u'Thai Post Daily'
    __author__ = 'Chotechai P.'
    language = 'th'
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'

    # (feed title in Thai, feed URL); one entry per taxonomy term.
    feeds = [
        (u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'),
        (u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'),
        (u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'),
        (u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'),
        (u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'),
        (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'),
        (u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'),
        (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'),
        (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'),
        (u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'),
        (u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'),
        (u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', u'http://thaipost.net/taxonomy/term/65/all/feed'),
        (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'),
        (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'),
        (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'),
        (u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'),
        (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'),
        (u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'),
        (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'),
        (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'),
        (u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed'),
    ]

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        The first 32 characters of ``url`` are dropped and the remainder is
        appended to the ``/print/`` base. The original wrapped this in
        ``url.replace(url, ...)``, which always yields the replacement, so
        the result is returned directly.
        NOTE(review): the fixed offset 32 presumably matches the length of
        the article URL prefix produced by the feeds -- confirm.
        """
        return 'http://www.thaipost.net/print/' + url[32:]

    # Header elements of the print page that are dropped.
    remove_tags = [
        dict(name='div', attrs={'class': 'print-logo'}),
        dict(name='div', attrs={'class': 'print-site_name'}),
        dict(name='div', attrs={'class': 'print-breadcrumb'}),
    ]
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299054026(BasicNewsRecipe):
    """Calibre news recipe for Thai Post Daily.

    One feed per thaipost.net taxonomy term; every article URL is mapped to
    the site's ``/print/`` page by ``print_version``.
    """

    title = u'Thai Post Daily'
    __author__ = 'Chotechai P.'
    language = 'th'
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'

    # (feed title in Thai, feed URL); one entry per taxonomy term.
    feeds = [
        (u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'),
        (u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'),
        (u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'),
        (u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'),
        (u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'),
        (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'),
        (u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'),
        (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'),
        (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'),
        (u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'),
        (u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'),
        (u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', u'http://thaipost.net/taxonomy/term/65/all/feed'),
        (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'),
        (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'),
        (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'),
        (u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'),
        (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'),
        (u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'),
        (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'),
        (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'),
        (u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed'),
    ]

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        The first 32 characters of ``url`` are dropped and the remainder is
        appended to the ``/print/`` base. The original wrapped this in
        ``url.replace(url, ...)``, which always yields the replacement, so
        the result is returned directly.
        NOTE(review): the fixed offset 32 presumably matches the length of
        the article URL prefix produced by the feeds -- confirm.
        """
        return 'http://www.thaipost.net/print/' + url[32:]

    # Header elements of the print page that are dropped.
    remove_tags = [
        dict(name='div', attrs={'class': 'print-logo'}),
        dict(name='div', attrs={'class': 'print-site_name'}),
        dict(name='div', attrs={'class': 'print-breadcrumb'}),
    ]

View File

@ -1,27 +1,27 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313555075(BasicNewsRecipe):
    """Calibre news recipe for The Clinic (Chilean satirical weekly).

    Single RSS feed; article pages are trimmed to the span between the
    ``scope bordered`` article element and the comments section.
    """

    news = True

    # --- Metadata -------------------------------------------------------
    title = u'The Clinic'
    __author__ = 'Alex Mitrani'
    description = u'Online version of Chilean satirical weekly'
    publisher = u'The Clinic'
    category = 'news, politics, Chile, rss'
    language = 'es_CL'
    masthead_url = 'http://www.theclinic.cl/wp-content/themes/tc12m/css/ui/mainLogoTC-top.png'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    summary_length = 1000
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    remove_tags_before = dict(name='article', attrs={'class': 'scope bordered'})
    remove_tags_after = dict(name='div', attrs={'id': 'commentsSection'})
    remove_tags = [
        dict(name='span', attrs={'class': 'relTags'}),
        dict(name='div', attrs={'class': 'articleActivity hdcol'}),
        dict(name='div', attrs={'id': 'commentsSection'}),
    ]

    feeds = [(u'The Clinic Online', u'http://www.theclinic.cl/feed/')]
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313555075(BasicNewsRecipe):
    """Calibre news recipe for The Clinic (Chilean satirical weekly).

    Single RSS feed; article pages are trimmed to the span between the
    ``scope bordered`` article element and the comments section.
    """

    news = True

    # --- Metadata -------------------------------------------------------
    title = u'The Clinic'
    __author__ = 'Alex Mitrani'
    description = u'Online version of Chilean satirical weekly'
    publisher = u'The Clinic'
    category = 'news, politics, Chile, rss'
    language = 'es_CL'
    masthead_url = 'http://www.theclinic.cl/wp-content/themes/tc12m/css/ui/mainLogoTC-top.png'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    summary_length = 1000
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    remove_tags_before = dict(name='article', attrs={'class': 'scope bordered'})
    remove_tags_after = dict(name='div', attrs={'id': 'commentsSection'})
    remove_tags = [
        dict(name='span', attrs={'class': 'relTags'}),
        dict(name='div', attrs={'class': 'articleActivity hdcol'}),
        dict(name='div', attrs={'id': 'commentsSection'}),
    ]

    feeds = [(u'The Clinic Online', u'http://www.theclinic.cl/feed/')]

View File

@ -1,63 +1,63 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
# Calibre recipe for The New Republic. The index is built by parse_index
# from the site's current-issue page, and get_browser logs in first
# (needs_subscription is set).
class TNR(BasicNewsRecipe):
title = 'The New Republic'
__author__ = 'Krittika Goyal'
description = '''The New Republic is a journal of opinion with an emphasis
on politics and domestic and international affairs. It carries feature
articles by staff and contributing editors. The second half of each issue
is devoted to book and the arts, theater, motion pictures, music and art.'''
language = 'en'
encoding = 'UTF-8'
needs_subscription = True
# (pattern, replacement) pairs that blank out HTML comments and <script>
# elements; parse_index also applies them by hand to the raw index page.
preprocess_regexps = [
(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
]
def get_browser(self):
    """Return a browser that is logged in to newrepublic.com.

    Opens the ``/user`` page, fills in the second form on it (``nr=1``)
    with ``self.username`` / ``self.password`` and submits it.

    Raises:
        ValueError: if the response to the login does not contain the text
            'SIGN OUT', which is taken to mean the login failed.
    """
    br = BasicNewsRecipe.get_browser(self)
    br.open('http://www.newrepublic.com/user')
    br.select_form(nr=1)
    # The username control is addressed as 'user' first, then as 'name'.
    try:
        br['user'] = self.username
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed by the fallback.
        br['name'] = self.username
    br['pass'] = self.password
    self.log('Logging in...')
    raw = br.submit().read()
    if 'SIGN OUT' not in raw:
        raise ValueError('Failed to log in to tnr.com, check your username and password')
    self.log('Logged in successfully')
    return br
# Build the index from the current-issue page and return it as a one-element
# list: [(feed_title, articles)], where each article is a dict with the keys
# 'title', 'url', 'description' and 'date'.
def parse_index(self):
# Fetch the page as raw markup so the class-level regexps can be applied
# before it is parsed into a soup.
raw = self.index_to_soup('http://www.newrepublic.com/current-issue', raw=True)
# raw = self.index_to_soup(open('/t/raw.html').read().decode('utf-8'), raw=True)
for pat, sub in self.preprocess_regexps:
raw = pat.sub(sub, raw)
soup = self.index_to_soup(raw)
feed_title = 'The New Republic Magazine Articles'
articles = []
# Candidate containers: divs whose class list includes 'field-item'.
for div in soup.findAll('div', attrs={'class':lambda x: x and 'field-item' in x.split()}):
# First link with an href whose class is not exactly 'author'.
a = div.find('a', href=True, attrs={'class':lambda x: x != 'author'})
if a is not None:
art_title = self.tag_to_string(a)
url = a.get('href')
# A /<digits>/ path segment in the href is used as the node id for the
# print URL.
num = re.search(r'/(\d+)/', url)
if num is not None:
art = num.group(1)
url = 'http://www.newrepublic.com/node/%s/print'%art
self.log.info('\tFound article:', art_title, 'at', url)
article = {'title':art_title, 'url':url, 'description':'', 'date':''}
articles.append(article)
return [(feed_title, articles)]
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
# Calibre recipe for The New Republic. The index is built by parse_index
# from the site's current-issue page, and get_browser logs in first
# (needs_subscription is set).
class TNR(BasicNewsRecipe):
title = 'The New Republic'
__author__ = 'Krittika Goyal'
description = '''The New Republic is a journal of opinion with an emphasis
on politics and domestic and international affairs. It carries feature
articles by staff and contributing editors. The second half of each issue
is devoted to book and the arts, theater, motion pictures, music and art.'''
language = 'en'
encoding = 'UTF-8'
needs_subscription = True
# (pattern, replacement) pairs that blank out HTML comments and <script>
# elements; parse_index also applies them by hand to the raw index page.
preprocess_regexps = [
(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
]
def get_browser(self):
    """Return a browser that is logged in to newrepublic.com.

    Opens the ``/user`` page, fills in the second form on it (``nr=1``)
    with ``self.username`` / ``self.password`` and submits it.

    Raises:
        ValueError: if the response to the login does not contain the text
            'SIGN OUT', which is taken to mean the login failed.
    """
    br = BasicNewsRecipe.get_browser(self)
    br.open('http://www.newrepublic.com/user')
    br.select_form(nr=1)
    # The username control is addressed as 'user' first, then as 'name'.
    try:
        br['user'] = self.username
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed by the fallback.
        br['name'] = self.username
    br['pass'] = self.password
    self.log('Logging in...')
    raw = br.submit().read()
    if 'SIGN OUT' not in raw:
        raise ValueError('Failed to log in to tnr.com, check your username and password')
    self.log('Logged in successfully')
    return br
# Build the index from the current-issue page and return it as a one-element
# list: [(feed_title, articles)], where each article is a dict with the keys
# 'title', 'url', 'description' and 'date'.
def parse_index(self):
# Fetch the page as raw markup so the class-level regexps can be applied
# before it is parsed into a soup.
raw = self.index_to_soup('http://www.newrepublic.com/current-issue', raw=True)
# raw = self.index_to_soup(open('/t/raw.html').read().decode('utf-8'), raw=True)
for pat, sub in self.preprocess_regexps:
raw = pat.sub(sub, raw)
soup = self.index_to_soup(raw)
feed_title = 'The New Republic Magazine Articles'
articles = []
# Candidate containers: divs whose class list includes 'field-item'.
for div in soup.findAll('div', attrs={'class':lambda x: x and 'field-item' in x.split()}):
# First link with an href whose class is not exactly 'author'.
a = div.find('a', href=True, attrs={'class':lambda x: x != 'author'})
if a is not None:
art_title = self.tag_to_string(a)
url = a.get('href')
# A /<digits>/ path segment in the href is used as the node id for the
# print URL.
num = re.search(r'/(\d+)/', url)
if num is not None:
art = num.group(1)
url = 'http://www.newrepublic.com/node/%s/print'%art
self.log.info('\tFound article:', art_title, 'at', url)
article = {'title':art_title, 'url':url, 'description':'', 'date':''}
articles.append(article)
return [(feed_title, articles)]

View File

@ -1,76 +1,76 @@
#!/usr/bin/env python
__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
utrinski.com.mk
'''
import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre import browser
class UtrinskiVesnik(BasicNewsRecipe):
    """Calibre recipe for the Macedonian daily Utrinski Vesnik.

    No RSS feeds are declared: ``parse_index`` follows the main-menu links on
    the front page and collects the article links of every section page.
    """

    INDEX = 'http://www.utrinski.com.mk/'

    # --- Metadata -------------------------------------------------------
    title = 'Utrinski Vesnik'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg'
    language = 'mk'
    publication_type = 'newspaper'
    category = 'news, Macedonia'

    # --- Fetch / clean-up switches --------------------------------------
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    # Replace everything up to the "Article start" comment with <body> and
    # everything from the "Article end" comment onwards with </body>.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1])
        for i in [
            (r'<body.*?Article start-->', lambda match: '<body>'),
            (r'<!--Article end.*?</body>', lambda match: '</body>'),
        ]
    ]

    extra_css = """
body{font-family: Arial,Helvetica,sans-serif}
.WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
"""

    conversion_options = {
        'comment': description,
        'tags': category,
        'language': language,
        'linearize_tables': True,
    }

    def parse_index(self):
        """Return ``[(section title, [article dict, ...]), ...]``.

        Sections are the front-page anchors with class
        ``WB_UTRINSKIVESNIK_MainMenu``; articles are the anchors with class
        ``WB_UTRINSKIVESNIK_ONLINEArticleTitle`` on each section page.
        Sections without any article link are left out. Every article is
        dated with today's date.
        """
        index = []
        front_page = self.index_to_soup(self.INDEX)
        for menu_link in front_page.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_MainMenu'}):
            section_name = menu_link.contents[0].string
            section_href = self.INDEX + menu_link['href'].strip()
            # Section pages are fetched with a fresh browser and parsed here.
            section_page = BeautifulSoup(browser().open_novisit(section_href).read())
            entries = []
            for anchor in section_page.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_ONLINEArticleTitle'}):
                # Title comes from the anchor text, URL from its href.
                entries.append({
                    'title': anchor.contents[0].string.strip(),
                    'url': self.INDEX + anchor['href'].strip(),
                    'description': '',
                    'date': datetime.datetime.today().strftime('%d.%m.%Y'),
                })
            if entries:
                index.append((section_name, entries))
        return index

    def get_cover_url(self):
        """Return the cover image URL, which is named after today's date (dd_mm_YYYY)."""
        return ('http://www.utrinski.com.mk/WBStorage/Files/'
                + datetime.datetime.today().strftime('%d_%m_%Y')
                + '.jpg')
#!/usr/bin/env python
__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
utrinski.com.mk
'''
import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre import browser
class UtrinskiVesnik(BasicNewsRecipe):
INDEX = 'http://www.utrinski.com.mk/'
title = 'Utrinski Vesnik'
description = 'Daily Macedonian newspaper'
masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg'
language = 'mk'
remove_javascript = True
publication_type = 'newspaper'
category = 'news, Macedonia'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
## Remove anything before the start of the article.
(r'<body.*?Article start-->', lambda match: '<body>'),
## Remove anything after the end of the article.
(r'<!--Article end.*?</body>', lambda match : '</body>'),
]
]
extra_css = """
body{font-family: Arial,Helvetica,sans-serif}
.WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
"""
conversion_options = {
'comment' : description,
'tags' : category,
'language' : language,
'linearize_tables' : True
}
def parse_index(self):
feeds = []
# open main page
soup = self.index_to_soup(self.INDEX)
# find all anchors with class attribute equal to 'WB_UTRINSKIVESNIK_MainMenu'
for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_MainMenu'}):
sectionTitle = section.contents[0].string
sectionUrl = self.INDEX + section['href'].strip()
# open the anchor link
raw = browser().open_novisit(sectionUrl).read()
sectionSoup = BeautifulSoup(raw)
# find all anchors with class attribute equal to 'WB_UTRINSKIVESNIK_ONLINEArticleTitle'
sectionArticles = sectionSoup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_ONLINEArticleTitle'})
articles = []
for sectionArticle in sectionArticles:
# article title = anchor's contents, article url = anchor's href
articleTitle = sectionArticle.contents[0].string.strip()
articleUrl = self.INDEX + sectionArticle['href'].strip()
articleDate = datetime.datetime.today().strftime('%d.%m.%Y')
articles.append({'title': articleTitle, 'url':articleUrl, 'description':'', 'date': articleDate})
if articles:
feeds.append((sectionTitle, articles))
return feeds
def get_cover_url(self):
    """Return the URL of today's cover image.

    The file name is today's date formatted as ``dd_mm_YYYY`` with a
    ``.jpg`` extension, under the site's ``WBStorage/Files/`` folder.
    """
    today = datetime.datetime.today()
    parts = ('http://www.utrinski.com.mk/WBStorage/Files/', today.strftime('%d_%m_%Y'), '.jpg')
    return ''.join(parts)

View File

@ -1,19 +1,19 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1350731826(BasicNewsRecipe):
    """Feed-based recipe for the Yazihane sports blog (yazihaneden.com)."""

    # ---- Identity / metadata ----
    title = u'Yazihane'
    __author__ = 'A Erdogan'
    description = 'Sports Blog'
    publisher = 'yazihaneden.com'
    category = 'sports, basketball, nba, cycling, euroleague'
    language = 'tr'
    masthead_url = 'http://www.yazihaneden.com/wp-content/uploads/Untitled-1.png'

    # ---- Download limits and clean-up switches ----
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Keep only the <div> whose id matches the pattern below. The doubled
    # '||' adds an empty alternative to each group, so under search
    # semantics the pattern is satisfied by any id containing 'post-'.
    # NOTE(review): presumably relied on to match ids like 'post-123'.
    keep_only_tags = [
        dict(name='div', attrs={'id': re.compile('(^|| )post-($|| )', re.DOTALL)}),
    ]
    # Discard whatever follows the post footer block.
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'post-footer clear'}}

    # Single feed: (title, url).
    feeds = [
        (u'Yazihane', u'http://www.yazihaneden.com/feed/'),
    ]
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1350731826(BasicNewsRecipe):
    """Feed-based recipe for the Yazihane sports blog (yazihaneden.com)."""

    # ---- Identity / metadata ----
    title = u'Yazihane'
    __author__ = 'A Erdogan'
    description = 'Sports Blog'
    publisher = 'yazihaneden.com'
    category = 'sports, basketball, nba, cycling, euroleague'
    language = 'tr'
    masthead_url = 'http://www.yazihaneden.com/wp-content/uploads/Untitled-1.png'

    # ---- Download limits and clean-up switches ----
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Keep only the <div> whose id matches the pattern below. The doubled
    # '||' adds an empty alternative to each group, so under search
    # semantics the pattern is satisfied by any id containing 'post-'.
    # NOTE(review): presumably relied on to match ids like 'post-123'.
    keep_only_tags = [
        dict(name='div', attrs={'id': re.compile('(^|| )post-($|| )', re.DOTALL)}),
    ]
    # Discard whatever follows the post footer block.
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'post-footer clear'}}

    # Single feed: (title, url).
    feeds = [
        (u'Yazihane', u'http://www.yazihaneden.com/feed/'),
    ]

View File

@ -1,208 +1,208 @@
@echo OFF
REM Calibre-Portable.bat
REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬
REM
REM Batch File to start a Calibre configuration on Windows
REM giving explicit control of the location of:
REM - Calibre Program Files
REM - Calibre Library Files
REM - Calibre Config Files
REM - Calibre Metadata database
REM - Calibre Source files
REM - Calibre Temp Files
REM By setting the paths correctly it can be used to run:
REM - A "portable calibre" off a USB stick.
REM - A network installation with local metadata database
REM (for performance) and books stored on a network share
REM - A local installation using customised settings
REM
REM If trying to run off a USB stick then the folder structure
REM shown below is recommended (relative to the location of
REM this batch file). This structure can also be used
REM when running off a local hard disk if you want to get the
REM level of control this batch file provides.
REM - Calibre2 Location of program files
REM - CalibreConfig Location of Configuration files
REM - CalibreLibrary Location of Books and metadata
REM - CalibreSource Location of Calibre Source files (Optional)
REM
REM This batch file is designed so that if you create the recommended
REM folder structure then it can be used 'as is' without modification.
REM
REM More information on the Environment Variables used by Calibre can
REM be found at:
REM http://manual.calibre-ebook.com/customize.html#environment-variables
REM
REM The documentation for this file in the Calibre manual can be found at:
REM http://manual.calibre-ebook.com/portable.html
REM
REM CHANGE HISTORY
REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬
REM 22 Jan 2012 itimpi - Updated to keep it in line with the calibre-portable.sh
REM file for Linux systems
REM -------------------------------------
REM Set up Calibre Config folder
REM
REM This is where user specific settings
REM are stored.
REM -------------------------------------
REM Use the CalibreConfig folder next to this batch file, if present.
REM Single-line IFs are used instead of a parenthesised block because
REM %cd% is expanded when a block is parsed, so a ")" in the current
REM path (e.g. "Program Files (x86)") would end the block early.
REM The quoted SET form keeps special characters and stray trailing
REM blanks out of the value.
IF EXIST CalibreConfig SET "CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig"
IF EXIST CalibreConfig ECHO CONFIG FILES: %cd%\CalibreConfig
REM --------------------------------------------------------------
REM Specify Location of ebooks
REM
REM Location where Book files are located
REM Either set explicit path, or if running from a USB stick
REM a relative path can be used to avoid need to know the
REM drive letter of the USB stick.
REM
REM Comment out any of the following that are not to be used
REM (although leaving them in does not really matter)
REM --------------------------------------------------------------
REM Pick the library folder. The checks run in order, so a local
REM CalibreLibrary folder (tested last) overrides the U: location.
REM Single-line IFs avoid a parenthesised block, which would break if
REM the expanded %cd% contained a ")". The quoted SET form keeps
REM special characters and trailing blanks out of the value.
IF EXIST U:\eBooks\CalibreLibrary SET "CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary"
IF EXIST U:\eBooks\CalibreLibrary ECHO LIBRARY FILES: U:\eBOOKS\CalibreLibrary
IF EXIST CalibreLibrary SET "CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary"
IF EXIST CalibreLibrary ECHO LIBRARY FILES: %cd%\CalibreLibrary
REM --------------------------------------------------------------
REM Specify Location of metadata database (optional)
REM
REM Location where the metadata.db file is located. If not set
REM the same location as Books files will be assumed. This
REM option is typically set to get better performance when the
REM Library is on a (slow) network drive. Putting the metadata.db
REM file locally then gives a big performance improvement.
REM
REM NOTE. If you use this option, then the ability to switch
REM libraries within Calibre will be disabled. Therefore
REM you do not want to set it if the metadata.db file
REM is at the same location as the book files.
REM
REM Another point to watch is that plugins can cause problems
REM as they often store absolute path information
REM --------------------------------------------------------------
REM Override the database location only when CalibreMetadata\metadata.db
REM exists AND the library folder is not that same CalibreMetadata folder.
REM The path is quoted so a current directory containing spaces still
REM works, and GOTO is used instead of nested parenthesised blocks so a
REM ")" in the expanded %cd% cannot end a block early.
IF NOT EXIST "%cd%\CalibreMetadata\metadata.db" GOTO CalibreMetadataDone
IF "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" GOTO CalibreMetadataDone
SET "CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db"
ECHO DATABASE: %cd%\CalibreMetadata\metadata.db
ECHO '
ECHO ***CAUTION*** Library Switching will be disabled
ECHO '
:CalibreMetadataDone
REM --------------------------------------------------------------
REM Specify Location of source (optional)
REM
REM It is easy to run Calibre from source
REM Just set the environment variable to where the source is located
REM When running from source the GUI will have a '*' after the version
REM number that is displayed at the bottom of the Calibre main screen.
REM
REM More information on setting up a development environment can
REM be found at:
REM http://manual.calibre-ebook.com/develop.html#develop
REM --------------------------------------------------------------
REM Run from source when a CalibreSource\src folder sits next to this file.
REM Single-line IFs replace the IF ( ) ELSE ( ) block so that a ")" in
REM the expanded %cd% cannot end the block early. The SET is quoted so
REM special characters and trailing blanks stay out of the value.
IF EXIST CalibreSource\src SET "CALIBRE_DEVELOP_FROM=%cd%\CalibreSource\src"
IF EXIST CalibreSource\src ECHO SOURCE FILES: %cd%\CalibreSource\src
IF NOT EXIST CalibreSource\src ECHO SOURCE FILES: *** Not being Used ***
REM --------------------------------------------------------------
REM Specify Location of calibre Windows binaries (optional)
REM
REM To avoid needing Calibre to be set in the search path, ensure
REM that Calibre Program Files is current directory when starting.
REM The following test falls back to using search path .
REM This folder can be populated by copying the Calibre2 folder from
REM an existing installation or by installing direct to here.
REM
REM NOTE. Do not try and put both Windows and Linux binaries into
REM same folder as this can cause problems.
REM --------------------------------------------------------------
REM Switch into the Calibre2 program folder when it exists.
REM The path is quoted so a current directory containing spaces works.
REM GOTO is used instead of a parenthesised block: inside a block %cd%
REM is expanded when the block is parsed, so the ECHO after CD would
REM report the OLD directory rather than the Calibre2 folder.
IF NOT EXIST "%cd%\Calibre2" GOTO CalibreProgramDirMissing
CD "%cd%\Calibre2"
ECHO PROGRAM FILES: %cd%
GOTO CalibreProgramDirDone
:CalibreProgramDirMissing
ECHO PROGRAM FILES: *** Use System search PATH ***
:CalibreProgramDirDone
REM --------------------------------------------------------------
REM Location of Calibre Temporary files (optional)
REM
REM Calibre creates a lot of temporary files while running
REM In theory these are removed when Calibre finishes, but
REM in practice files can be left behind (particularly if
REM any errors occur). Using this option allows some
REM explicit clean-up of these files.
REM If not set Calibre uses the normal system TEMP location
REM --------------------------------------------------------------
REM The temp folder is derived from the current value of TEMP.
SET CALIBRE_TEMP_DIR=%TEMP%\CALIBRE_TEMP
ECHO TEMPORARY FILES: %CALIBRE_TEMP_DIR%
REM CAUTION: any existing folder of that name is deleted with all its
REM contents (RMDIR /s /q) and then recreated empty.
IF EXIST "%CALIBRE_TEMP_DIR%" RMDIR /s /q "%CALIBRE_TEMP_DIR%"
MKDIR "%CALIBRE_TEMP_DIR%"
REM set the following for any components that do
REM not obey the CALIBRE_TEMP_DIR setting
REM (this must come after CALIBRE_TEMP_DIR was derived from TEMP above)
SET TMP=%CALIBRE_TEMP_DIR%
SET TEMP=%CALIBRE_TEMP_DIR%
REM --------------------------------------------------------------
REM Set the Interface language (optional)
REM
REM If not set Calibre uses the language set in Preferences
REM --------------------------------------------------------------
REM The override is set unconditionally to EN here; comment out the SET
REM line to fall back to the language chosen in Preferences.
SET CALIBRE_OVERRIDE_LANG=EN
ECHO INTERFACE LANGUAGE: %CALIBRE_OVERRIDE_LANG%
REM ----------------------------------------------------------
REM The following gives a chance to check the settings before
REM starting Calibre. It can be commented out if not wanted.
REM ----------------------------------------------------------
REM Print a separator line (a single apostrophe), then wait for a key
REM press so the settings echoed above can be checked.
ECHO '
ECHO Press CTRL-C if you do not want to continue
PAUSE
REM --------------------------------------------------------
REM Start up the calibre program.
REM
REM The use of 'belownormal' priority helps keep the system
REM responsive while Calibre is running. Within Calibre itself
REM the background processes should be set to run with 'low' priority.
REM Using the START command starts up Calibre in a separate process.
REM If used without /WAIT option it launches Calibre and continues the batch file.
REM Normally this would simply run off the end and close the Command window.
REM Use with /WAIT to wait until Calibre completes to run a task on exit
REM --------------------------------------------------------
REM Announce start-up and show the directory Calibre is launched from.
ECHO "Starting up Calibre"
ECHO OFF
ECHO %cd%
REM Launch without /WAIT, so the batch file does not wait for Calibre.
REM The library path is the one selected earlier in this file.
START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%"
@echo OFF
REM Calibre-Portable.bat
REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬
REM
REM Batch File to start a Calibre configuration on Windows
REM giving explicit control of the location of:
REM - Calibre Program Files
REM - Calibre Library Files
REM - Calibre Config Files
REM - Calibre Metadata database
REM - Calibre Source files
REM - Calibre Temp Files
REM By setting the paths correctly it can be used to run:
REM - A "portable calibre" off a USB stick.
REM - A network installation with local metadata database
REM (for performance) and books stored on a network share
REM - A local installation using customised settings
REM
REM If trying to run off a USB stick then the folder structure
REM shown below is recommended (relative to the location of
REM this batch file). This structure can also be used
REM when running off a local hard disk if you want to get the
REM level of control this batch file provides.
REM - Calibre2 Location of program files
REM - CalibreConfig Location of Configuration files
REM - CalibreLibrary Location of Books and metadata
REM - CalibreSource Location of Calibre Source files (Optional)
REM
REM This batch file is designed so that if you create the recommended
REM folder structure then it can be used 'as is' without modification.
REM
REM More information on the Environment Variables used by Calibre can
REM be found at:
REM http://manual.calibre-ebook.com/customize.html#environment-variables
REM
REM The documentation for this file in the Calibre manual can be found at:
REM http://manual.calibre-ebook.com/portable.html
REM
REM CHANGE HISTORY
REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬
REM 22 Jan 2012 itimpi - Updated to keep it in line with the calibre-portable.sh
REM file for Linux systems
REM -------------------------------------
REM Set up Calibre Config folder
REM
REM This is where user specific settings
REM are stored.
REM -------------------------------------
REM Use the CalibreConfig folder next to this batch file, if present.
REM Single-line IFs are used instead of a parenthesised block because
REM %cd% is expanded when a block is parsed, so a ")" in the current
REM path (e.g. "Program Files (x86)") would end the block early.
REM The quoted SET form keeps special characters and stray trailing
REM blanks out of the value.
IF EXIST CalibreConfig SET "CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig"
IF EXIST CalibreConfig ECHO CONFIG FILES: %cd%\CalibreConfig
REM --------------------------------------------------------------
REM Specify Location of ebooks
REM
REM Location where Book files are located
REM Either set explicit path, or if running from a USB stick
REM a relative path can be used to avoid need to know the
REM drive letter of the USB stick.
REM
REM Comment out any of the following that are not to be used
REM (although leaving them in does not really matter)
REM --------------------------------------------------------------
REM Pick the library folder. The checks run in order, so a local
REM CalibreLibrary folder (tested last) overrides the U: location.
REM Single-line IFs avoid a parenthesised block, which would break if
REM the expanded %cd% contained a ")". The quoted SET form keeps
REM special characters and trailing blanks out of the value.
IF EXIST U:\eBooks\CalibreLibrary SET "CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary"
IF EXIST U:\eBooks\CalibreLibrary ECHO LIBRARY FILES: U:\eBOOKS\CalibreLibrary
IF EXIST CalibreLibrary SET "CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary"
IF EXIST CalibreLibrary ECHO LIBRARY FILES: %cd%\CalibreLibrary
REM --------------------------------------------------------------
REM Specify Location of metadata database (optional)
REM
REM Location where the metadata.db file is located. If not set
REM the same location as Books files will be assumed. This
REM option is typically set to get better performance when the
REM Library is on a (slow) network drive. Putting the metadata.db
REM file locally then gives a big performance improvement.
REM
REM NOTE. If you use this option, then the ability to switch
REM libraries within Calibre will be disabled. Therefore
REM you do not want to set it if the metadata.db file
REM is at the same location as the book files.
REM
REM Another point to watch is that plugins can cause problems
REM as they often store absolute path information
REM --------------------------------------------------------------
REM Override the database location only when CalibreMetadata\metadata.db
REM exists AND the library folder is not that same CalibreMetadata folder.
REM The path is quoted so a current directory containing spaces still
REM works, and GOTO is used instead of nested parenthesised blocks so a
REM ")" in the expanded %cd% cannot end a block early.
IF NOT EXIST "%cd%\CalibreMetadata\metadata.db" GOTO CalibreMetadataDone
IF "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" GOTO CalibreMetadataDone
SET "CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db"
ECHO DATABASE: %cd%\CalibreMetadata\metadata.db
ECHO '
ECHO ***CAUTION*** Library Switching will be disabled
ECHO '
:CalibreMetadataDone
REM --------------------------------------------------------------
REM Specify Location of source (optional)
REM
REM It is easy to run Calibre from source
REM Just set the environment variable to where the source is located
REM When running from source the GUI will have a '*' after the version
REM number that is displayed at the bottom of the Calibre main screen.
REM
REM More information on setting up a development environment can
REM be found at:
REM http://manual.calibre-ebook.com/develop.html#develop
REM --------------------------------------------------------------
REM Run from source when a CalibreSource\src folder sits next to this file.
REM Single-line IFs replace the IF ( ) ELSE ( ) block so that a ")" in
REM the expanded %cd% cannot end the block early. The SET is quoted so
REM special characters and trailing blanks stay out of the value.
IF EXIST CalibreSource\src SET "CALIBRE_DEVELOP_FROM=%cd%\CalibreSource\src"
IF EXIST CalibreSource\src ECHO SOURCE FILES: %cd%\CalibreSource\src
IF NOT EXIST CalibreSource\src ECHO SOURCE FILES: *** Not being Used ***
REM --------------------------------------------------------------
REM Specify Location of calibre Windows binaries (optional)
REM
REM To avoid needing Calibre to be set in the search path, ensure
REM that Calibre Program Files is current directory when starting.
REM The following test falls back to using search path .
REM This folder can be populated by copying the Calibre2 folder from
REM an existing installation or by installing direct to here.
REM
REM NOTE. Do not try and put both Windows and Linux binaries into
REM same folder as this can cause problems.
REM --------------------------------------------------------------
REM Switch into the Calibre2 program folder when it exists.
REM The path is quoted so a current directory containing spaces works.
REM GOTO is used instead of a parenthesised block: inside a block %cd%
REM is expanded when the block is parsed, so the ECHO after CD would
REM report the OLD directory rather than the Calibre2 folder.
IF NOT EXIST "%cd%\Calibre2" GOTO CalibreProgramDirMissing
CD "%cd%\Calibre2"
ECHO PROGRAM FILES: %cd%
GOTO CalibreProgramDirDone
:CalibreProgramDirMissing
ECHO PROGRAM FILES: *** Use System search PATH ***
:CalibreProgramDirDone
REM --------------------------------------------------------------
REM Location of Calibre Temporary files (optional)
REM
REM Calibre creates a lot of temporary files while running
REM In theory these are removed when Calibre finishes, but
REM in practice files can be left behind (particularly if
REM any errors occur). Using this option allows some
REM explicit clean-up of these files.
REM If not set Calibre uses the normal system TEMP location
REM --------------------------------------------------------------
REM The temp folder is derived from the current value of TEMP.
SET CALIBRE_TEMP_DIR=%TEMP%\CALIBRE_TEMP
ECHO TEMPORARY FILES: %CALIBRE_TEMP_DIR%
REM CAUTION: any existing folder of that name is deleted with all its
REM contents (RMDIR /s /q) and then recreated empty.
IF EXIST "%CALIBRE_TEMP_DIR%" RMDIR /s /q "%CALIBRE_TEMP_DIR%"
MKDIR "%CALIBRE_TEMP_DIR%"
REM set the following for any components that do
REM not obey the CALIBRE_TEMP_DIR setting
REM (this must come after CALIBRE_TEMP_DIR was derived from TEMP above)
SET TMP=%CALIBRE_TEMP_DIR%
SET TEMP=%CALIBRE_TEMP_DIR%
REM --------------------------------------------------------------
REM Set the Interface language (optional)
REM
REM If not set Calibre uses the language set in Preferences
REM --------------------------------------------------------------
REM The override is set unconditionally to EN here; comment out the SET
REM line to fall back to the language chosen in Preferences.
SET CALIBRE_OVERRIDE_LANG=EN
ECHO INTERFACE LANGUAGE: %CALIBRE_OVERRIDE_LANG%
REM ----------------------------------------------------------
REM The following gives a chance to check the settings before
REM starting Calibre. It can be commented out if not wanted.
REM ----------------------------------------------------------
REM Print a separator line (a single apostrophe), then wait for a key
REM press so the settings echoed above can be checked.
ECHO '
ECHO Press CTRL-C if you do not want to continue
PAUSE
REM --------------------------------------------------------
REM Start up the calibre program.
REM
REM The use of 'belownormal' priority helps keep the system
REM responsive while Calibre is running. Within Calibre itself
REM the background processes should be set to run with 'low' priority.
REM Using the START command starts up Calibre in a separate process.
REM If used without /WAIT option it launches Calibre and continues the batch file.
REM Normally this would simply run off the end and close the Command window.
REM Use with /WAIT to wait until Calibre completes to run a task on exit
REM --------------------------------------------------------
REM Announce start-up and show the directory Calibre is launched from.
ECHO "Starting up Calibre"
ECHO OFF
ECHO %cd%
REM Launch without /WAIT, so the batch file does not wait for Calibre.
REM The library path is the one selected earlier in this file.
START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%"

View File

@ -1,10 +1,10 @@
/**
* Version: 1.0 Alpha-1
* Build Date: 13-Nov-2007
* Copyright (c) 2006-2007, Coolite Inc. (http://www.coolite.com/). All rights reserved.
* License: Licensed under The MIT License. See license.txt and http://www.datejs.com/license/.
* Website: http://www.datejs.com/ or http://www.coolite.com/datejs/
*/
/**
* Version: 1.0 Alpha-1
* Build Date: 13-Nov-2007
* Copyright (c) 2006-2007, Coolite Inc. (http://www.coolite.com/). All rights reserved.
* License: Licensed under The MIT License. See license.txt and http://www.datejs.com/license/.
* Website: http://www.datejs.com/ or http://www.coolite.com/datejs/
*/
// en-US culture table: names, format patterns, the regular expressions used
// to recognise date words, and US timezone abbreviation -> UTC offset maps.
Date.CultureInfo = {
	name: "en-US",
	englishName: "English (United States)",
	nativeName: "English (United States)",
	dayNames: ["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],
	abbreviatedDayNames: ["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],
	shortestDayNames: ["Su","Mo","Tu","We","Th","Fr","Sa"],
	firstLetterDayNames: ["S","M","T","W","T","F","S"],
	monthNames: ["January","February","March","April","May","June","July","August","September","October","November","December"],
	abbreviatedMonthNames: ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],
	amDesignator: "AM",
	pmDesignator: "PM",
	firstDayOfWeek: 0,
	twoDigitYearMax: 2029,
	dateElementOrder: "mdy",
	formatPatterns: {
		shortDate: "M/d/yyyy",
		longDate: "dddd, MMMM dd, yyyy",
		shortTime: "h:mm tt",
		longTime: "h:mm:ss tt",
		fullDateTime: "dddd, MMMM dd, yyyy h:mm:ss tt",
		sortableDateTime: "yyyy-MM-ddTHH:mm:ss",
		universalSortableDateTime: "yyyy-MM-dd HH:mm:ssZ",
		rfc1123: "ddd, dd MMM yyyy HH:mm:ss GMT",
		monthDay: "MMMM dd",
		yearMonth: "MMMM, yyyy"
	},
	regexPatterns: {
		jan: /^jan(uary)?/i,
		feb: /^feb(ruary)?/i,
		mar: /^mar(ch)?/i,
		apr: /^apr(il)?/i,
		may: /^may/i,
		jun: /^jun(e)?/i,
		jul: /^jul(y)?/i,
		aug: /^aug(ust)?/i,
		sep: /^sep(t(ember)?)?/i,
		oct: /^oct(ober)?/i,
		nov: /^nov(ember)?/i,
		dec: /^dec(ember)?/i,
		sun: /^su(n(day)?)?/i,
		mon: /^mo(n(day)?)?/i,
		tue: /^tu(e(s(day)?)?)?/i,
		wed: /^we(d(nesday)?)?/i,
		thu: /^th(u(r(s(day)?)?)?)?/i,
		fri: /^fr(i(day)?)?/i,
		sat: /^sa(t(urday)?)?/i,
		future: /^next/i,
		past: /^last|past|prev(ious)?/i,
		add: /^(\+|after|from)/i,
		subtract: /^(\-|before|ago)/i,
		yesterday: /^yesterday/i,
		today: /^t(oday)?/i,
		tomorrow: /^tomorrow/i,
		now: /^n(ow)?/i,
		millisecond: /^ms|milli(second)?s?/i,
		second: /^sec(ond)?s?/i,
		minute: /^min(ute)?s?/i,
		hour: /^h(ou)?rs?/i,
		week: /^w(ee)?k/i,
		month: /^m(o(nth)?s?)?/i,
		day: /^d(ays?)?/i,
		year: /^y((ea)?rs?)?/i,
		shortMeridian: /^(a|p)/i,
		longMeridian: /^(a\.?m?\.?|p\.?m?\.?)/i,
		timezone: /^((e(s|d)t|c(s|d)t|m(s|d)t|p(s|d)t)|((gmt)?\s*(\+|\-)\s*\d\d\d\d?)|gmt)/i,
		ordinalSuffix: /^\s*(st|nd|rd|th)/i,
		timeContext: /^\s*(\:|a|p)/i
	},
	// The two offset tables were swapped: standard time carried the
	// daylight-saving offsets and vice versa (EST is UTC-5, EDT is UTC-4).
	abbreviatedTimeZoneStandard: { GMT: "-000", EST: "-0500", CST: "-0600", MST: "-0700", PST: "-0800" },
	abbreviatedTimeZoneDST: { GMT: "-000", EDT: "-0400", CDT: "-0500", MDT: "-0600", PDT: "-0700" }
};
Date.getMonthNumberFromName=function(name){var n=Date.CultureInfo.monthNames,m=Date.CultureInfo.abbreviatedMonthNames,s=name.toLowerCase();for(var i=0;i<n.length;i++){if(n[i].toLowerCase()==s||m[i].toLowerCase()==s){return i;}}
return-1;};Date.getDayNumberFromName=function(name){var n=Date.CultureInfo.dayNames,m=Date.CultureInfo.abbreviatedDayNames,o=Date.CultureInfo.shortestDayNames,s=name.toLowerCase();for(var i=0;i<n.length;i++){if(n[i].toLowerCase()==s||m[i].toLowerCase()==s){return i;}}
@ -101,4 +101,4 @@ return _.any.apply(null,rx);}else{return _get(fx);}};g._formats=g.formats(["yyyy
return g._start.call({},s);};}());Date._parse=Date.parse;Date.parse=function(s){var r=null;if(!s){return null;}
try{r=Date.Grammar.start.call({},s);}catch(e){return null;}
return((r[1].length===0)?r[0]:null);};Date.getParseFunction=function(fx){var fn=Date.Grammar.formats(fx);return function(s){var r=null;try{r=fn.call({},s);}catch(e){return null;}
return((r[1].length===0)?r[0]:null);};};Date.parseExact=function(s,fx){return Date.getParseFunction(fx)(s);};
return((r[1].length===0)?r[0]:null);};};Date.parseExact=function(s,fx){return Date.getParseFunction(fx)(s);};

View File

@ -1,215 +1,215 @@
/**
* jQuery.ScrollTo
* Copyright (c) 2007-2009 Ariel Flesler - aflesler(at)gmail(dot)com | http://flesler.blogspot.com
* Dual licensed under MIT and GPL.
* Date: 5/25/2009
*
* @projectDescription Easy element scrolling using jQuery.
* http://flesler.blogspot.com/2007/10/jqueryscrollto.html
* Works with jQuery +1.2.6. Tested on FF 2/3, IE 6/7/8, Opera 9.5/6, Safari 3, Chrome 1 on WinXP.
*
* @author Ariel Flesler
* @version 1.4.2
*
* @id jQuery.scrollTo
* @id jQuery.fn.scrollTo
* @param {String, Number, DOMElement, jQuery, Object} target Where to scroll the matched elements.
* The different options for target are:
* - A number position (will be applied to all axes).
* - A string position ('44', '100px', '+=90', etc ) will be applied to all axes
* - A jQuery/DOM element ( logically, child of the element to scroll )
* - A string selector, that will be relative to the element to scroll ( 'li:eq(2)', etc )
* - A hash { top:x, left:y }, x and y can be any kind of number/string like above.
* - A percentage of the container's dimension/s, for example: 50% to go to the middle.
* - The string 'max' for go-to-end.
* @param {Number} duration The OVERALL length of the animation, this argument can be the settings object instead.
* @param {Object,Function} settings Optional set of settings or the onAfter callback.
* @option {String} axis Which axis must be scrolled, use 'x', 'y', 'xy' or 'yx'.
* @option {Number} duration The OVERALL length of the animation.
* @option {String} easing The easing method for the animation.
* @option {Boolean} margin If true, the margin of the target element will be deducted from the final position.
* @option {Object, Number} offset Add/deduct from the end position. One number for both axes or { top:x, left:y }.
* @option {Object, Number} over Add/deduct the height/width multiplied by 'over', can be { top:x, left:y } when using both axes.
* @option {Boolean} queue If true, and both axis are given, the 2nd axis will only be animated after the first one ends.
* @option {Function} onAfter Function to be called after the scrolling ends.
* @option {Function} onAfterFirst If queuing is activated, this function will be called after the first scrolling ends.
* @return {jQuery} Returns the same jQuery object, for chaining.
*
* @desc Scroll to a fixed position
* @example $('div').scrollTo( 340 );
*
* @desc Scroll relatively to the actual position
* @example $('div').scrollTo( '+=340px', { axis:'y' } );
*
* @desc Scroll using a selector (relative to the scrolled element)
* @example $('div').scrollTo( 'p.paragraph:eq(2)', 500, { easing:'swing', queue:true, axis:'xy' } );
*
* @desc Scroll to a DOM element (same for jQuery object)
* @example var second_child = document.getElementById('container').firstChild.nextSibling;
* $('#container').scrollTo( second_child, { duration:500, axis:'x', onAfter:function(){
* alert('scrolled!!');
* }});
*
* @desc Scroll on both axes, to different values
* @example $('div').scrollTo( { top: 300, left:'+=200' }, { axis:'xy', offset:-20 } );
*/
;(function( $ ){
// Static entry point: scrolls the window by delegating to $.fn.scrollTo.
var $scrollTo = $.scrollTo = function( target, duration, settings ){
	var $win = $(window);
	$win.scrollTo( target, duration, settings );
};

// Defaults merged into every call's settings.
// The duration default is 0 when parseFloat of the jQuery version string
// is at least 1.3, otherwise 1.
$scrollTo.defaults = {
	axis: 'xy',
	duration: !( parseFloat($.fn.jquery) >= 1.3 ) ? 1 : 0
};

// Gives back the element that has to be animated in order to scroll the
// window. Retained for backwards compatibility (localScroll & serialScroll).
// The 'scope' argument is accepted but not used.
$scrollTo.window = function( scope ){
	var $win = $(window);
	return $win._scrollable();
};
// Hack, hack, hack :)
// Returns the real elements to scroll (supports window/iframes, documents and regular nodes)
/**
 * Maps every matched element to the element that must actually be scrolled.
 * Regular nodes map to themselves. Windows, iframes, documents and
 * html/body map to the owning document's body (Safari or quirks mode) or
 * documentElement (otherwise).
 * @return {jQuery} The mapped set.
 */
$.fn._scrollable = function(){
	return this.map(function(){
		var elem = this,
			// No nodeName (a window) or one of the container-like nodes
			isWin = !elem.nodeName || $.inArray( elem.nodeName.toLowerCase(), ['iframe','#document','html','body'] ) != -1;

		if( !isWin )
			return elem;

		var doc = (elem.contentWindow || elem).document || elem.ownerDocument || elem,
			// $.browser does not exist in every jQuery build (it was removed
			// in 1.9); without this guard the lookup below throws a TypeError.
			isSafari = !!( $.browser && $.browser.safari );

		return isSafari || doc.compatMode == 'BackCompat' ?
			doc.body :
			doc.documentElement;
	});
};
// Scrolls every matched element to 'target' and returns the original jQuery
// set for chaining (the trailing .end() undoes the _scrollable() mapping).
// 'duration' may be omitted, in which case the second argument is taken as
// the settings object; 'settings' may also be just the onAfter callback.
$.fn.scrollTo = function( target, duration, settings ){
// scrollTo( target, settings ): shift the arguments
if( typeof duration == 'object' ){
settings = duration;
duration = 0;
}
// A bare function is shorthand for { onAfter: fn }
if( typeof settings == 'function' )
settings = { onAfter:settings };
// 'max' becomes a huge number; numeric values are clamped to the real
// maximum further down
if( target == 'max' )
target = 9e9;
settings = $.extend( {}, $scrollTo.defaults, settings );
// Speed is still recognized for backwards compatibility
duration = duration || settings.speed || settings.duration;
// Make sure the settings are given right
// (queueing only makes sense when more than one axis is scrolled)
settings.queue = settings.queue && settings.axis.length > 1;
if( settings.queue )
// Let's keep the overall duration
duration /= 2;
// Normalise offset/over to { top:..., left:... }
settings.offset = both( settings.offset );
settings.over = both( settings.over );
return this._scrollable().each(function(){
var elem = this,
$elem = $(elem),
// targ: per-element copy of the target; toff: target offset (only set
// for element targets); attr: properties handed to animate()
targ = target, toff, attr = {},
win = $elem.is('html,body');
switch( typeof targ ){
// A number will pass the regex
case 'number':
case 'string':
if( /^([+-]=)?\d+(\.\d+)?(px|%)?$/.test(targ) ){
targ = both( targ );
// We are done
break;
}
// Relative selector, no break!
targ = $(targ,this);
case 'object':
// DOMElement / jQuery
if( targ.is || targ.style )
// Get the real position of the target
toff = (targ = $(targ)).offset();
}
// Build the animation properties one axis at a time; i is the index of
// the axis within settings.axis
$.each( settings.axis.split(''), function( i, axis ){
var Pos = axis == 'x' ? 'Left' : 'Top',
pos = Pos.toLowerCase(),
key = 'scroll' + Pos,
old = elem[key],
max = $scrollTo.max(elem, axis);
if( toff ){// jQuery / DOMElement
// For html/body the target offset is used as-is; otherwise it is made
// relative to the element's own offset plus its current scroll
attr[key] = toff[pos] + ( win ? 0 : old - $elem.offset()[pos] );
// If it's a dom element, reduce the margin
if( settings.margin ){
attr[key] -= parseInt(targ.css('margin'+Pos)) || 0;
attr[key] -= parseInt(targ.css('border'+Pos+'Width')) || 0;
}
attr[key] += settings.offset[pos] || 0;
if( settings.over[pos] )
// Scroll to a fraction of its width/height
attr[key] += targ[axis=='x'?'width':'height']() * settings.over[pos];
}else{
var val = targ[pos];
// Handle percentage values
attr[key] = val.slice && val.slice(-1) == '%' ?
parseFloat(val) / 100 * max
: val;
}
// Number or 'number'
// (only plain non-negative integers match, so relative strings such as
// '+=90' are passed to animate() untouched)
if( /^\d+$/.test(attr[key]) )
// Check the limits
attr[key] = attr[key] <= 0 ? 0 : Math.min( attr[key], max );
// Queueing axes
// (!i is true only for the first axis)
if( !i && settings.queue ){
// Don't waste time animating, if there's no need.
if( old != attr[key] )
// Intermediate animation
animate( settings.onAfterFirst );
// Don't animate this axis again in the next iteration.
delete attr[key];
}
});
// Final animation: whatever is left in attr, followed by onAfter
animate( settings.onAfter );
// Animates $elem to the properties currently in attr; when a callback is
// given it is invoked with the original target and the merged settings
function animate( callback ){
$elem.animate( attr, duration, settings.easing, callback && function(){
callback.call(this, target, settings);
});
};
}).end();
};
// Max scrolling position, works on quirks mode
// It only fails (not too badly) on IE, quirks mode.
/**
 * Largest scroll position of 'elem' along 'axis' ('x' for horizontal,
 * anything else for vertical).
 */
$scrollTo.max = function( elem, axis ){
	var horizontal = axis == 'x',
		scrollProp = horizontal ? 'scrollWidth' : 'scrollHeight';

	if( $(elem).is('html,body') ){
		// Document-level scrolling: compare <html> and <body> and take the
		// larger scroll extent minus the smaller client extent.
		var clientProp = horizontal ? 'clientWidth' : 'clientHeight',
			root = elem.ownerDocument.documentElement,
			docBody = elem.ownerDocument.body,
			extent = Math.max( root[scrollProp], docBody[scrollProp] ),
			viewport = Math.min( root[clientProp], docBody[clientProp] );
		return extent - viewport;
	}

	// Regular element: scroll extent minus its jQuery width()/height().
	var total = elem[scrollProp],
		$el = $(elem);
	return total - ( horizontal ? $el.width() : $el.height() );
};
// Normalises a value to the { top, left } shape: anything whose typeof is
// 'object' is handed back untouched, any other value is used for both keys.
function both( val ){
	if( typeof val == 'object' )
		return val;
	return { top:val, left:val };
}
/**
* jQuery.ScrollTo
* Copyright (c) 2007-2009 Ariel Flesler - aflesler(at)gmail(dot)com | http://flesler.blogspot.com
* Dual licensed under MIT and GPL.
* Date: 5/25/2009
*
* @projectDescription Easy element scrolling using jQuery.
* http://flesler.blogspot.com/2007/10/jqueryscrollto.html
* Works with jQuery +1.2.6. Tested on FF 2/3, IE 6/7/8, Opera 9.5/6, Safari 3, Chrome 1 on WinXP.
*
* @author Ariel Flesler
* @version 1.4.2
*
* @id jQuery.scrollTo
* @id jQuery.fn.scrollTo
* @param {String, Number, DOMElement, jQuery, Object} target Where to scroll the matched elements.
* The different options for target are:
* - A number position (will be applied to all axes).
* - A string position ('44', '100px', '+=90', etc ) will be applied to all axes
* - A jQuery/DOM element ( logically, child of the element to scroll )
* - A string selector, that will be relative to the element to scroll ( 'li:eq(2)', etc )
* - A hash { top:x, left:y }, x and y can be any kind of number/string like above.
* - A percentage of the container's dimension/s, for example: 50% to go to the middle.
* - The string 'max' for go-to-end.
* @param {Number} duration The OVERALL length of the animation, this argument can be the settings object instead.
* @param {Object,Function} settings Optional set of settings or the onAfter callback.
* @option {String} axis Which axis must be scrolled, use 'x', 'y', 'xy' or 'yx'.
* @option {Number} duration The OVERALL length of the animation.
* @option {String} easing The easing method for the animation.
* @option {Boolean} margin If true, the margin of the target element will be deducted from the final position.
* @option {Object, Number} offset Add/deduct from the end position. One number for both axes or { top:x, left:y }.
* @option {Object, Number} over Add/deduct the height/width multiplied by 'over', can be { top:x, left:y } when using both axes.
* @option {Boolean} queue If true, and both axis are given, the 2nd axis will only be animated after the first one ends.
* @option {Function} onAfter Function to be called after the scrolling ends.
* @option {Function} onAfterFirst If queuing is activated, this function will be called after the first scrolling ends.
* @return {jQuery} Returns the same jQuery object, for chaining.
*
* @desc Scroll to a fixed position
* @example $('div').scrollTo( 340 );
*
* @desc Scroll relatively to the actual position
* @example $('div').scrollTo( '+=340px', { axis:'y' } );
*
* @desc Scroll using a selector (relative to the scrolled element)
* @example $('div').scrollTo( 'p.paragraph:eq(2)', 500, { easing:'swing', queue:true, axis:'xy' } );
*
* @desc Scroll to a DOM element (same for jQuery object)
* @example var second_child = document.getElementById('container').firstChild.nextSibling;
* $('#container').scrollTo( second_child, { duration:500, axis:'x', onAfter:function(){
* alert('scrolled!!');
* }});
*
* @desc Scroll on both axes, to different values
* @example $('div').scrollTo( { top: 300, left:'+=200' }, { axis:'xy', offset:-20 } );
*/
;(function( $ ){
// Static entry point: scrolls the window by delegating to $.fn.scrollTo.
var $scrollTo = $.scrollTo = function( target, duration, settings ){
	var $win = $(window);
	$win.scrollTo( target, duration, settings );
};

// Defaults merged into every call's settings.
// The duration default is 0 when parseFloat of the jQuery version string
// is at least 1.3, otherwise 1.
$scrollTo.defaults = {
	axis: 'xy',
	duration: !( parseFloat($.fn.jquery) >= 1.3 ) ? 1 : 0
};

// Gives back the element that has to be animated in order to scroll the
// window. Retained for backwards compatibility (localScroll & serialScroll).
// The 'scope' argument is accepted but not used.
$scrollTo.window = function( scope ){
	var $win = $(window);
	return $win._scrollable();
};
// Hack, hack, hack :)
// Returns the real elements to scroll (supports window/iframes, documents and regular nodes)
/**
 * Maps every matched element to the element that must actually be scrolled.
 * Regular nodes map to themselves. Windows, iframes, documents and
 * html/body map to the owning document's body (Safari or quirks mode) or
 * documentElement (otherwise).
 * @return {jQuery} The mapped set.
 */
$.fn._scrollable = function(){
	return this.map(function(){
		var elem = this,
			// No nodeName (a window) or one of the container-like nodes
			isWin = !elem.nodeName || $.inArray( elem.nodeName.toLowerCase(), ['iframe','#document','html','body'] ) != -1;

		if( !isWin )
			return elem;

		var doc = (elem.contentWindow || elem).document || elem.ownerDocument || elem,
			// $.browser does not exist in every jQuery build (it was removed
			// in 1.9); without this guard the lookup below throws a TypeError.
			isSafari = !!( $.browser && $.browser.safari );

		return isSafari || doc.compatMode == 'BackCompat' ?
			doc.body :
			doc.documentElement;
	});
};
// Scrolls every matched element (resolved through _scrollable()) to `target`.
//   target   - a number or numeric string (optionally prefixed with += or -=
//              and suffixed with px or %), the string 'max', a selector, a DOM
//              element, a jQuery object, or a { top, left } hash.
//   duration - animation duration; when an object is passed in this position
//              it is taken to be `settings` instead.
//   settings - options hash merged over $scrollTo.defaults, or a function,
//              which is then used as the onAfter callback.
// Returns the result of .end() called on the _scrollable() set.
$.fn.scrollTo = function( target, duration, settings ){
if( typeof duration == 'object' ){
// Called as scrollTo( target, settings ): shift the arguments.
settings = duration;
duration = 0;
}
if( typeof settings == 'function' )
// A bare function is shorthand for { onAfter: fn }.
settings = { onAfter:settings };
if( target == 'max' )
// A very large number; being all digits, it is capped to the real maximum by the limit check below.
target = 9e9;
settings = $.extend( {}, $scrollTo.defaults, settings );
// Speed is still recognized for backwards compatibility
duration = duration || settings.speed || settings.duration;
// Make sure the settings are given right
// (queueing is only kept when more than one axis is requested)
settings.queue = settings.queue && settings.axis.length > 1;
if( settings.queue )
// Let's keep the overall duration
duration /= 2;
// Normalise offset and over into { top, left } hashes.
settings.offset = both( settings.offset );
settings.over = both( settings.over );
return this._scrollable().each(function(){
// targ: working copy of target; toff: offset of the target element (only set
// for element targets); attr: the scroll properties handed to animate();
// win: whether the element being scrolled is <html> or <body>.
var elem = this,
$elem = $(elem),
targ = target, toff, attr = {},
win = $elem.is('html,body');
switch( typeof targ ){
// A number will pass the regex
case 'number':
case 'string':
if( /^([+-]=)?\d+(\.\d+)?(px|%)?$/.test(targ) ){
targ = both( targ );
// We are done
break;
}
// Relative selector, no break!
// (the string is resolved inside the scrolled element, then handled by the 'object' case)
targ = $(targ,this);
case 'object':
// DOMElement / jQuery
if( targ.is || targ.style )
// Get the real position of the target
toff = (targ = $(targ)).offset();
}
// Work out the destination for each requested axis ('x' and/or 'y').
$.each( settings.axis.split(''), function( i, axis ){
// Pos/pos: 'Left'/'left' or 'Top'/'top'; key: 'scrollLeft' or 'scrollTop';
// old: the element's current value of that property; max: result of $scrollTo.max for this axis.
var Pos = axis == 'x' ? 'Left' : 'Top',
pos = Pos.toLowerCase(),
key = 'scroll' + Pos,
old = elem[key],
max = $scrollTo.max(elem, axis);
if( toff ){// jQuery / DOMElement
// For html/body the target's offset is used as is; otherwise it is made
// relative to the scrolled element and shifted by the current scroll value.
attr[key] = toff[pos] + ( win ? 0 : old - $elem.offset()[pos] );
// If it's a dom element, reduce the margin
if( settings.margin ){
attr[key] -= parseInt(targ.css('margin'+Pos)) || 0;
attr[key] -= parseInt(targ.css('border'+Pos+'Width')) || 0;
}
attr[key] += settings.offset[pos] || 0;
if( settings.over[pos] )
// Scroll to a fraction of its width/height
attr[key] += targ[axis=='x'?'width':'height']() * settings.over[pos];
}else{
// Plain value: read this axis' entry from the { top, left } hash.
var val = targ[pos];
// Handle percentage values
attr[key] = val.slice && val.slice(-1) == '%' ?
parseFloat(val) / 100 * max
: val;
}
// Number or 'number'
// (only values made up solely of digits match; relative strings such as '+=340px' are left alone)
if( /^\d+$/.test(attr[key]) )
// Check the limits
attr[key] = attr[key] <= 0 ? 0 : Math.min( attr[key], max );
// Queueing axes
// (first axis only: animate it on its own, then drop it so the final animate() covers the other axis)
if( !i && settings.queue ){
// Don't waste time animating, if there's no need.
if( old != attr[key] )
// Intermediate animation
animate( settings.onAfterFirst );
// Don't animate this axis again in the next iteration.
delete attr[key];
}
});
animate( settings.onAfter );
// Function declaration, so it is hoisted and callable from the axis loop above.
// Animates $elem to whatever is currently in attr; when a callback is given it
// is invoked with the original target and the merged settings.
function animate( callback ){
$elem.animate( attr, duration, settings.easing, callback && function(){
callback.call(this, target, settings);
});
};
}).end();
};
// Max scrolling position, works on quirks mode
// It only fails (not too badly) on IE, quirks mode.
// `axis` is 'x' for the horizontal axis; any other value means vertical.
$scrollTo.max = function( elem, axis ){
	var horizontal = axis == 'x',
		scrollDim = horizontal ? 'scrollWidth' : 'scrollHeight';

	if( $(elem).is('html,body') ){
		// Document-level scrolling: combine the <html> and <body> measurements.
		var clientDim = horizontal ? 'clientWidth' : 'clientHeight',
			owner = elem.ownerDocument,
			root = owner.documentElement,
			body = owner.body;

		return Math.max( root[scrollDim], body[scrollDim] )
			- Math.min( root[clientDim], body[clientDim] );
	}

	// Regular element: scrollable size minus the jQuery width()/height().
	return elem[scrollDim] - $(elem)[ horizontal ? 'width' : 'height' ]();
};
// Normalises a value into a { top, left } hash. Anything whose typeof is
// 'object' (including null) is handed back untouched.
function both( val ){
	if( typeof val == 'object' )
		return val;
	return { top:val, left:val };
};
})( jQuery );

View File

@ -1,67 +1,67 @@
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import re, htmlentitydefs
from future_builtins import map
# Compiled pattern for the default character set; built on first use.
_ascii_pat = None


def clean_ascii_chars(txt, charlist=None):
    r'''
    Remove ASCII control chars.
    This is all control chars except \t, \n and \r

    :param txt: The text to clean. Any falsy value (``None``, ``''``) yields ``''``.
    :param charlist: Optional iterable of integer code points to remove instead
        of the default set (0-31 and 127, minus 9, 10 and 13).
    :return: ``txt`` with every occurrence of the selected characters removed.
    '''
    if not txt:
        return ''
    global _ascii_pat
    if _ascii_pat is None:
        chars = set(xrange(32))
        chars.add(127)
        for x in (9, 10, 13):
            chars.remove(x)
        _ascii_pat = re.compile(u'|'.join(re.escape(unichr(c)) for c in chars))
    if charlist is None:
        pat = _ascii_pat
    else:
        # Escape every character: code points such as '.', '|' or '*' would
        # otherwise be read as regex syntax and match the wrong text or fail
        # to compile.
        pat = re.compile(u'|'.join(re.escape(unichr(c)) for c in charlist))
    return pat.sub('', txt)
def allowed(x):
    '''
    Return True if the single character ``x`` may be kept in XML text.

    Follows the XML 1.0 ``Char`` production (tab, LF, CR, U+0020-U+D7FF,
    U+E000-U+FFFD and U+10000-U+10FFFF, all bounds inclusive); DEL (U+007F)
    is additionally rejected.
    '''
    x = ord(x)
    if x == 127:
        return False
    # Inclusive bounds: U+D7FF, U+E000, U+FFFD, U+10000 and U+10FFFF are all
    # valid XML characters and must not be dropped.
    return (x in (9, 10, 13) or 0x20 <= x <= 0xd7ff or
            0xe000 <= x <= 0xfffd or 0x10000 <= x <= 0x10ffff)


def clean_xml_chars(unicode_string):
    '''
    Return ``unicode_string`` with every character rejected by
    :func:`allowed` removed.

    :param unicode_string: The text to filter, examined one item at a time.
    '''
    # NOTE(review): each item is tested on its own, so if the interpreter
    # stores astral characters as surrogate pairs, both halves are rejected
    # -- confirm whether that matters for callers.
    return u''.join(filter(allowed, unicode_string))
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
def unescape(text, rm=False, rchar=u''):
    '''
    Replace HTML or XML character references and entities in a text string
    with the characters they stand for.

    :param text: The HTML (or XML) source text.
    :param rm: If True, references that cannot be resolved are replaced by
        ``rchar``; otherwise they are left as is.
    :param rchar: Replacement for unresolvable references when ``rm`` is set.
    :return: The plain text, as a Unicode string, if necessary.
    '''
    def fixup(m, rm=rm, rchar=rchar):
        text = m.group(0)
        try:
            if text[:2] == "&#":
                # character reference; the hex marker may be written x or X
                if text[:3].lower() == "&#x":
                    return unichr(int(text[3:-1], 16))
                return unichr(int(text[2:-1]))
            # named entity: return the resolved character directly, exactly as
            # the numeric branch does, so rm only affects unresolvable input
            return unichr(htmlentitydefs.name2codepoint[text[1:-1]])
        except (ValueError, OverflowError, KeyError):
            # ValueError: malformed digits or a code point unichr() rejects
            # OverflowError: a number too large for unichr() to accept
            # KeyError: unknown entity name
            pass
        if rm:
            return rchar  # replace by char
        return text  # leave as is
    return re.sub(r"&#?\w+;", fixup, text)
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import re, htmlentitydefs
from future_builtins import map
# Compiled pattern for the default character set; built on first use.
_ascii_pat = None


def clean_ascii_chars(txt, charlist=None):
    r'''
    Remove ASCII control chars.
    This is all control chars except \t, \n and \r

    :param txt: The text to clean. Any falsy value (``None``, ``''``) yields ``''``.
    :param charlist: Optional iterable of integer code points to remove instead
        of the default set (0-31 and 127, minus 9, 10 and 13).
    :return: ``txt`` with every occurrence of the selected characters removed.
    '''
    if not txt:
        return ''
    global _ascii_pat
    if _ascii_pat is None:
        chars = set(xrange(32))
        chars.add(127)
        for x in (9, 10, 13):
            chars.remove(x)
        _ascii_pat = re.compile(u'|'.join(re.escape(unichr(c)) for c in chars))
    if charlist is None:
        pat = _ascii_pat
    else:
        # Escape every character: code points such as '.', '|' or '*' would
        # otherwise be read as regex syntax and match the wrong text or fail
        # to compile.
        pat = re.compile(u'|'.join(re.escape(unichr(c)) for c in charlist))
    return pat.sub('', txt)
def allowed(x):
    '''
    Return True if the single character ``x`` may be kept in XML text.

    Follows the XML 1.0 ``Char`` production (tab, LF, CR, U+0020-U+D7FF,
    U+E000-U+FFFD and U+10000-U+10FFFF, all bounds inclusive); DEL (U+007F)
    is additionally rejected.
    '''
    x = ord(x)
    if x == 127:
        return False
    # Inclusive bounds: U+D7FF, U+E000, U+FFFD, U+10000 and U+10FFFF are all
    # valid XML characters and must not be dropped.
    return (x in (9, 10, 13) or 0x20 <= x <= 0xd7ff or
            0xe000 <= x <= 0xfffd or 0x10000 <= x <= 0x10ffff)


def clean_xml_chars(unicode_string):
    '''
    Return ``unicode_string`` with every character rejected by
    :func:`allowed` removed.

    :param unicode_string: The text to filter, examined one item at a time.
    '''
    # NOTE(review): each item is tested on its own, so if the interpreter
    # stores astral characters as surrogate pairs, both halves are rejected
    # -- confirm whether that matters for callers.
    return u''.join(filter(allowed, unicode_string))
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
def unescape(text, rm=False, rchar=u''):
    '''
    Replace HTML or XML character references and entities in a text string
    with the characters they stand for.

    :param text: The HTML (or XML) source text.
    :param rm: If True, references that cannot be resolved are replaced by
        ``rchar``; otherwise they are left as is.
    :param rchar: Replacement for unresolvable references when ``rm`` is set.
    :return: The plain text, as a Unicode string, if necessary.
    '''
    def fixup(m, rm=rm, rchar=rchar):
        text = m.group(0)
        try:
            if text[:2] == "&#":
                # character reference; the hex marker may be written x or X
                if text[:3].lower() == "&#x":
                    return unichr(int(text[3:-1], 16))
                return unichr(int(text[2:-1]))
            # named entity: return the resolved character directly, exactly as
            # the numeric branch does, so rm only affects unresolvable input
            return unichr(htmlentitydefs.name2codepoint[text[1:-1]])
        except (ValueError, OverflowError, KeyError):
            # ValueError: malformed digits or a code point unichr() rejects
            # OverflowError: a number too large for unichr() to accept
            # KeyError: unknown entity name
            pass
        if rm:
            return rchar  # replace by char
        return text  # leave as is
    return re.sub(r"&#?\w+;", fixup, text)

View File

@ -1,25 +1,25 @@
Copyright (c) 2004-2011, CherryPy Team (team@cherrypy.org)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the CherryPy Team nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copyright (c) 2004-2011, CherryPy Team (team@cherrypy.org)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the CherryPy Team nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,175 +1,175 @@
from lxml import etree
from html5lib.treebuilders.etree import tag_regexp
from gettext import gettext
_ = gettext
import _base
from html5lib.constants import voidElements
from html5lib import ihatexml
class Root(object):
    '''
    Synthetic document node built from an element tree: its children are the
    optional doctype followed by every top-level node around the root element.
    '''

    def __init__(self, et):
        self.elementtree = et
        self.text = None
        self.tail = None
        self.children = []

        docinfo = et.docinfo
        if docinfo.internalDTD:
            self.children.append(Doctype(self, docinfo.root_name,
                                         docinfo.public_id,
                                         docinfo.system_url))

        # Rewind from the root element to its first preceding sibling ...
        first = et.getroot()
        previous = first.getprevious()
        while previous is not None:
            first = previous
            previous = first.getprevious()

        # ... then collect every top-level node in order.
        current = first
        while current is not None:
            self.children.append(current)
            current = current.getnext()

    def __getitem__(self, key):
        return self.children[key]

    def getnext(self):
        return None

    def __len__(self):
        # Always reported as 1, however many children were collected.
        return 1
class Doctype(object):
    '''Stand-in node carrying a document type declaration's name and ids.'''

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name, self.public_id, self.system_id = name, public_id, system_id
        self.text = self.tail = None

    def getnext(self):
        # Hands back the entry at index 1 of the owning root's children.
        siblings = self.root_node.children
        return siblings[1]
class FragmentRoot(Root):
    '''Root variant for a list of fragment items; each item is wrapped in a
    FragmentWrapper that points back at this root.'''

    def __init__(self, children):
        wrapped = []
        for child in children:
            wrapped.append(FragmentWrapper(self, child))
        self.children = wrapped
        self.text = None
        self.tail = None

    def getnext(self):
        return None
class FragmentWrapper(object):
    '''
    Wraps one item of a fragment list so it can be walked like a node.
    Attributes not found on the wrapper are looked up on the wrapped object.
    '''

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        # Must be assigned before any other lookup: __getattr__ reads self.obj.
        self.obj = obj
        self.text = self.obj.text if hasattr(self.obj, 'text') else None
        self.tail = self.obj.tail if hasattr(self.obj, 'tail') else None
        self.isstring = isinstance(obj, basestring)

    def __getattr__(self, name):
        return getattr(self.obj, name)

    def getnext(self):
        # Next wrapper in the owning root's children, or None for the last one.
        siblings = self.root_node.children
        following = siblings.index(self) + 1
        if following < len(siblings):
            return siblings[following]
        return None

    def __getitem__(self, key):
        return self.obj[key]

    def __nonzero__(self):
        return bool(self.obj)

    def getparent(self):
        return None

    def __str__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)
class TreeWalker(_base.NonRecursiveTreeWalker):
    '''
    Tree walker for lxml trees. A text node is addressed as a
    ``(element, "text")`` or ``(element, "tail")`` tuple; every other node is
    the node object itself.
    '''

    def __init__(self, tree):
        if hasattr(tree, "getroot"):
            walk_root = Root(tree)
        elif isinstance(tree, list):
            walk_root = FragmentRoot(tree)
        else:
            walk_root = tree
        _base.NonRecursiveTreeWalker.__init__(self, walk_root)
        self.filter = ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            return _base.TEXT, getattr(owner, key)
        if isinstance(node, Root):
            return (_base.DOCUMENT,)
        if isinstance(node, Doctype):
            return _base.DOCTYPE, node.name, node.public_id, node.system_id
        if isinstance(node, FragmentWrapper) and node.isstring:
            return _base.TEXT, node
        if node.tag == etree.Comment:
            return _base.COMMENT, node.text

        # This is assumed to be an ordinary element
        match = tag_regexp.match(node.tag)
        namespace, tag = match.groups() if match else (None, node.tag)
        element_name = self.filter.fromXmlName(tag)
        attributes = [(self.filter.fromXmlName(name), value)
                      for name, value in node.attrib.iteritems()]
        # Last item: truthy when the element has child elements or text.
        return (_base.ELEMENT, namespace, element_name, attributes,
                len(node) > 0 or node.text)

    def getFirstChild(self, node):
        assert not isinstance(node, tuple), _("Text nodes have no children")
        assert len(node) or node.text, "Node has no children"
        # Leading text comes before the first child element.
        return (node, "text") if node.text else node[0]

    def getNextSibling(self, node):
        if not isinstance(node, tuple):
            # An element is followed by its tail text, if it has any.
            if node.tail:
                return (node, "tail")
            return node.getnext()

        # Text node
        owner, key = node
        assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
        if key != "text":  # tail
            return owner.getnext()
        # XXX: we cannot use a "bool(node) and node[0] or None" construct here
        # because node[0] might evaluate to False if it has no child element
        return owner[0] if len(owner) else None

    def getParentNode(self, node):
        target = node
        if isinstance(node, tuple):  # Text node
            target, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            if key == "text":
                return target
            # else: fallback to "normal" processing
        return target.getparent()
from lxml import etree
from html5lib.treebuilders.etree import tag_regexp
from gettext import gettext
_ = gettext
import _base
from html5lib.constants import voidElements
from html5lib import ihatexml
class Root(object):
    '''
    Synthetic document node built from an element tree: its children are the
    optional doctype followed by every top-level node around the root element.
    '''

    def __init__(self, et):
        self.elementtree = et
        self.text = None
        self.tail = None
        self.children = []

        docinfo = et.docinfo
        if docinfo.internalDTD:
            self.children.append(Doctype(self, docinfo.root_name,
                                         docinfo.public_id,
                                         docinfo.system_url))

        # Rewind from the root element to its first preceding sibling ...
        first = et.getroot()
        previous = first.getprevious()
        while previous is not None:
            first = previous
            previous = first.getprevious()

        # ... then collect every top-level node in order.
        current = first
        while current is not None:
            self.children.append(current)
            current = current.getnext()

    def __getitem__(self, key):
        return self.children[key]

    def getnext(self):
        return None

    def __len__(self):
        # Always reported as 1, however many children were collected.
        return 1
class Doctype(object):
    '''Stand-in node carrying a document type declaration's name and ids.'''

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name, self.public_id, self.system_id = name, public_id, system_id
        self.text = self.tail = None

    def getnext(self):
        # Hands back the entry at index 1 of the owning root's children.
        siblings = self.root_node.children
        return siblings[1]
class FragmentRoot(Root):
    '''Root variant for a list of fragment items; each item is wrapped in a
    FragmentWrapper that points back at this root.'''

    def __init__(self, children):
        wrapped = []
        for child in children:
            wrapped.append(FragmentWrapper(self, child))
        self.children = wrapped
        self.text = None
        self.tail = None

    def getnext(self):
        return None
class FragmentWrapper(object):
    '''
    Wraps one item of a fragment list so it can be walked like a node.
    Attributes not found on the wrapper are looked up on the wrapped object.
    '''

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        # Must be assigned before any other lookup: __getattr__ reads self.obj.
        self.obj = obj
        self.text = self.obj.text if hasattr(self.obj, 'text') else None
        self.tail = self.obj.tail if hasattr(self.obj, 'tail') else None
        self.isstring = isinstance(obj, basestring)

    def __getattr__(self, name):
        return getattr(self.obj, name)

    def getnext(self):
        # Next wrapper in the owning root's children, or None for the last one.
        siblings = self.root_node.children
        following = siblings.index(self) + 1
        if following < len(siblings):
            return siblings[following]
        return None

    def __getitem__(self, key):
        return self.obj[key]

    def __nonzero__(self):
        return bool(self.obj)

    def getparent(self):
        return None

    def __str__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)
class TreeWalker(_base.NonRecursiveTreeWalker):
    '''
    Tree walker for lxml trees. A text node is addressed as a
    ``(element, "text")`` or ``(element, "tail")`` tuple; every other node is
    the node object itself.
    '''

    def __init__(self, tree):
        if hasattr(tree, "getroot"):
            walk_root = Root(tree)
        elif isinstance(tree, list):
            walk_root = FragmentRoot(tree)
        else:
            walk_root = tree
        _base.NonRecursiveTreeWalker.__init__(self, walk_root)
        self.filter = ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            return _base.TEXT, getattr(owner, key)
        if isinstance(node, Root):
            return (_base.DOCUMENT,)
        if isinstance(node, Doctype):
            return _base.DOCTYPE, node.name, node.public_id, node.system_id
        if isinstance(node, FragmentWrapper) and node.isstring:
            return _base.TEXT, node
        if node.tag == etree.Comment:
            return _base.COMMENT, node.text

        # This is assumed to be an ordinary element
        match = tag_regexp.match(node.tag)
        namespace, tag = match.groups() if match else (None, node.tag)
        element_name = self.filter.fromXmlName(tag)
        attributes = [(self.filter.fromXmlName(name), value)
                      for name, value in node.attrib.iteritems()]
        # Last item: truthy when the element has child elements or text.
        return (_base.ELEMENT, namespace, element_name, attributes,
                len(node) > 0 or node.text)

    def getFirstChild(self, node):
        assert not isinstance(node, tuple), _("Text nodes have no children")
        assert len(node) or node.text, "Node has no children"
        # Leading text comes before the first child element.
        return (node, "text") if node.text else node[0]

    def getNextSibling(self, node):
        if not isinstance(node, tuple):
            # An element is followed by its tail text, if it has any.
            if node.tail:
                return (node, "tail")
            return node.getnext()

        # Text node
        owner, key = node
        assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
        if key != "text":  # tail
            return owner.getnext()
        # XXX: we cannot use a "bool(node) and node[0] or None" construct here
        # because node[0] might evaluate to False if it has no child element
        return owner[0] if len(owner) else None

    def getParentNode(self, node):
        target = node
        if isinstance(node, tuple):  # Text node
            target, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            if key == "text":
                return target
            # else: fallback to "normal" processing
        return target.getparent()

View File

@ -1,81 +1,81 @@
ACKNOWLEDGMENTS
* RAR text compression algorithm is based on Dmitry Shkarin PPMII
and Dmitry Subbotin carryless rangecoder public domain source code.
You may find it in ftp.elf.stuba.sk/pub/pc/pack.
* RAR encryption includes parts of code from Szymon Stefanek
and Brian Gladman AES implementations also as Steve Reid SHA-1 source.
---------------------------------------------------------------------------
Copyright (c) 2002, Dr Brian Gladman < >, Worcester, UK.
All rights reserved.
LICENSE TERMS
The free distribution and use of this software in both source and binary
form is allowed (with or without changes) provided that:
1. distributions of this source code include the above copyright
notice, this list of conditions and the following disclaimer;
2. distributions in binary form include the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other associated materials;
3. the copyright holder's name is not used to endorse products
built using this software without specific written permission.
ALTERNATIVELY, provided that this notice is retained in full, this product
may be distributed under the terms of the GNU General Public License (GPL),
in which case the provisions of the GPL apply INSTEAD OF those given above.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Source code of this package also as other cryptographic technology
and computing project related links are available on Brian Gladman's
web site: http://www.gladman.me.uk
* RAR uses CRC32 function based on Intel Slicing-by-8 algorithm.
Original Intel Slicing-by-8 code is available here:
http://sourceforge.net/projects/slicing-by-8/
Original Intel Slicing-by-8 code is licensed under BSD License
available at http://www.opensource.org/licenses/bsd-license.html
Copyright (c) 2004-2006 Intel Corporation.
All Rights Reserved
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
* Useful hints provided by Alexander Khoroshev and Bulat Ziganshin allowed
to significantly improve RAR compression and speed.
ACKNOWLEDGMENTS
* RAR text compression algorithm is based on Dmitry Shkarin PPMII
and Dmitry Subbotin carryless rangecoder public domain source code.
You may find it in ftp.elf.stuba.sk/pub/pc/pack.
* RAR encryption includes parts of code from Szymon Stefanek
and Brian Gladman AES implementations also as Steve Reid SHA-1 source.
---------------------------------------------------------------------------
Copyright (c) 2002, Dr Brian Gladman < >, Worcester, UK.
All rights reserved.
LICENSE TERMS
The free distribution and use of this software in both source and binary
form is allowed (with or without changes) provided that:
1. distributions of this source code include the above copyright
notice, this list of conditions and the following disclaimer;
2. distributions in binary form include the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other associated materials;
3. the copyright holder's name is not used to endorse products
built using this software without specific written permission.
ALTERNATIVELY, provided that this notice is retained in full, this product
may be distributed under the terms of the GNU General Public License (GPL),
in which case the provisions of the GPL apply INSTEAD OF those given above.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Source code of this package also as other cryptographic technology
and computing project related links are available on Brian Gladman's
web site: http://www.gladman.me.uk
* RAR uses CRC32 function based on Intel Slicing-by-8 algorithm.
Original Intel Slicing-by-8 code is available here:
http://sourceforge.net/projects/slicing-by-8/
Original Intel Slicing-by-8 code is licensed under BSD License
available at http://www.opensource.org/licenses/bsd-license.html
Copyright (c) 2004-2006 Intel Corporation.
All Rights Reserved
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
* Useful hints provided by Alexander Khoroshev and Bulat Ziganshin allowed
to significantly improve RAR compression and speed.