Replace CRLF line endings

This commit is contained in:
Kovid Goyal 2013-05-28 11:42:53 +05:30
parent a1ce980d99
commit c0f549625a
47 changed files with 6457 additions and 6457 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,157 +1,157 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) --> <!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" <svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve"> viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
<filter id="filter5365"> <filter id="filter5365">
<feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur> <feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
</filter> </filter>
<g id="layer1"> <g id="layer1">
</g> </g>
<g id="layer2"> <g id="layer2">
<polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005 <polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005
171.429,297.005 "/> 171.429,297.005 "/>
<g id="path5265" filter="url(#filter5365)"> <g id="path5265" filter="url(#filter5365)">
<polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/> <polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09 <polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09
119.953,80.636 70.397,97.084 "/> 119.953,80.636 70.397,97.084 "/>
</g> </g>
<g id="path5267" filter="url(#filter5365)"> <g id="path5267" filter="url(#filter5365)">
<path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068 <path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068
l2.322,16.553L118.639,100.902z"/> l2.322,16.553L118.639,100.902z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432 <path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432
c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/> c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
</g> </g>
<g id="path5269" filter="url(#filter5365)"> <g id="path5269" filter="url(#filter5365)">
<path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812 <path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812
C68.936,101.726,70.711,98.81,70.711,98.81z"/> C68.936,101.726,70.711,98.81,70.711,98.81z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986 <path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986
c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/> c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
</g> </g>
<g id="path5271" filter="url(#filter5365)"> <g id="path5271" filter="url(#filter5365)">
<path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208 <path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208
C17.974,94.288,17.113,87.874,21.479,79.607z"/> C17.974,94.288,17.113,87.874,21.479,79.607z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019 <path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019
l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/> l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
</g> </g>
<g id="path5273" filter="url(#filter5365)"> <g id="path5273" filter="url(#filter5365)">
<path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346 <path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346
l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/> l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897 <path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897
l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139 l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139
L120.871,99.092z"/> L120.871,99.092z"/>
</g> </g>
<path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/> <path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
<path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982 <path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982
c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/> c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>
<radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse"> <radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#B5FFA6"/> <stop offset="0" style="stop-color:#B5FFA6"/>
<stop offset="1" style="stop-color:#76E976"/> <stop offset="1" style="stop-color:#76E976"/>
</radialGradient> </radialGradient>
<path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742 <path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742
l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/> l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
<path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d=" <path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d="
M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071 M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071
l0.356,14.644L118.22,97.921z"/> l0.356,14.644L118.22,97.921z"/>
<path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029 <path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029
C66.471,100.649,68.068,97.629,68.068,97.629z"/> C66.471,100.649,68.068,97.629,68.068,97.629z"/>
<g id="path5419" filter="url(#filter5365)"> <g id="path5419" filter="url(#filter5365)">
<polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/> <polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233 <polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233
106.738,52.778 57.183,69.227 "/> 106.738,52.778 57.183,69.227 "/>
</g> </g>
<g id="path5421" filter="url(#filter5365)"> <g id="path5421" filter="url(#filter5365)">
<path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069 <path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069
l2.322,16.552L105.424,73.045z"/> l2.322,16.552L105.424,73.045z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2 <path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2
c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/> c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
</g> </g>
<g id="path5423" filter="url(#filter5365)"> <g id="path5423" filter="url(#filter5365)">
<path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812 <path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812
C55.721,73.869,57.497,70.953,57.497,70.953z"/> C55.721,73.869,57.497,70.953,57.497,70.953z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777 <path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777
c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/> c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
</g> </g>
<g id="path5425" filter="url(#filter5365)"> <g id="path5425" filter="url(#filter5365)">
<path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062 <path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062
C4.759,66.431,3.899,60.017,8.265,51.751z"/> C4.759,66.431,3.899,60.017,8.265,51.751z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018 <path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018
L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/> L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
</g> </g>
<g id="path5427" filter="url(#filter5365)"> <g id="path5427" filter="url(#filter5365)">
<path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725 <path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725
l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/> l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959 <path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959
L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139 L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139
L107.656,71.234z"/> L107.656,71.234z"/>
</g> </g>
<path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779 <path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779
l49.548,18.171L54.54,67.985L6.102,50.193z"/> l49.548,18.171L54.54,67.985L6.102,50.193z"/>
<path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701 <path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701
c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/> c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>
<radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse"> <radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#789DED"/> <stop offset="0" style="stop-color:#789DED"/>
<stop offset="1" style="stop-color:#2381E8"/> <stop offset="1" style="stop-color:#2381E8"/>
</radialGradient> </radialGradient>
<path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01 <path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01
c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/> c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
<path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5 <path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5
L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064 L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064
z"/> z"/>
<path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028 <path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028
C53.256,72.793,54.854,69.772,54.854,69.772z"/> C53.256,72.793,54.854,69.772,54.854,69.772z"/>
<g id="path5447" filter="url(#filter5365)"> <g id="path5447" filter="url(#filter5365)">
<polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/> <polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305 <polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305
123.882,28.85 74.326,45.299 "/> 123.882,28.85 74.326,45.299 "/>
</g> </g>
<g id="path5449" filter="url(#filter5365)"> <g id="path5449" filter="url(#filter5365)">
<path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069 <path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069
l2.321,16.552L122.567,49.116z"/> l2.321,16.552L122.567,49.116z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271 <path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271
c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/> c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
</g> </g>
<g id="path5451" filter="url(#filter5365)"> <g id="path5451" filter="url(#filter5365)">
<path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812 <path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812
C72.863,49.94,74.641,47.024,74.641,47.024z"/> C72.863,49.94,74.641,47.024,74.641,47.024z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849 <path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849
c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/> c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
</g> </g>
<g id="path5453" filter="url(#filter5365)"> <g id="path5453" filter="url(#filter5365)">
<path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133 <path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133
C21.902,42.502,21.042,36.088,25.408,27.822z"/> C21.902,42.502,21.042,36.088,25.408,27.822z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018 <path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018
L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/> L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
</g> </g>
<g id="path5455" filter="url(#filter5365)"> <g id="path5455" filter="url(#filter5365)">
<path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725 <path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725
l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/> l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168 <path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168
c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/> c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
</g> </g>
<path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/> <path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
<path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778 <path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778
c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/> c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>
<radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse"> <radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#FD8A8A"/> <stop offset="0" style="stop-color:#FD8A8A"/>
<stop offset="1" style="stop-color:#FF7878"/> <stop offset="1" style="stop-color:#FF7878"/>
</radialGradient> </radialGradient>
<path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125 <path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125
c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/> c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>
<linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)"> <linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
<stop offset="0" style="stop-color:#FFFFFF"/> <stop offset="0" style="stop-color:#FFFFFF"/>
<stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/> <stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
</linearGradient> </linearGradient>
<path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/> <path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
<path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5 <path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5
l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643 l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643
L122.148,46.135z"/> L122.148,46.135z"/>
<path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028 <path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028
C70.399,48.864,71.997,45.844,71.997,45.844z"/> C70.399,48.864,71.997,45.844,71.997,45.844z"/>
</g> </g>
</svg> </svg>

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 473 KiB

After

Width:  |  Height:  |  Size: 472 KiB

View File

@ -1,32 +1,32 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 15.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) --> <!-- Generator: Adobe Illustrator 15.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
width="181px" height="212px" viewBox="0 0 181 212" enable-background="new 0 0 181 212" xml:space="preserve"> width="181px" height="212px" viewBox="0 0 181 212" enable-background="new 0 0 181 212" xml:space="preserve">
<g> <g>
<path fill="#FFFFFF" d="M105.134,97.504l-3.598,2.688l-4.197-5.618l-5.754,4.299l-1.733-2.32 <path fill="#FFFFFF" d="M105.134,97.504l-3.598,2.688l-4.197-5.618l-5.754,4.299l-1.733-2.32
c19.007-16.602,22.311-45.299,6.955-65.852C80.691,9.133,50.143,4.711,28.574,20.826s-25.99,46.663-9.875,68.232 c19.007-16.602,22.311-45.299,6.955-65.852C80.691,9.133,50.143,4.711,28.574,20.826s-25.99,46.663-9.875,68.232
c15.355,20.553,43.812,25.521,65.122,12l1.732,2.319l-5.755,4.3l4.198,5.619l-3.598,2.688l68.083,91.125l24.734-18.48 c15.355,20.553,43.812,25.521,65.122,12l1.732,2.319l-5.755,4.3l4.198,5.619l-3.598,2.688l68.083,91.125l24.734-18.48
L105.134,97.504z M29.113,81.277C17.296,65.46,20.538,43.058,36.355,31.24c15.816-11.817,38.219-8.575,50.036,7.242 L105.134,97.504z M29.113,81.277C17.296,65.46,20.538,43.058,36.355,31.24c15.816-11.817,38.219-8.575,50.036,7.242
c11.817,15.817,8.575,38.22-7.241,50.037C63.333,100.337,40.931,97.094,29.113,81.277z"/> c11.817,15.817,8.575,38.22-7.241,50.037C63.333,100.337,40.931,97.094,29.113,81.277z"/>
<g> <g>
<g> <g>
<path fill="#010101" d="M28.574,15.642C7.006,31.756,2.584,62.305,18.699,83.874c16.114,21.568,46.663,25.989,68.231,9.875 <path fill="#010101" d="M28.574,15.642C7.006,31.756,2.584,62.305,18.699,83.874c16.114,21.568,46.663,25.989,68.231,9.875
c21.569-16.115,25.99-46.664,9.876-68.232C80.691,3.948,50.143-0.473,28.574,15.642z M79.15,83.335 c21.569-16.115,25.99-46.664,9.876-68.232C80.691,3.948,50.143-0.473,28.574,15.642z M79.15,83.335
c-15.817,11.817-38.22,8.574-50.037-7.242c-11.817-15.817-8.575-38.219,7.242-50.037c15.816-11.817,38.219-8.575,50.036,7.242 c-15.817,11.817-38.22,8.574-50.037-7.242c-11.817-15.817-8.575-38.219,7.242-50.037c15.816-11.817,38.219-8.575,50.036,7.242
C98.209,49.115,94.967,71.517,79.15,83.335z"/> C98.209,49.115,94.967,71.517,79.15,83.335z"/>
</g> </g>
<rect x="83.577" y="89.507" transform="matrix(0.8011 -0.5986 0.5986 0.8011 -39.0685 71.0375)" fill="#010101" width="7.528" height="9.579"/> <rect x="83.577" y="89.507" transform="matrix(0.8011 -0.5986 0.5986 0.8011 -39.0685 71.0375)" fill="#010101" width="7.528" height="9.579"/>
<rect x="81.819" y="94.546" transform="matrix(0.8011 -0.5985 0.5985 0.8011 -42.3354 75.7256)" fill="#991A37" width="21.894" height="14.026"/> <rect x="81.819" y="94.546" transform="matrix(0.8011 -0.5985 0.5985 0.8011 -42.3354 75.7256)" fill="#991A37" width="21.894" height="14.026"/>
<linearGradient id="SVGID_1_" gradientUnits="userSpaceOnUse" x1="98.5879" y1="122.3535" x2="129.4668" y2="122.3535" gradientTransform="matrix(0.8011 -0.5985 0.5985 0.8011 -37.7672 117.3501)"> <linearGradient id="SVGID_1_" gradientUnits="userSpaceOnUse" x1="98.5879" y1="122.3535" x2="129.4668" y2="122.3535" gradientTransform="matrix(0.8011 -0.5985 0.5985 0.8011 -37.7672 117.3501)">
<stop offset="0" style="stop-color:#000000"/> <stop offset="0" style="stop-color:#000000"/>
<stop offset="0.3983" style="stop-color:#FFFFFF"/> <stop offset="0.3983" style="stop-color:#FFFFFF"/>
<stop offset="1" style="stop-color:#000000"/> <stop offset="1" style="stop-color:#000000"/>
</linearGradient> </linearGradient>
<polygon fill="url(#SVGID_1_)" points="173.217,183.444 148.482,201.925 80.399,110.8 105.134,92.319 "/> <polygon fill="url(#SVGID_1_)" points="173.217,183.444 148.482,201.925 80.399,110.8 105.134,92.319 "/>
</g> </g>
</g> </g>
</svg> </svg>

Before

Width:  |  Height:  |  Size: 2.2 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

@ -1,30 +1,30 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>' __author__ = 'MrStefan <mrstefaan@gmail.com>'
''' '''
www.autosport.com www.autosport.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class autosport(BasicNewsRecipe): class autosport(BasicNewsRecipe):
title = u'Autosport' title = u'Autosport'
__author__ = 'MrStefan <mrstefaan@gmail.com>' __author__ = 'MrStefan <mrstefaan@gmail.com>'
language = 'en_GB' language = 'en_GB'
description =u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...' description =u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
masthead_url='http://cdn.images.autosport.com/asdotcom.gif' masthead_url='http://cdn.images.autosport.com/asdotcom.gif'
remove_empty_feeds= True remove_empty_feeds= True
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript=True remove_javascript=True
no_stylesheets=True no_stylesheets=True
keep_only_tags =[] keep_only_tags =[]
keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'news_headline'})) keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'news_headline'}))
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_author'})) keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_author'}))
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_date'})) keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_date'}))
keep_only_tags.append(dict(name = 'p')) keep_only_tags.append(dict(name = 'p'))
feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')] feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')]

View File

@ -1,45 +1,45 @@
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.''' '''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''
class Buchreport(BasicNewsRecipe) : class Buchreport(BasicNewsRecipe) :
__author__ = 'a.peter' __author__ = 'a.peter'
__copyright__ = 'a.peter' __copyright__ = 'a.peter'
__license__ = 'GPL v3' __license__ = 'GPL v3'
description = 'Buchreport' description = 'Buchreport'
version = 4 version = 4
title = u'Buchreport' title = u'Buchreport'
timefmt = ' [%d.%m.%Y]' timefmt = ' [%d.%m.%Y]'
encoding = 'cp1252' encoding = 'cp1252'
language = 'de' language = 'de'
extra_css = 'body { margin-left: 0.00em; margin-right: 0.00em; } \ extra_css = 'body { margin-left: 0.00em; margin-right: 0.00em; } \
article, articledate, articledescription { text-align: left; } \ article, articledate, articledescription { text-align: left; } \
h1 { text-align: left; font-size: 140%; font-weight: bold; } \ h1 { text-align: left; font-size: 140%; font-weight: bold; } \
h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } \ h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } \
h3 { text-align: left; font-size: 100%; font-weight: regular; font-style: italic; } \ h3 { text-align: left; font-size: 100%; font-weight: regular; font-style: italic; } \
h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }' h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }'
oldest_article = 7.0 oldest_article = 7.0
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
publication_type = 'newspaper' publication_type = 'newspaper'
remove_tags_before = dict(name='h2') remove_tags_before = dict(name='h2')
remove_tags_after = [ remove_tags_after = [
dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}) dict(name='div', attrs={'style':["padding-top:10px;clear:both"]})
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}), dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}),
dict(name='iframe'), dict(name='iframe'),
dict(name='img') dict(name='img')
] ]
feeds = [ feeds = [
(u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100') (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
] ]
def get_masthead_url(self): def get_masthead_url(self):
return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg' return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'

View File

@ -1,11 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341449(BasicNewsRecipe):
    """Feed-driven recipe for 'Diario La Republica' (locale es_CO)."""

    # --- identification ---
    title = u'Diario La Republica'
    __author__ = 'CAVALENCIA'
    language = 'es_CO'

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100

    # --- clean-up: rely on calibre's automatic article extraction ---
    auto_cleanup = True

    # --- sources: a single RSS feed ---
    feeds = [
        (u'Diario La Republica', u'http://www.larepublica.com.co/rss/larepublica.xml'),
    ]

View File

@ -1,98 +1,98 @@
#!/usr/bin/env python #!/usr/bin/env python
__author__ = 'Darko Spasovski' __author__ = 'Darko Spasovski'
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>' __copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
''' '''
dnevnik.com.mk dnevnik.com.mk
''' '''
import re import re
import datetime import datetime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Dnevnik(BasicNewsRecipe):
    """Recipe for dnevnik.com.mk.

    The index is built by scraping the site's archive page for the current
    day: every section heading cell on that page becomes a feed and the links
    in the table that follows it become the feed's articles.
    """

    INDEX = 'http://www.dnevnik.com.mk'
    __author__ = 'Darko Spasovski'
    title = 'Dnevnik - mk'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.dnevnik.com.mk/images/re-logo.gif'
    language = 'mk'
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    # Each (pattern, replacement) pair is compiled case-insensitively and with
    # DOTALL so that '.' also spans line breaks.
    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            ## Remove anything before the start of the article.
            (r'<body.*?<\?xml version=\"1.0\"\?><!--Article start-->', lambda match: '<body>'),
            ## Remove anything after the end of the article.
            (r'<!--Article end.*?</body>', lambda match: '</body>'),
        ]
    ]

    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        .WB_DNEVNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
        """

    conversion_options = {
        # The conversion option is named 'comments'; the previous key
        # 'comment' does not correspond to any option and had no effect.
        'comments'         : description,
        'tags'             : category,
        'language'         : language,
        'linearize_tables' : True
    }

    def _archive_url(self):
        """Return the URL of the archive page for today's date (dd.mm.YYYY)."""
        datum = datetime.datetime.today().strftime('%d.%m.%Y')
        return self.INDEX + '/default.asp?section=arhiva&arhDatum=' + datum

    def parse_index(self):
        """Scrape today's archive page into a list of (section title, articles).

        Sections whose title starts with 'online' are skipped, sections with
        no articles are dropped, and the result is ordered by get_weight().
        """
        soup = self.index_to_soup(self._archive_url())
        feeds = []
        for section in soup.findAll('td', attrs={'class':'WB_DNEVNIK_ArhivaFormTitle'}):
            sectionTitle = section.contents[0].string if section.contents else None
            if sectionTitle is None:
                # Heading cell without a plain text title: nothing to name the feed with.
                self.log.warn('Section heading without a title found - skipping it.')
                continue
            if sectionTitle.lower().startswith('online'):
                # Skip online articles
                continue
            # The article links live in the table that follows the table
            # preceding the heading cell.
            previousTable = section.findPrevious(name='table')
            containerTable = None
            if previousTable is not None:
                containerTable = previousTable.findNextSibling(name='table')
            if containerTable is None:
                self.log.warn('No container table found - page layout may have been changed.')
                continue
            articles = []
            for article in containerTable.findAll('a', attrs={'class': 'WB_DNEVNIK_ArhivaFormText'}):
                href = article.get('href')
                if not href:
                    # An anchor without a target cannot be downloaded.
                    continue
                title = self.tag_to_string(article, use_alt=True).strip()
                articles.append({'title': title, 'url': self.INDEX + '/' + href,
                                 'description': '', 'date': ''})
            if articles:
                feeds.append((sectionTitle, articles))
        return sorted(feeds, key=self.get_weight)

    def get_weight(self, section):
        """
        Returns 'weight' of a section.
        Used for sorting the sections based on their 'natural' order in the printed edition.

        `section` is a (title, articles) pair as built by parse_index().
        """
        natural_order = { u'во фокусот': 1, u'актуелно': 2, u'економија': 3,
                          u'отворена': 4, u'свет': 5, u'интервју': 6, u'џубокс': 7,
                          u'репортажа': 8, u'наш туризам': 9, u'живот': 10,
                          u'автомобилизам': 11, u'спорт': 12, u'омнибус': 13 }
        # section names not on the list go to the bottom
        return natural_order.get(section[0].string.lower(), 999)

    def get_cover_url(self):
        """Return the URL of today's cover image, or '' when it cannot be found.

        Follows the 'WB_DNEVNIK_MoreLink' anchor on the archive page and takes
        the first image after the 'WB_DNEVNIK_Datum2' div on the linked page.
        """
        soup = self.index_to_soup(self._archive_url())
        anchor = soup.find('a', attrs={'class': 'WB_DNEVNIK_MoreLink'})
        if anchor is None:
            return ''
        raw = browser().open_novisit(self.INDEX + '/' + anchor['href']).read()
        cover_soup = BeautifulSoup(raw)
        date_div = cover_soup.find('div', attrs={'class':'WB_DNEVNIK_Datum2'})
        image = date_div.findNext('img') if date_div is not None else None
        if image is None or not image.get('src'):
            # Layout changed or no cover published: fall back to "no cover".
            return ''
        return self.INDEX + '/' + image['src']

View File

@ -1,56 +1,56 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311790237(BasicNewsRecipe):
    """Feed-driven recipe for 'Periódico El Colombiano' (locale es_CO)."""

    title = u'Periódico El Colombiano'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'

    # The same logo image is used for both the cover and the masthead.
    cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
    masthead_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'

    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    remove_tags_before = dict(id='contenidoArt')
    # The class previously assigned remove_tags_after twice
    # (id='enviaTips', then id='zonaPata'); only the last assignment ever
    # took effect, so that is the one kept here.
    remove_tags_after = dict(id='zonaPata')

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
        """

    feeds = [(u'Portada', u'http://www.elcolombiano.com/rss/portada.xml'),
             (u'Antioquia', u'http://www.elcolombiano.com/rss/Antioquia.xml'),
             (u'Colombia', u'http://www.elcolombiano.com/rss/Colombia.xml'),
             (u'Economia', u'http://www.elcolombiano.com/rss/Economia.xml'),
             (u'Internacional', u'http://www.elcolombiano.com/rss/Internacional.xml'),
             (u'Politica', u'http://www.elcolombiano.com/rss/Politica.xml'),
             (u'Cultura', u'http://www.elcolombiano.com/rss/Cultura.xml'),
             (u'Entretenimiento', u'http://www.elcolombiano.com/rss/Farandula.xml'),
             (u'Tecnologia', u'http://www.elcolombiano.com/rss/Tecnologia.xml'),
             (u'Television', u'http://www.elcolombiano.com/rss/Television.xml'),
             (u'Vida y Sociedad', u'http://www.elcolombiano.com/rss/Vida.xml'),
             # Was '.../Turismo.xm'; every other feed on the site ends in '.xml'.
             (u'Turismo', u'http://www.elcolombiano.com/rss/Turismo.xml'),
             (u'Salud', u'http://www.elcolombiano.com/rss/Salud.xml'),
             (u'Ciencia', u'http://www.elcolombiano.com/rss/Ciencia.xml')]

    # Page furniture stripped from every article (the duplicated
    # 'comentarios' entry has been collapsed into one).
    remove_tags = [dict(name='div', attrs={'class':'objetosRelacionados'}),
                   dict(name='div', attrs={'class':'notasRelacionadas contenedor'}),
                   dict(name='div', attrs={'class':'comentarios'}),
                   dict(name='div', attrs={'class':'mapaDelSitio'}),
                   dict(name='div', attrs={'class':'creditos'}),
                   dict(name='div', attrs={'class':'votos'}),
                   dict(name='div', attrs={'class':'divopt2'}),
                   dict(name='div', attrs={'class':'pestanasLateral'}),
                   dict(name='div', attrs={'class':'resumenSeccion'}),
                   dict(name='div', attrs={'class':'zonaComercial'}),
                   dict(name='div', attrs={'id':'zonaPata'})]

View File

@ -1,54 +1,54 @@
# coding=utf-8 # coding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElEspectador(BasicNewsRecipe):
    """Feed-driven recipe for 'Periódico el Espectador' (locale es_CO)."""

    title = u'Periódico el Espectador'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'

    # The same logo image is used for both the cover and the masthead.
    cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    masthead_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'

    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    # Alternatives that were tried and left disabled by the author:
    #remove_tags_before = dict(id='fb-root')
    #keep_only_tags = [dict(name='div', id='content')]
    remove_tags_before = dict(id='content')
    remove_tags_after = [dict(name='div', attrs={'class':'paginacion'})]

    remove_tags = [dict(name='div', attrs={'class':'herramientas_nota'}),
                   dict(name='div', attrs={'class':'relpauta'}),
                   dict(name='div', attrs={'class':'recursosrelacionados'}),
                   dict(name='div', attrs={'class':'nav_negocios'})]
    # Further candidates, currently disabled:
    #              dict(name='div', attrs={'class':'tags_playerrecurso'}),
    #              dict(name='div', attrs={'class':'ico-mail2'}),
    #              dict(name='div', attrs={'id':'caja-instapaper'}),
    #              dict(name='div', attrs={'class':'modulo herramientas'})]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
        """

    # The first entry used to carry a trailing space in its title and a
    # leading space inside the URL string; both have been removed.
    feeds = [(u'Política', u'http://www.elespectador.com/noticias/politica/feed'),
             (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
             (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
             (u'Economía', u'http://www.elespectador.com/economia/feed'),
             (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
             (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
             (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
             (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
             (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
             (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
             (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
             (u'Deportes', u'http://www.elespectador.com/deportes/feed'),
             (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
             (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
             (u'Opinión', u'http://www.elespectador.com/opinion/feed'),
             (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed')]

View File

@ -1,40 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313609361(BasicNewsRecipe):
    """Feed-driven recipe for 'El Mostrador' (locale es_CL)."""

    # --- identification ---
    news = True
    title = u'El Mostrador'
    __author__ = 'Alex Mitrani'
    description = u'Chilean online newspaper'
    publisher = u'La Plaza S.A.'
    category = 'news, rss'
    language = 'es_CL'
    masthead_url = 'http://www.elmostrador.cl/assets/img/logo-elmostrador-m.jpg'

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    summary_length = 1000
    remove_empty_feeds = True

    # --- clean-up switches ---
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    # --- article boundaries and page furniture to strip ---
    remove_tags_before = dict(name='div', attrs={'class':'news-heading cf'})
    remove_tags_after = dict(name='div', attrs={'class':'footer-actions cf'})
    remove_tags = [
        dict(name='div', attrs={'class':'footer-actions cb cf'}),
        dict(name='div', attrs={'class':'news-aside fl'}),
        dict(name='div', attrs={'class':'footer-actions cf'}),
        dict(name='div', attrs={'class':'user-bar','id':'top'}),
        dict(name='div', attrs={'class':'indicators'}),
        dict(name='div', attrs={'id':'header'}),
    ]

    # --- sources: (section title, RSS URL) ---
    feeds = [
        (u'Temas Destacados', u'http://www.elmostrador.cl/destacado/feed/'),
        (u'El D\xeda', u'http://www.elmostrador.cl/dia/feed/'),
        (u'Pa\xeds', u'http://www.elmostrador.cl/noticias/pais/feed/'),
        (u'Mundo', u'http://www.elmostrador.cl/noticias/mundo/feed/'),
        (u'Negocios', u'http://www.elmostrador.cl/noticias/negocios/feed/'),
        (u'Cultura', u'http://www.elmostrador.cl/noticias/cultura/feed/'),
        (u'Vida en L\xednea', u'http://www.elmostrador.cl/vida-en-linea/feed/'),
        (u'Opini\xf3n & Blogs', u'http://www.elmostrador.cl/opinion/feed/'),
    ]

View File

@ -1,52 +1,52 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElTiempo02(BasicNewsRecipe):
    """Feed-driven recipe for 'Periódico el Tiempo' (locale es_CO)."""

    # --- identification ---
    title = u'Periódico el Tiempo'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'

    # The same logo image serves as cover and masthead.
    cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
    masthead_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'

    # --- fetch limits ---
    oldest_article = 2
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- clean-up switches ---
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    # --- article boundaries ---
    #remove_tags_before = dict(id='fb-root')
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after = [dict(name='div', attrs={'class':'modulo reporte'})]
    keep_only_tags = [dict(name='div', id='contenidoArt')]

    # --- page furniture stripped from every article ---
    remove_tags = [
        dict(name='div', attrs={'class':'social-media'}),
        dict(name='div', attrs={'class':'recomend-art'}),
        dict(name='div', attrs={'class':'caja-facebook'}),
        dict(name='div', attrs={'class':'caja-twitter'}),
        dict(name='div', attrs={'class':'caja-buzz'}),
        dict(name='div', attrs={'class':'ico-mail2'}),
        dict(name='div', attrs={'id':'caja-instapaper'}),
        dict(name='div', attrs={'class':'modulo herramientas'}),
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
        """

    # --- sources: (section title, RSS URL) ---
    feeds = [
        (u'Colombia', u'http://www.eltiempo.com/colombia/rss.xml'),
        (u'Medellin', u'http://www.eltiempo.com/colombia/medellin/rss.xml'),
        (u'Economia', u'http://www.eltiempo.com/economia/rss.xml'),
        (u'Deportes', u'http://www.eltiempo.com/deportes/rss.xml'),
        (u'Mundo', u'http://www.eltiempo.com/mundo/rss.xml'),
        (u'Gente', u'http://www.eltiempo.com/gente/rss.xml'),
        (u'Vida de Hoy', u'http://www.eltiempo.com/vida-de-hoy/rss.xml'),
        (u'EEUU', u'http://www.eltiempo.com/mundo/estados-unidos/rss.xml'),
        (u'LatinoAmerica', u'http://www.eltiempo.com/mundo/latinoamerica/rss.xml'),
        (u'Europa', u'http://www.eltiempo.com/mundo/europa/rss.xml'),
        (u'Medio Oriente', u'http://www.eltiempo.com/mundo/medio-oriente/rss.xml'),
        (u'Vive in Medellin', u'http://medellin.vive.in/medellin/rss.xml'),
        (u'Don Juan', u'http://www.revistadonjuan.com/feedrss/'),
        (u'Alo', u'http://www.eltiempo.com/alo/rss.xml'),
    ]

View File

@ -1,129 +1,129 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError from urllib2 import Request, urlopen, URLError
class Estadao(BasicNewsRecipe): class Estadao(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br' LANGUAGE = 'pt_br'
language = 'pt' language = 'pt'
LANGHTM = 'pt-br' LANGHTM = 'pt-br'
ENCODING = 'utf' ENCODING = 'utf'
ENCHTM = 'utf-8' ENCHTM = 'utf-8'
directionhtm = 'ltr' directionhtm = 'ltr'
requires_version = (0,7,47) requires_version = (0,7,47)
news = True news = True
title = u'Estad\xe3o' title = u'Estad\xe3o'
__author__ = 'Euler Alves' __author__ = 'Euler Alves'
description = u'Brazilian news from Estad\xe3o' description = u'Brazilian news from Estad\xe3o'
publisher = u'Estad\xe3o' publisher = u'Estad\xe3o'
category = 'news, rss' category = 'news, rss'
oldest_article = 4 oldest_article = 4
max_articles_per_feed = 100 max_articles_per_feed = 100
summary_length = 1000 summary_length = 1000
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]' timefmt = ' [%d %b %Y (%a)]'
hoje = datetime.now()-timedelta(days=2) hoje = datetime.now()-timedelta(days=2)
pubdate = hoje.strftime('%a, %d %b') pubdate = hoje.strftime('%a, %d %b')
if hoje.hour<10: if hoje.hour<10:
hoje = hoje-timedelta(days=1) hoje = hoje-timedelta(days=1)
CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg' CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg'
SCREENSHOT = 'http://estadao.com.br/' SCREENSHOT = 'http://estadao.com.br/'
cover_margins = (0,0,'white') cover_margins = (0,0,'white')
masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png' masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'
keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})] keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})]
remove_tags = [ remove_tags = [
dict(name='div', dict(name='div',
attrs={'id':[ attrs={'id':[
'bb-md-noticia-tabs' 'bb-md-noticia-tabs'
]}) ]})
,dict(name='div', ,dict(name='div',
attrs={'class':[ attrs={'class':[
'tags' 'tags'
,'discussion' ,'discussion'
,'bb-gg adsense_container' ,'bb-gg adsense_container'
]}) ]})
,dict(name='a') ,dict(name='a')
,dict(name='iframe') ,dict(name='iframe')
,dict(name='link') ,dict(name='link')
,dict(name='script') ,dict(name='script')
] ]
feeds = [ feeds = [
(u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml') (u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml')
,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml') ,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml')
,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml') ,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml')
,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml') ,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml')
,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/') ,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/')
,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml') ,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml')
,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml') ,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml')
,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml') ,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml')
,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml') ,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
] ]
conversion_options = { conversion_options = {
'title' : title 'title' : title
,'comments' : description ,'comments' : description
,'publisher' : publisher ,'publisher' : publisher
,'tags' : category ,'tags' : category
,'language' : LANGUAGE ,'language' : LANGUAGE
,'linearize_tables': True ,'linearize_tables': True
} }
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}): if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)]) meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0) soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}): if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)]) meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1) soup.head.insert(0,meta1)
return soup return soup
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
#process all the images. assumes that the new html has the correct path #process all the images. assumes that the new html has the correct path
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src'] iurl = tag['src']
img = Image() img = Image()
img.open(iurl) img.open(iurl)
width, height = img.size width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
if img < 0: if img < 0:
raise RuntimeError('Out of memory') raise RuntimeError('Out of memory')
pw = PixelWand() pw = PixelWand()
if( width > height and width > 590) : if( width > height and width > 590) :
print 'Rotate image' print 'Rotate image'
img.rotate(pw, -90) img.rotate(pw, -90)
img.save(iurl) img.save(iurl)
return soup return soup
def get_cover_url(self):
    """Choose the cover URL; does nothing unless a Thumbalizr key is set.

    With an empty ``THUMBALIZR_API`` the method returns ``None``.
    Otherwise ``CAPA`` is requested.  If the response parses into a document
    that contains a ``<body>`` element, or the request raises ``URLError``,
    a Thumbalizr URL for ``SCREENSHOT`` is returned; in every other case
    ``CAPA`` itself is returned.
    """
    if not self.THUMBALIZR_API:
        return None

    def screenshot_url():
        # Thumbalizr rendering of the SCREENSHOT page, used as a stand-in cover.
        return 'http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'

    cover_url = self.CAPA
    request = Request(self.CAPA)
    for header, value in (
            ('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)'),
            ('Accept-Charset', self.ENCHTM),
            ('Referer', self.SCREENSHOT)):
        request.add_header(header, value)

    try:
        response = urlopen(request)
        parsed = BeautifulSoup(response)
        if parsed.find('body'):
            cover_url = screenshot_url()
        return cover_url
    except URLError:
        return screenshot_url()

View File

@ -1,165 +1,165 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe): class FolhaOnline(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br' LANGUAGE = 'pt_br'
language = 'pt_BR' language = 'pt_BR'
LANGHTM = 'pt-br' LANGHTM = 'pt-br'
ENCODING = 'cp1252' ENCODING = 'cp1252'
ENCHTM = 'iso-8859-1' ENCHTM = 'iso-8859-1'
directionhtm = 'ltr' directionhtm = 'ltr'
requires_version = (0,7,47) requires_version = (0,7,47)
news = True news = True
title = u'Folha de S\xE3o Paulo' title = u'Folha de S\xE3o Paulo'
__author__ = 'Euler Alves and Alex Mitrani' __author__ = 'Euler Alves and Alex Mitrani'
description = u'Brazilian news from Folha de S\xE3o Paulo' description = u'Brazilian news from Folha de S\xE3o Paulo'
publisher = u'Folha de S\xE3o Paulo' publisher = u'Folha de S\xE3o Paulo'
category = 'news, rss' category = 'news, rss'
oldest_article = 4 oldest_article = 4
max_articles_per_feed = 100 max_articles_per_feed = 100
summary_length = 1000 summary_length = 1000
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]' timefmt = ' [%d %b %Y (%a)]'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
,'--category', category ,'--category', category
,'--publisher', publisher ,'--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
hoje = datetime.now() hoje = datetime.now()
pubdate = hoje.strftime('%a, %d %b') pubdate = hoje.strftime('%a, %d %b')
if hoje.hour<6: if hoje.hour<6:
hoje = hoje-timedelta(days=1) hoje = hoje-timedelta(days=1)
CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg' CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg'
SCREENSHOT = 'http://www1.folha.uol.com.br/' SCREENSHOT = 'http://www1.folha.uol.com.br/'
cover_margins = (0,0,'white') cover_margins = (0,0,'white')
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif' masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})] keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})]
remove_tags = [ remove_tags = [
dict(name='div', dict(name='div',
attrs={'id':[ attrs={'id':[
'articleButton' 'articleButton'
,'bookmarklets' ,'bookmarklets'
,'ad-180x150-1' ,'ad-180x150-1'
,'contextualAdsArticle' ,'contextualAdsArticle'
,'articleEnd' ,'articleEnd'
,'articleComments' ,'articleComments'
]}) ]})
,dict(name='div', ,dict(name='div',
attrs={'class':[ attrs={'class':[
'openBox adslibraryArticle' 'openBox adslibraryArticle'
,'toolbar' ,'toolbar'
]}) ]})
,dict(name='a') ,dict(name='a')
,dict(name='iframe') ,dict(name='iframe')
,dict(name='link') ,dict(name='link')
,dict(name='script') ,dict(name='script')
,dict(name='li') ,dict(name='li')
] ]
remove_tags_after = dict(name='div',attrs={'id':'articleEnd'}) remove_tags_after = dict(name='div',attrs={'id':'articleEnd'})
feeds = [ feeds = [
(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml') (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
,(u'Cotidiano', u'http://feeds.folha.uol.com.br/folha/cotidiano/rss091.xml') ,(u'Cotidiano', u'http://feeds.folha.uol.com.br/folha/cotidiano/rss091.xml')
,(u'Brasil', u'http://feeds.folha.uol.com.br/folha/brasil/rss091.xml') ,(u'Brasil', u'http://feeds.folha.uol.com.br/folha/brasil/rss091.xml')
,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml') ,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml') ,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
,(u'Mercado', u'http://feeds.folha.uol.com.br/folha/dinheiro/rss091.xml') ,(u'Mercado', u'http://feeds.folha.uol.com.br/folha/dinheiro/rss091.xml')
,(u'Saber', u'http://feeds.folha.uol.com.br/folha/educacao/rss091.xml') ,(u'Saber', u'http://feeds.folha.uol.com.br/folha/educacao/rss091.xml')
,(u'Tec', u'http://feeds.folha.uol.com.br/folha/informatica/rss091.xml') ,(u'Tec', u'http://feeds.folha.uol.com.br/folha/informatica/rss091.xml')
,(u'Ilustrada', u'http://feeds.folha.uol.com.br/folha/ilustrada/rss091.xml') ,(u'Ilustrada', u'http://feeds.folha.uol.com.br/folha/ilustrada/rss091.xml')
,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml') ,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml') ,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml') ,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml') ,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml') ,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
,(u'Esporte', u'http://feeds.folha.uol.com.br/folha/esporte/rss091.xml') ,(u'Esporte', u'http://feeds.folha.uol.com.br/folha/esporte/rss091.xml')
,(u'Zapping', u'http://feeds.folha.uol.com.br/colunas/zapping/rss091.xml') ,(u'Zapping', u'http://feeds.folha.uol.com.br/colunas/zapping/rss091.xml')
,(u'Cida Santos', u'http://feeds.folha.uol.com.br/colunas/cidasantos/rss091.xml') ,(u'Cida Santos', u'http://feeds.folha.uol.com.br/colunas/cidasantos/rss091.xml')
,(u'Clóvis Rossi', u'http://feeds.folha.uol.com.br/colunas/clovisrossi/rss091.xml') ,(u'Clóvis Rossi', u'http://feeds.folha.uol.com.br/colunas/clovisrossi/rss091.xml')
,(u'Eliane Cantanhêde', u'http://feeds.folha.uol.com.br/colunas/elianecantanhede/rss091.xml') ,(u'Eliane Cantanhêde', u'http://feeds.folha.uol.com.br/colunas/elianecantanhede/rss091.xml')
,(u'Fernando Canzian', u'http://feeds.folha.uol.com.br/colunas/fernandocanzian/rss091.xml') ,(u'Fernando Canzian', u'http://feeds.folha.uol.com.br/colunas/fernandocanzian/rss091.xml')
,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/colunas/gilbertodimenstein/rss091.xml') ,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/colunas/gilbertodimenstein/rss091.xml')
,(u'Hélio Schwartsman', u'http://feeds.folha.uol.com.br/colunas/helioschwartsman/rss091.xml') ,(u'Hélio Schwartsman', u'http://feeds.folha.uol.com.br/colunas/helioschwartsman/rss091.xml')
,(u'Humberto Luiz Peron', u'http://feeds.folha.uol.com.br/colunas/futebolnarede/rss091.xml') ,(u'Humberto Luiz Peron', u'http://feeds.folha.uol.com.br/colunas/futebolnarede/rss091.xml')
,(u'João Pereira Coutinho', u'http://feeds.folha.uol.com.br/colunas/joaopereiracoutinho/rss091.xml') ,(u'João Pereira Coutinho', u'http://feeds.folha.uol.com.br/colunas/joaopereiracoutinho/rss091.xml')
,(u'José Antonio Ramalho', u'http://feeds.folha.uol.com.br/colunas/canalaberto/rss091.xml') ,(u'José Antonio Ramalho', u'http://feeds.folha.uol.com.br/colunas/canalaberto/rss091.xml')
,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/colunas/kennedyalencar/rss091.xml') ,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/colunas/kennedyalencar/rss091.xml')
,(u'Luiz Caversan', u'http://feeds.folha.uol.com.br/colunas/luizcaversan/rss091.xml') ,(u'Luiz Caversan', u'http://feeds.folha.uol.com.br/colunas/luizcaversan/rss091.xml')
,(u'Luiz Rivoiro', u'http://feeds.folha.uol.com.br/colunas/paiepai/rss091.xml') ,(u'Luiz Rivoiro', u'http://feeds.folha.uol.com.br/colunas/paiepai/rss091.xml')
,(u'Marcelo Leite', u'http://feeds.folha.uol.com.br/colunas/marceloleite/rss091.xml') ,(u'Marcelo Leite', u'http://feeds.folha.uol.com.br/colunas/marceloleite/rss091.xml')
,(u'Sérgio Malbergier', u'http://feeds.folha.uol.com.br/colunas/sergiomalbergier/rss091.xml') ,(u'Sérgio Malbergier', u'http://feeds.folha.uol.com.br/colunas/sergiomalbergier/rss091.xml')
,(u'Sylvia Colombo', u'http://feeds.folha.uol.com.br/colunas/sylviacolombo/rss091.xml') ,(u'Sylvia Colombo', u'http://feeds.folha.uol.com.br/colunas/sylviacolombo/rss091.xml')
,(u'Valdo Cruz', u'http://feeds.folha.uol.com.br/colunas/valdocruz/rss091.xml') ,(u'Valdo Cruz', u'http://feeds.folha.uol.com.br/colunas/valdocruz/rss091.xml')
] ]
conversion_options = { conversion_options = {
'title' : title 'title' : title
,'comments' : description ,'comments' : description
,'publisher' : publisher ,'publisher' : publisher
,'tags' : category ,'tags' : category
,'language' : LANGUAGE ,'language' : LANGUAGE
,'linearize_tables': True ,'linearize_tables': True
} }
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}): if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)]) meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0) soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}): if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)]) meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1) soup.head.insert(0,meta1)
return soup return soup
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
#process all the images. assumes that the new html has the correct path #process all the images. assumes that the new html has the correct path
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src'] iurl = tag['src']
img = Image() img = Image()
img.open(iurl) img.open(iurl)
width, height = img.size width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
if img < 0: if img < 0:
raise RuntimeError('Out of memory') raise RuntimeError('Out of memory')
pw = PixelWand() pw = PixelWand()
if( width > height and width > 590) : if( width > height and width > 590) :
print 'Rotate image' print 'Rotate image'
img.rotate(pw, -90) img.rotate(pw, -90)
img.save(iurl) img.save(iurl)
return soup return soup
def get_cover_url(self): def get_cover_url(self):
cover_url = self.CAPA cover_url = self.CAPA
pedido = Request(self.CAPA) pedido = Request(self.CAPA)
pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)') pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
pedido.add_header('Accept-Charset',self.ENCHTM) pedido.add_header('Accept-Charset',self.ENCHTM)
pedido.add_header('Referer',self.SCREENSHOT) pedido.add_header('Referer',self.SCREENSHOT)
try: try:
resposta = urlopen(pedido) resposta = urlopen(pedido)
soup = BeautifulSoup(resposta) soup = BeautifulSoup(resposta)
cover_item = soup.find('body') cover_item = soup.find('body')
if cover_item: if cover_item:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url return cover_url
except URLError: except URLError:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url return cover_url

View File

@ -1,35 +1,35 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re import re
import string import string
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPlSzczecin(BasicNewsRecipe):
    """Recipe for the Szczecin section of Gazeta.pl, read through its RSS feed."""

    title = u'Gazeta Wyborcza Szczecin'
    description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Agora S.A.'
    category = 'news, szczecin'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags = [ { "name" : "a", "attrs" : { "href" : "http://szczecin.gazeta.pl/szczecin/www.gazeta.pl" }}]
    cover_url = "http://bi.gazeta.pl/i/hp/hp2009/logo.gif"
    feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]

    def get_article_url(self, article):
        """Decode the real article URL embedded in the feed link.

        The feed link carries the target URL between ``/0L`` and
        ``/story01.htm`` with some characters written as two-character
        ``0X`` codes.  Those codes are translated back and ``http://`` is
        prepended.  When the link does not have that shape, the default
        ``BasicNewsRecipe`` lookup is used instead of raising.
        """
        match = re.search(r"/0L(szczecin.*)/story01.htm", article.link)
        if match is None:
            return BasicNewsRecipe.get_article_url(self, article)
        s = match.group(1)
        for code, char in (("0B", "."), ("0C", "/"), ("0H", ","), ("0I", "_")):
            s = s.replace(code, char)
        # "0A" stands for a literal "0"; it is decoded last so the zeros it
        # produces are not mistaken for the start of another code.
        s = s.replace("0A", "0")
        return "http://" + s

    def print_version(self, url):
        """Build the print-view URL from the second and third numbers in ``url``.

        ``url`` is expected to contain ``/<n>,<n>,<n>,....html``.  When it
        does not, ``url`` is returned unchanged instead of raising.
        """
        match = re.search(r"/(\d*),(\d*),(\d*),.*\.html", url)
        if match is None:
            return url
        no1 = match.group(2)
        no2 = match.group(3)
        return """http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html""" % (no1, no2)

View File

@ -1,43 +1,43 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re import re
import string import string
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1322322819(BasicNewsRecipe):
    """Recipe for GS24.pl (Głos Szczeciński), read through its regional RSS feeds."""

    title = u'GS24.pl (Głos Szczeciński)'
    description = u'Internetowy serwis Głosu Szczecińskiego'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'
    language = 'pl'
    publisher = 'Media Regionalne sp. z o.o.'
    category = 'news, szczecin'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True
    cover_url = "http://www.gs24.pl/images/top_logo.png"
    feeds = [
        # (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
        (u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
        (u'Stargard', u'http://www.gs24.pl/stargard.xml'),
        (u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
        (u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
        (u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
        (u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
        (u'Police', u'http://www.gs24.pl/police.xml'),
        (u'Region', u'http://www.gs24.pl/region.xml'),
        (u'Sport', u'http://www.gs24.pl/sport.xml'),
    ]

    def get_article_url(self, article):
        """Decode the real article URL embedded in the feed link.

        The feed link carries the target URL between ``/0L0S`` and
        ``/story01.htm`` with some characters written as two-character
        ``0X`` codes.  Those codes are translated back and ``http://`` is
        prepended.  When the link does not have that shape, the default
        ``BasicNewsRecipe`` lookup is used instead of raising.
        """
        match = re.search(r"/0L0S(gs24.*)/story01.htm", article.link)
        if match is None:
            return BasicNewsRecipe.get_article_url(self, article)
        s = match.group(1)
        codes = (("0B", "."), ("0C", "/"), ("0H", ","),
                 ("0I", "_"), ("0D", "?"), ("0F", "="))
        for code, char in codes:
            s = s.replace(code, char)
        # "0A" stands for a literal "0"; it is decoded last so the zeros it
        # produces are not mistaken for the start of another code.
        s = s.replace("0A", "0")
        return "http://" + s

    def print_version(self, url):
        """Return ``url`` with the print-template query parameter appended."""
        return url + "&Template=printpicart"

View File

@ -1,47 +1,47 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>' __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
''' '''
Profile to download The Hankyoreh Profile to download The Hankyoreh
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh(BasicNewsRecipe):
    """Recipe configuration for The Hankyoreh (Korean daily) RSS feeds."""

    # --- metadata -------------------------------------------------------
    title = u'Hankyoreh'
    language = 'ko'
    description = u'The Hankyoreh News articles'
    __author__ = 'Seongkyoun Yoo'

    # --- download limits ------------------------------------------------
    oldest_article = 5
    recursions = 1
    max_articles_per_feed = 5
    no_stylesheets = True

    # --- page clean-up --------------------------------------------------
    # Only these elements are kept from each article page.
    keep_only_tags = [
        {'name': 'tr', 'attrs': {'height': ['60px']}},
        {'id': ['fontSzArea']},
    ]

    # Elements dropped from what was kept.
    remove_tags = [
        {'target': '_blank'},
        {'name': 'td', 'attrs': {'style': ['padding: 10px 8px 5px 8px;']}},
        {'name': 'iframe', 'attrs': {'width': ['590']}},
    ]

    remove_tags_after = [
        {'target': '_top'},
    ]

    # --- feeds: (section title, RSS URL) ---------------------------------
    feeds = [
        ('All News', 'http://www.hani.co.kr/rss/'),
        ('Politics', 'http://www.hani.co.kr/rss/politics/'),
        ('Economy', 'http://www.hani.co.kr/rss/economy/'),
        ('Society', 'http://www.hani.co.kr/rss/society/'),
        ('International', 'http://www.hani.co.kr/rss/international/'),
        ('Culture', 'http://www.hani.co.kr/rss/culture/'),
        ('Sports', 'http://www.hani.co.kr/rss/sports/'),
        ('Science', 'http://www.hani.co.kr/rss/science/'),
        ('Opinion', 'http://www.hani.co.kr/rss/opinion/'),
        ('Cartoon', 'http://www.hani.co.kr/rss/cartoon/'),
        ('English Edition', 'http://www.hani.co.kr/rss/english_edition/'),
        ('Specialsection', 'http://www.hani.co.kr/rss/specialsection/'),
        ('Hanionly', 'http://www.hani.co.kr/rss/hanionly/'),
        ('Hkronly', 'http://www.hani.co.kr/rss/hkronly/'),
        ('Multihani', 'http://www.hani.co.kr/rss/multihani/'),
        ('Lead', 'http://www.hani.co.kr/rss/lead/'),
        ('Newsrank', 'http://www.hani.co.kr/rss/newsrank/'),
    ]

View File

@ -1,25 +1,25 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>' __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
''' '''
Profile to download The Hankyoreh Profile to download The Hankyoreh
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh21(BasicNewsRecipe):
    """Recipe configuration for the Hankyoreh21 magazine RSS feed."""

    title = u'Hankyoreh21'
    language = 'ko'
    description = u'The Hankyoreh21 Magazine articles'
    __author__ = 'Seongkyoun Yoo'

    oldest_article = 20
    recursions = 1
    max_articles_per_feed = 120
    no_stylesheets = True
    remove_javascript = True

    # Only these elements are kept from each article page.
    keep_only_tags = [
        dict(name='font', attrs={'class':'t18bk'}),
        dict(id=['fontSzArea'])
    ]

    feeds = [
        # The URL used to end in a stray space, which is not part of the address.
        ('Hani21','http://h21.hani.co.kr/rss/'),
    ]

View File

@ -1,47 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HindustanTimes(BasicNewsRecipe):
    """Recipe for the Huffington Post UK feeds.

    NOTE(review): the class name does not match the publication in ``title``;
    it is left untouched here because it is the block's public name.
    """

    title          = u'Huffington Post UK'
    language       = 'en_GB'
    __author__     = 'Krittika Goyal'
    oldest_article = 2 #days
    max_articles_per_feed = 25
    #encoding = 'cp1252'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True
    auto_cleanup_keep = '//div[@class="articleBody"]'

    # (section title, RSS URL).  Two titles used to carry a stray trailing
    # colon ('UK Fashion:', 'UK Universities:'), unlike all their siblings.
    feeds          = [
        ('UK Politics',
         'http://www.huffingtonpost.com/feeds/verticals/uk-politics/news.xml'),
        ('UK Entertainment',
         'http://www.huffingtonpost.com/feeds/verticals/uk-entertainment/news.xml'),
        ('UK Style',
         'http://www.huffingtonpost.com/feeds/verticals/uk-style/news.xml'),
        ('UK Fashion',
         'http://www.huffingtonpost.com/feeds/verticals/uk-fashion/news.xml'),
        ('UK Universities',
         'http://www.huffingtonpost.com/feeds/verticals/uk-universities-education/news.xml'),
        ('UK World',
         'http://www.huffingtonpost.com/feeds/verticals/uk-world/news.xml'),
        ('UK Lifestyle',
         'http://www.huffingtonpost.com/feeds/verticals/uk-lifestyle/news.xml'),
        ('UK Comedy',
         'http://www.huffingtonpost.com/feeds/verticals/uk-comedy/news.xml'),
        ('UK Celebrity',
         'http://www.huffingtonpost.com/feeds/verticals/uk-celebrity/news.xml'),
        ('UK Culture',
         'http://www.huffingtonpost.com/feeds/verticals/uk-culture/news.xml'),
        ('UK News',
         'http://www.huffingtonpost.com/feeds/verticals/uk/news.xml'),
        ('UK Tech',
         'http://www.huffingtonpost.com/feeds/verticals/uk-tech/news.xml'),
        ('UK Sport',
         'http://www.huffingtonpost.com/feeds/verticals/uk-sport/news.xml'),
    ]

    def get_article_url(self, entry):
        """Return the ``href`` of the entry's first link.

        Falls back to the default ``BasicNewsRecipe`` lookup when the entry
        has no ``links`` attribute or the list is empty.
        """
        links = getattr(entry, 'links', None)
        if links:
            return links[0]['href']
        return BasicNewsRecipe.get_article_url(self, entry)

View File

@ -1,110 +1,110 @@
from calibre import strftime from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/' MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/'
class IlManifesto(BasicNewsRecipe): class IlManifesto(BasicNewsRecipe):
title = 'Il Manifesto' title = 'Il Manifesto'
__author__ = 'Giacomo Lacava' __author__ = 'Giacomo Lacava'
description = 'quotidiano comunista - ultima edizione html disponibile' description = 'quotidiano comunista - ultima edizione html disponibile'
publication_type = 'newspaper' publication_type = 'newspaper'
publisher = 'il manifesto coop. editrice a r.l.' publisher = 'il manifesto coop. editrice a r.l.'
language = 'it' language = 'it'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
delay = 1 delay = 1
no_stylesheets = True no_stylesheets = True
simultaneous_downloads = 5 simultaneous_downloads = 5
timeout = 30 timeout = 30
auto_cleanup = True auto_cleanup = True
remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})] remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})]
remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'}) remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'})
remove_tags_after = dict(id='myPrintArea') remove_tags_after = dict(id='myPrintArea')
manifesto_index = None manifesto_index = None
manifesto_datestr = None manifesto_datestr = None
def _set_manifesto_index(self): def _set_manifesto_index(self):
if self.manifesto_index == None: if self.manifesto_index == None:
startUrl = MANIFESTO_BASEURL + 'area-abbonati/in-edicola/' startUrl = MANIFESTO_BASEURL + 'area-abbonati/in-edicola/'
startSoup = self.index_to_soup(startUrl) startSoup = self.index_to_soup(startUrl)
lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href'] lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href']
del(startSoup) del(startSoup)
self.manifesto_index = MANIFESTO_BASEURL + lastEdition self.manifesto_index = MANIFESTO_BASEURL + lastEdition
urlsplit = lastEdition.split('/') urlsplit = lastEdition.split('/')
self.manifesto_datestr = urlsplit[-1] self.manifesto_datestr = urlsplit[-1]
if urlsplit[-1] == '': if urlsplit[-1] == '':
self.manifesto_datestr = urlsplit[-2] self.manifesto_datestr = urlsplit[-2]
def get_cover_url(self): def get_cover_url(self):
self._set_manifesto_index() self._set_manifesto_index()
url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr
return url return url
def parse_index(self): def parse_index(self):
self._set_manifesto_index() self._set_manifesto_index()
soup = self.index_to_soup(self.manifesto_index) soup = self.index_to_soup(self.manifesto_index)
feedLinks = soup.find('div',id='accordion_inedicola').findAll('a') feedLinks = soup.find('div',id='accordion_inedicola').findAll('a')
result = [] result = []
for feed in feedLinks: for feed in feedLinks:
articles = [] articles = []
feedName = feed.find('h2').string feedName = feed.find('h2').string
feedUrl = MANIFESTO_BASEURL + feed['href'] feedUrl = MANIFESTO_BASEURL + feed['href']
feedSoup = self.index_to_soup(feedUrl) feedSoup = self.index_to_soup(feedUrl)
indexRoot = feedSoup.find('div',attrs={'class':'column1'}) indexRoot = feedSoup.find('div',attrs={'class':'column1'})
for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}): for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}):
artLink = div.find('a') artLink = div.find('a')
if artLink is None: continue # empty div if artLink is None: continue # empty div
title = artLink.string title = artLink.string
url = MANIFESTO_BASEURL + artLink['href'] url = MANIFESTO_BASEURL + artLink['href']
description = '' description = ''
descNode = div.find('div',attrs={'class':'text_12'}) descNode = div.find('div',attrs={'class':'text_12'})
if descNode is not None: if descNode is not None:
description = descNode.string description = descNode.string
author = '' author = ''
authNode = div.find('div',attrs={'class':'firma'}) authNode = div.find('div',attrs={'class':'firma'})
if authNode is not None: if authNode is not None:
author = authNode.string author = authNode.string
articleText = '' articleText = ''
article = { article = {
'title':title, 'title':title,
'url':url, 'url':url,
'date': strftime('%d %B %Y'), 'date': strftime('%d %B %Y'),
'description': description, 'description': description,
'content': articleText, 'content': articleText,
'author': author 'author': author
} }
articles.append(article) articles.append(article)
result.append((feedName,articles)) result.append((feedName,articles))
return result return result
def extract_readable_article(self, html, url):
    """Render a downloaded article page into a small standalone HTML page.

    The page is parsed with BeautifulSoup and the title, optional
    subtitle, author, optional summary and body text are substituted into
    a fixed HTML template.

    :param html: markup of the downloaded article page.
    :param url: accepted for interface compatibility; not used here.
    :return: the rendered HTML string.
    :raises AttributeError: when the ``column1`` div or the
        ``titolo_articolo`` node is missing from the page.
    """
    template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>"

    def or_empty(node):
        # A missing node would otherwise be interpolated as the text "None".
        return '' if node is None else node

    bs = BeautifulSoup(html)
    col1 = bs.find('div', attrs={'class': 'column1'})

    # ``.string`` may be None; fall back to an empty title in that case.
    title = or_empty(bs.find(id='titolo_articolo').string)
    content = or_empty(col1.find('div', attrs={'class': 'bodytext'}))
    author = or_empty(col1.find('span', attrs={'class': 'firma'}))
    # The subtitle is looked up *before* column1 in document order.
    subtitle = or_empty(col1.findPrevious('div', attrs={'class': 'occhiello_rosso'}))
    summary = or_empty(bs.find('div', attrs={'class': 'sommario'}))

    return template % dict(title=title, subtitle=subtitle, author=author,
                           summary=summary, content=content)

View File

@ -1,34 +1,34 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class JakartaGlobe(BasicNewsRecipe):
    """Calibre news recipe for the Jakarta Globe RSS feeds."""

    title = u'Jakarta Globe'
    __author__ = 'rty'
    # The attribute was misspelled ``pubisher``, so the publisher was never
    # actually set. The old name is kept as an alias for anything reading it.
    publisher = 'JakartaGlobe.com'
    pubisher = publisher
    description = 'JakartaGlobe, Indonesia, Newspaper'
    category = 'News, Indonesia'
    language = 'en_ID'
    encoding = 'utf-8'
    masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'

    oldest_article = 3
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    feeds = [
        (u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
        (u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
        (u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
        (u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
        (u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
        (u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
    ]

    # The ``div.story`` spec used to be listed twice; one entry is enough.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'story'}),
        dict(name='span', attrs={'class': 'headline'}),
        dict(name='p', attrs={'id': 'bodytext'}),
    ]

View File

@ -1,37 +1,37 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com' __copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
__author__ = 'Vadim Dyadkin' __author__ = 'Vadim Dyadkin'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Computerra(BasicNewsRecipe):
    """Calibre news recipe for the Computerra feedburner feed (Russian)."""

    # --- metadata ---------------------------------------------------------
    title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
    __author__ = 'Vadim Dyadkin (edited by A. Chewi)'
    description = u'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии'
    language = 'ru'

    # --- download settings ------------------------------------------------
    oldest_article = 100
    max_articles_per_feed = 50
    simultaneous_downloads = 5
    use_embedded_content = False

    # --- clean-up settings ------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    feeds = [
        (u'Компьютерра-Онлайн', 'http://feeds.feedburner.com/ct_news/'),
    ]

    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['fin', 'idc-container', 'idc-noscript']}},
        {'name': 'ul', 'attrs': {'class': "related_post"}},
        {'name': 'p', 'attrs': {'class': 'info'}},
        {'name': 'a', 'attrs': {'class': 'twitter-share-button'}},
        {'name': 'a', 'attrs': {'type': 'button_count'}},
        {'name': 'h2', 'attrs': {}},
    ]

    def print_version(self, url):
        """Return the article URL with the ``?print=true`` query appended."""
        printable = url + '?print=true'
        return printable

View File

@ -1,34 +1,34 @@
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.''' '''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''
class SportsIllustratedRecipe(BasicNewsRecipe):
    """Calibre news recipe for the Leipziger Volkszeitung RSS feeds.

    NOTE(review): the class name does not match the publication; it is
    left untouched because it is the block's public name.
    """

    # Base of every section feed listed in ``feeds`` below.
    _FEED_URL = u'http://nachrichten.lvz-online.de/rss/%s-rss.xml'

    title = u'Leipziger Volkszeitung Online RSS'
    description = 'Leipziger Volkszeitung Online RSS'
    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    version = 1
    language = 'de'
    publication_type = 'newspaper'
    timefmt = ' [%d.%m.%Y]'

    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'article'}}]
    remove_tags = [{'name': 'div', 'attrs': {'class': ['ARTICLE_MORE', 'clearfloat']}}]

    feeds = [
        (u'Leipzig', _FEED_URL % u'leipzig'),
        (u'Mitteldeutschland', _FEED_URL % u'mitteldeutschland'),
        (u'Brennpunkte', _FEED_URL % u'brennpunkte'),
        (u'Polizeiticker', _FEED_URL % u'polizeiticker'),
        (u'Boulevard', _FEED_URL % u'boulevard'),
        (u'Kultur', _FEED_URL % u'kultur'),
        (u'Sport', _FEED_URL % u'sport'),
        (u'Regionalsport', _FEED_URL % u'regionalsport'),
        (u'Knipser', _FEED_URL % u'knipser'),
    ]

    def get_masthead_url(self):
        """Return the fixed URL of the newspaper's logo image."""
        logo = 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'
        return logo

View File

@ -1,100 +1,100 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime from datetime import datetime
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
from calibre.utils.magick import Image, PixelWand from calibre.utils.magick import Image, PixelWand
class LifeHacker(BasicNewsRecipe):
    """Calibre news recipe for the LifeHacker "vip" feed.

    Articles are taken from the feed's embedded content. ``preprocess_html``
    strips inline styles and adds language/charset ``<meta>`` tags when they
    are absent; ``postprocess_html`` rotates wide images by -90 degrees.
    """

    THUMBALIZR_API = ''  # ---->Get your at http://www.thumbalizr.com/ and put here
    LANGUAGE = 'en'
    LANGHTM = 'en'
    language = 'en'
    ENCODING = 'utf'
    ENCHTM = 'utf-8'
    requires_version = (0, 7, 47)
    news = True

    title = u'LifeHacker'
    __author__ = 'Euler Alves'
    description = u'Tips, tricks, and downloads for getting things done.'
    publisher = u'lifehacker.com'
    author = u'Adam Pash & Kevin Purdy & Adam Dachis & Whitson Gordon & Gina Trapani'
    category = 'news, rss'

    oldest_article = 4
    max_articles_per_feed = 20
    summary_length = 1000

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = True
    remove_empty_feeds = True
    timefmt = ' [%d %b %Y (%a)]'

    # Evaluated once, when the class body is executed.
    hoje = datetime.now()
    pubdate = hoje.strftime('%a, %d %b')

    cover_url = 'http://api.thumbalizr.com/?api_key='+THUMBALIZR_API+'&url=http://lifehacker.com&width=600&quality=90'
    cover_margins = (0, 0, 'white')
    masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'

    remove_tags = [
        {'class': 'feedflare'},
        # 'ad_container' used to be listed twice; the duplicate is dropped.
        dict(name='div', attrs={'class': [
            'ad_container',
            'ad_300x250',
            'ad_interstitial',
            'share-wrap',
            'ad_300x600',
            'ad_perma-footer-adsense',
            'ad_perma-panorama',
            'ad panorama',
        ]}),
        dict(name='div', attrs={'id': [
            'agegate_container',
            'agegate_container_rejected',
            'sharemenu-wrap',
        ]}),
    ]

    feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]

    conversion_options = {
        'title': title,
        'comments': description,
        'publisher': publisher,
        'tags': category,
        'language': LANGUAGE,
        'linearize_tables': True,
    }

    def preprocess_html(self, soup):
        """Strip inline styles and ensure language/charset meta tags exist.

        :param soup: parsed article document; modified in place.
        :return: the same ``soup`` object.
        """
        for item in soup.findAll(style=True):
            del item['style']
        if not soup.find(attrs={'http-equiv': 'Content-Language'}):
            meta0 = Tag(soup, 'meta', [("http-equiv", "Content-Language"), ("content", self.LANGHTM)])
            soup.head.insert(0, meta0)
        if not soup.find(attrs={'http-equiv': 'Content-Type'}):
            meta1 = Tag(soup, 'meta', [("http-equiv", "Content-Type"), ("content", "text/html; charset="+self.ENCHTM)])
            soup.head.insert(0, meta1)
        return soup

    def postprocess_html(self, soup, first):
        """Rotate landscape images wider than 590 pixels by -90 degrees.

        Every ``<img>`` with a ``src`` is opened from that path; images
        that qualify are rotated and saved back to the same path.

        :param soup: parsed article document.
        :param first: not used by this implementation.
        :return: the same ``soup`` object.
        """
        # process all the images. assumes that the new html has the correct path
        for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            width, height = img.size
            # self.log replaces the Python 2 ``print`` statements. The former
            # ``if img < 0`` "out of memory" check compared an Image object
            # with an int and could never fire, so it has been removed.
            self.log('img is: ', iurl, 'width is: ', width, 'height is: ', height)
            if width > height and width > 590:
                self.log('Rotate image')
                img.rotate(PixelWand(), -90)
                img.save(iurl)
        return soup

View File

@ -1,85 +1,85 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Matthew Briggs' __copyright__ = '2009, Matthew Briggs'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
''' '''
http://www.heraldsun.com.au/
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class DailyTelegraph(BasicNewsRecipe):
    """Calibre news recipe for the Melbourne Herald Sun RSS feeds.

    Logging in is optional (``needs_subscription = 'optional'``); when a
    username and password are configured, ``get_browser`` submits them to
    the site's first form before any article is fetched.

    NOTE(review): the class name does not match the publication; it is
    left untouched because it is the block's public name.
    """

    title = u'Melbourne Herald Sun'
    __author__ = u'Ray Hartley'
    description = (u'Victorian and National News'
                   '. You will need to have a subscription to '
                   'http://www.heraldsun.com.au to get full articles.')
    # ``language`` used to be assigned twice with the same value.
    language = 'en_AU'
    oldest_article = 2
    needs_subscription = 'optional'
    max_articles_per_feed = 30
    remove_javascript = True
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://resources2.news.com.au/cs/heraldsun/images/header-and-footer/logo.gif'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        .caption{display: inline; font-size: x-small}
    """

    conversion_options = {
        # The conversion option is called ``comments``; the former
        # ``comment`` key did not correspond to any option.
        'comments': description,
        'language': language,
    }

    keep_only_tags = [dict(attrs={'id': 'story'})]
    remove_tags_before = dict(attrs={'class': 'story-header'})
    remove_tags_after = dict(attrs={'class': 'story-footer'})
    remove_tags = [
        dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object',
                   'media-metadata', 'media-reference', 'media-producer']),
        dict(attrs={'class': ['story-header-tools', 'story-sidebar',
                              'story-footer', 'story-summary-list']}),
    ]
    remove_attributes = ['lang']

    feeds = [
        (u'Breaking News', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_breakingnews_206.xml'),
        (u'Business', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_business_207.xml'),
        (u'Entertainment', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_entertainment_208.xml'),
        (u'Health Science', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_health_212.xml'),
        (u'Music', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_music_449.xml'),
        (u'National News', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_national_209.xml'),
        (u'Sport News', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_sport_213.xml'),
        (u'AFL News', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_afl_205.xml'),
        (u'State News', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_vic_214.xml'),
        (u'Technology', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_tech_215.xml'),
        (u'World News', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_world_216.xml'),
        (u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_opinion_210.xml'),
        (u'Andrew Bolt', u'http://blogs.news.com.au/heraldsun/andrewbolt/index.php/xml/rss_2.0/heraldsun/hs_andrewbolt/'),
        (u'Afl - St Kilda', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_afl_stkilda_565.xml'),
        (u'Terry McCrann', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_tmccrann_224.xml'),
        (u'The Other side', u'http://feeds.news.com.au/public/rss/2.0/heraldsun_otherside_211.xml'),
    ]

    def get_browser(self):
        """Return a browser, logged in when credentials are configured.

        :raises ValueError: when the page returned after submitting the
            login form does not contain a ">log out" marker.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username and self.password:
            br.open('http://www.heraldsun.com.au')
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            raw = br.submit().read()
            # A successful login is detected by the presence of a log-out link.
            if '>log out' not in raw.lower():
                raise ValueError('Failed to log in to www.heraldsun.com.au,'
                                 ' are your username and password correct?')
        return br

    def get_article_url(self, article):
        """Use the feed entry's ``id`` as the article URL."""
        return article.id

View File

@ -1,138 +1,138 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed from calibre.web.feeds import Feed
class Menorca(BasicNewsRecipe):
    """Calibre news recipe for the Menorca newspaper (menorca.info).

    Most sections come from RSS feeds. The sections in ``seccions_web``
    are scraped from their web pages instead, and ``parse_index`` merges
    both sources into a single index.
    """

    title = 'Menorca'
    publisher = 'Editorial Menorca S.A. '
    __author__ = 'M. Sintes'
    description = u'Peri\xf3dico con informaci\xf3n de Menorca, Espa\xf1a'
    category = 'news, politics, economy, culture, Menorca, Spain '
    language = 'es'
    # Was misspelled ``enconding``, so the declared charset was never used.
    # NOTE(review): confirm the site is still served as cp1252.
    encoding = 'cp1252'

    no_stylesheets = True
    oldest_article = 5
    max_articles_per_feed = 25

    feeds = [
        (u'Principal', u'http://www.menorca.info/rss'),
        (u'Opini\xf3n', u'http://www.menorca.info/rss?seccion=opinion'),
        (u'Menorca', u'http://www.menorca.info/rss?seccion=menorca'),
        (u'Alaior', u'http://www.menorca.info/rss?seccion=pueblos/alaior'),
        (u'Ciutadella', u'http://www.menorca.info/rss?seccion=pueblos/ciutadella'),
        (u'Es Castell', u'http://www.menorca.info/rss?seccion=pueblos/escastell'),
        (u'Es Mercadal', u'http://www.menorca.info/rss?seccion=pueblos/esmercadal'),
        (u'Es Migjorn', u'http://www.menorca.info/rss?seccion=pueblos/esmigjorn'),
        (u'Ferreries', u'http://www.menorca.info/rss?seccion=pueblos/ferreries'),
        (u'Fornells', u'http://www.menorca.info/rss?seccion=pueblos/fornells'),
        (u'Llucma\xe7anes', u'http://www.menorca.info/rss?seccion=pueblos/llucmaanes'),
        (u'Ma\xf3', u'http://www.menorca.info/rss?seccion=pueblos/mao'),
        (u'Sant Climent', u'http://www.menorca.info/rss?seccion=pueblos/santcliment'),
        (u'Sant Llu\xeds', u'http://www.menorca.info/rss?seccion=pueblos/santlluis'),
        (u'Deportes', u'http://www.menorca.info/rss?seccion=deportes'),
        (u'Balears', u'http://www.menorca.info/rss?seccion=balears'),
    ]

    # Sections whose RSS link is broken; they are scraped directly from
    # the web page instead.
    seccions_web = [
        (u'Mundo', u'http://www.menorca.info/actualidad/mundo'),
        (u'Econom\xeda', u'http://www.menorca.info/actualidad/economia'),
        (u'Espa\xf1a', u'http://www.menorca.info/actualidad/espana'),
    ]

    remove_tags_before = dict(name='div', attrs={'class': 'bloqueTitulosNoticia'})
    remove_tags_after = dict(name='div', attrs={'class': 'compartir'})
    remove_tags = [
        dict(id='utilidades'),
        dict(name='div', attrs={'class': 'totalComentarios'}),
        dict(name='div', attrs={'class': 'compartir'}),
        dict(name='div', attrs={'class': re.compile("img_noticia*")}),
    ]

    def print_version(self, url):
        """Return the article URL with the ``?d=print`` query appended."""
        return url + '?d=print'

    def feed_to_index_append(self, feedObject, masterFeed):
        """Convert parsed feeds into index entries and append them.

        :param feedObject: iterable of feeds, each with ``title`` and
            ``articles`` attributes.
        :param masterFeed: list that receives one ``(title, articles)``
            tuple per feed; modified in place.
        """
        for feed in feedObject:
            newArticles = [
                {
                    'title': article.title,
                    'url': article.url,
                    'date': article.date,
                    'description': article.text_summary,
                }
                for article in feed.articles
            ]
            masterFeed.append((feed.title, newArticles))

    def parse_index(self):
        """Build the index from the RSS feeds plus the scraped sections.

        For every scraped teaser the article page itself is fetched in
        order to read the author and date from its ``div.autor`` block.

        :return: list of ``(section title, article dicts)`` tuples.
        """
        feeds = []
        self.feed_to_index_append(BasicNewsRecipe.parse_feeds(self), feeds)

        teaser_class = re.compile("articulo noticia|cajaNoticiaPortada")
        for (nom_seccio, url_seccio) in self.seccions_web:
            articles = []
            soup = self.index_to_soup(url_seccio)
            for article in soup.findAll('div', attrs={'class': teaser_class}):
                titol = self.tag_to_string(article.find(['h2', 'h3']))
                a = article.find('a', href=True)
                if a is None:
                    # A teaser without a link cannot be downloaded.
                    continue
                url = 'http://www.menorca.info' + a['href']
                desc = None
                dt = ''

                soup_art = self.index_to_soup(url)
                tx = self.tag_to_string(soup_art.find('div', attrs={'class': 'autor'}))
                # First field is taken as the author; the last ten
                # characters of the last field are taken as the date.
                ls = re.split('[,;]', tx)
                autor = ls[0]
                if len(ls) > 1 and len(ls[-1]) >= 10:
                    dt = ls[-1][-10:]

                self.log('\tTrobat article: ', titol, 'a', url, 'Seccio: ', nom_seccio, 'Autor: ', autor, 'Data: ', dt)
                articles.append({'title': titol, 'url': url, 'description': desc, 'date': dt, 'author': autor})

            if articles:
                feeds.append((nom_seccio, articles))

        return feeds

View File

@ -1,27 +1,27 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1329123365(BasicNewsRecipe):
    """Calibre news recipe for the Mobilebulgaria.com RSS feeds."""

    # --- metadata ---------------------------------------------------------
    title = u'Mobilebulgaria.com'
    __author__ = 'M3 Web'
    description = 'The biggest Bulgarian site covering mobile consumer electronics. Offers detailed reviews, popular discussion forum, shop and platform for selling new and second hand phones and gadgets.'
    category = 'News, Reviews, Offers, Forum'
    language = 'bg'
    encoding = 'windows-1251'

    # --- download settings ------------------------------------------------
    oldest_article = 45
    max_articles_per_feed = 10
    no_stylesheets = False
    remove_javascript = True

    feeds = [
        (u'News', u'http://www.mobilebulgaria.com/rss_full.php'),
        (u'Reviews', u'http://www.mobilebulgaria.com/rss_reviews.php'),
        (u'Offers', u'http://www.mobilebulgaria.com/obiavi/rss.php'),
        (u'Forum', u'http://www.mobilebulgaria.com/rss_forum_last10.php'),
    ]

    # Only these containers are kept from each downloaded page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'bigblock'}},
        {'name': 'div', 'attrs': {'class': 'verybigblock'}},
        {'name': 'table', 'attrs': {'class': 'obiaviresults'}},
        {'name': 'div', 'attrs': {'class': 'forumblock'}},
        {'name': 'div', 'attrs': {'class': 'forumblock_b1'}},
        {'name': 'div', 'attrs': {'class': 'block2_2colswrap'}},
    ]

    extra_css = '''
        #gallery1 div{display: block; float: left; margin: 0 10px 10px 0;} '''

View File

@ -15,7 +15,7 @@ class mojegotowanie(BasicNewsRecipe):
language = 'pl' language = 'pl'
description =u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.' description =u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.'
masthead_url='http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif' masthead_url='http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif'
cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif' cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif'
remove_empty_feeds= True remove_empty_feeds= True
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -1,35 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1308572538(BasicNewsRecipe): class AdvancedUserRecipe1308572538(BasicNewsRecipe):
title = u'Novinite.com' title = u'Novinite.com'
__author__ = 'Martin Tsanchev' __author__ = 'Martin Tsanchev'
description = 'Real time provider of the latest Bulgarian news in English' description = 'Real time provider of the latest Bulgarian news in English'
category = 'Business, Politics, Society, Sports, Crime, Lifestyle, World, People' category = 'Business, Politics, Society, Sports, Crime, Lifestyle, World, People'
language = 'en_BG' language = 'en_BG'
encoding = 'utf-8' encoding = 'utf-8'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 10 max_articles_per_feed = 10
keep_only_tags = [dict(name='div', attrs={'id':'content'})] keep_only_tags = [dict(name='div', attrs={'id':'content'})]
remove_tags = [dict(name='a', attrs={'class':'twitter-share-button'})] remove_tags = [dict(name='a', attrs={'class':'twitter-share-button'})]
remove_tags_after = dict(id='textsize') remove_tags_after = dict(id='textsize')
no_stylesheets = True no_stylesheets = True
feeds = [(u'Business', u'http://www.novinite.com/services/news_rdf.php?category_id=1'), feeds = [(u'Business', u'http://www.novinite.com/services/news_rdf.php?category_id=1'),
(u'Finance', u'http://www.novinite.com/services/news_rdf.php?category_id=15'), (u'Finance', u'http://www.novinite.com/services/news_rdf.php?category_id=15'),
(u'Energy', u'http://www.novinite.com/services/news_rdf.php?category_id=16'), (u'Energy', u'http://www.novinite.com/services/news_rdf.php?category_id=16'),
(u'Industry', u'http://www.novinite.com/services/news_rdf.php?category_id=17'), (u'Industry', u'http://www.novinite.com/services/news_rdf.php?category_id=17'),
(u'Properties', u'http://www.novinite.com/services/news_rdf.php?category_id=18'), (u'Properties', u'http://www.novinite.com/services/news_rdf.php?category_id=18'),
(u'Politics', u'http://www.novinite.com/services/news_rdf.php?category_id=2'), (u'Politics', u'http://www.novinite.com/services/news_rdf.php?category_id=2'),
(u'Diplomacy', u'http://www.novinite.com/services/news_rdf.php?category_id=20'), (u'Diplomacy', u'http://www.novinite.com/services/news_rdf.php?category_id=20'),
(u'Defense', u'http://www.novinite.com/services/news_rdf.php?category_id=21'), (u'Defense', u'http://www.novinite.com/services/news_rdf.php?category_id=21'),
(u'Bulgaria in EU', u'http://www.novinite.com/services/news_rdf.php?category_id=22'), (u'Bulgaria in EU', u'http://www.novinite.com/services/news_rdf.php?category_id=22'),
(u'Domestic', u'http://www.novinite.com/services/news_rdf.php?category_id=23'), (u'Domestic', u'http://www.novinite.com/services/news_rdf.php?category_id=23'),
(u'Society', u'http://www.novinite.com/services/news_rdf.php?category_id=3'), (u'Society', u'http://www.novinite.com/services/news_rdf.php?category_id=3'),
(u'Environment', u'http://www.novinite.com/services/news_rdf.php?category_id=24'), (u'Environment', u'http://www.novinite.com/services/news_rdf.php?category_id=24'),
(u'Education', u'http://www.novinite.com/services/news_rdf.php?category_id=25'), (u'Education', u'http://www.novinite.com/services/news_rdf.php?category_id=25'),
(u'Culture', u'http://www.novinite.com/services/news_rdf.php?category_id=26'), (u'Culture', u'http://www.novinite.com/services/news_rdf.php?category_id=26'),
(u'Archaeology', u'http://www.novinite.com/services/news_rdf.php?category_id=34'), (u'Archaeology', u'http://www.novinite.com/services/news_rdf.php?category_id=34'),
(u'Health', u'http://www.novinite.com/services/news_rdf.php?category_id=62'), (u'Health', u'http://www.novinite.com/services/news_rdf.php?category_id=62'),
(u'Sports', u'http://www.novinite.com/services/news_rdf.php?category_id=4'), (u'Sports', u'http://www.novinite.com/services/news_rdf.php?category_id=4'),
(u'Crime', u'http://www.novinite.com/services/news_rdf.php?category_id=5'), (u'Crime', u'http://www.novinite.com/services/news_rdf.php?category_id=5'),
(u'Lifestyle', u'http://www.novinite.com/services/news_rdf.php?category_id=6'), (u'Lifestyle', u'http://www.novinite.com/services/news_rdf.php?category_id=6'),
(u'World', u'http://www.novinite.com/services/news_rdf.php?category_id=30')] (u'World', u'http://www.novinite.com/services/news_rdf.php?category_id=30')]

View File

@ -1,36 +1,36 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311799898(BasicNewsRecipe): class AdvancedUserRecipe1311799898(BasicNewsRecipe):
title = u'Periódico Portafolio Colombia' title = u'Periódico Portafolio Colombia'
__author__ = 'BIGO-CAVA' __author__ = 'BIGO-CAVA'
language = 'es_CO' language = 'es_CO'
cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png' cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
remove_tags_before = dict(id='contenidoArt') remove_tags_before = dict(id='contenidoArt')
remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})] remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})]
keep_only_tags = [dict(name='div', id='contenidoArt')] keep_only_tags = [dict(name='div', id='contenidoArt')]
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png' masthead_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
publication_type = 'newspaper' publication_type = 'newspaper'
extra_css = """ extra_css = """
p{text-align: justify; font-size: 100%} p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% } body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; } h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
""" """
feeds = [(u'Negocios', u'http://www.portafolio.co/negocios/feed'), feeds = [(u'Negocios', u'http://www.portafolio.co/negocios/feed'),
(u'Economia', u'http://www.portafolio.co/economia/feed'), (u'Economia', u'http://www.portafolio.co/economia/feed'),
(u'Internacional', u'http://www.portafolio.co/internacional/feed'), (u'Internacional', u'http://www.portafolio.co/internacional/feed'),
(u'Indicadores', u'http://www.portafolio.co/indicadores/feed'), (u'Indicadores', u'http://www.portafolio.co/indicadores/feed'),
(u'Opinion', u'http://www.portafolio.co/opinion/feed'), (u'Opinion', u'http://www.portafolio.co/opinion/feed'),
(u'Finanzas Personales', u'http://www.portafolio.co/finanzas-personales/feed'), (u'Finanzas Personales', u'http://www.portafolio.co/finanzas-personales/feed'),
(u'Herramientas', u'http://www.portafolio.co/herramientas/feed')] (u'Herramientas', u'http://www.portafolio.co/herramientas/feed')]

View File

@ -1,11 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341570(BasicNewsRecipe): class AdvancedUserRecipe1317341570(BasicNewsRecipe):
title = u'Revista Semana' title = u'Revista Semana'
__author__ = 'BIGO-CAVA' __author__ = 'BIGO-CAVA'
language = 'es_CO' language = 'es_CO'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
feeds = [(u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml')] feeds = [(u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml')]

View File

@ -1,28 +1,28 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>' __author__ = 'MrStefan <mrstefaan@gmail.com>'
''' '''
www.rushisaband.com www.rushisaband.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class rushisaband(BasicNewsRecipe): class rushisaband(BasicNewsRecipe):
title = u'Rushisaband' title = u'Rushisaband'
__author__ = 'MrStefan <mrstefaan@gmail.com>' __author__ = 'MrStefan <mrstefaan@gmail.com>'
language = 'en_GB' language = 'en_GB'
description =u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson' description =u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson'
remove_empty_feeds= True remove_empty_feeds= True
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript=True remove_javascript=True
no_stylesheets=True no_stylesheets=True
keep_only_tags =[] keep_only_tags =[]
keep_only_tags.append(dict(name = 'h4')) keep_only_tags.append(dict(name = 'h4'))
keep_only_tags.append(dict(name = 'h5')) keep_only_tags.append(dict(name = 'h5'))
keep_only_tags.append(dict(name = 'p')) keep_only_tags.append(dict(name = 'p'))
feeds = [(u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog')] feeds = [(u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog')]

View File

@ -1,29 +1,29 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = u'2012, Tomasz Dlugosz <tomek3d@gmail.com>' __copyright__ = u'2012, Tomasz Dlugosz <tomek3d@gmail.com>'
''' '''
rybinski.eu rybinski.eu
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Rybinski(BasicNewsRecipe): class Rybinski(BasicNewsRecipe):
title = u'Rybinski.eu - economy of the XXI century' title = u'Rybinski.eu - economy of the XXI century'
description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego' description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego'
language = 'pl' language = 'pl'
__author__ = u'Tomasz D\u0142ugosz' __author__ = u'Tomasz D\u0142ugosz'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
feeds = [(u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl')] feeds = [(u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl')]
keep_only_tags = [dict(name='div', attrs={'class':'post'})] keep_only_tags = [dict(name='div', attrs={'class':'post'})]
remove_tags = [ remove_tags = [
dict(name = 'div', attrs = {'class' : 'post-meta-1'}), dict(name = 'div', attrs = {'class' : 'post-meta-1'}),
dict(name = 'div', attrs = {'class' : 'post-meta-2'}), dict(name = 'div', attrs = {'class' : 'post-meta-2'}),
dict(name = 'div', attrs = {'class' : 'post-comments'}) dict(name = 'div', attrs = {'class' : 'post-comments'})
] ]

View File

@ -1,22 +1,22 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011 Neil Grogan' __copyright__ = '2011 Neil Grogan'
# #
# Silicon Republic Recipe # Silicon Republic Recipe
# #
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class SiliconRepublic(BasicNewsRecipe): class SiliconRepublic(BasicNewsRecipe):
title = u'Silicon Republic' title = u'Silicon Republic'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
__author__ = u'Neil Grogan' __author__ = u'Neil Grogan'
language = 'en_IE' language = 'en_IE'
remove_tags = [dict(attrs={'class':['thumb','txt','compactbox','icons','catlist','catlistinner','taglist','taglistinner','social','also-in','also-in-inner','also-in-footer','zonek-dfp','paneladvert','rcadvert','panel','h2b']}), remove_tags = [dict(attrs={'class':['thumb','txt','compactbox','icons','catlist','catlistinner','taglist','taglistinner','social','also-in','also-in-inner','also-in-footer','zonek-dfp','paneladvert','rcadvert','panel','h2b']}),
dict(id=['header','logo','header-right','sitesearch','rsslinks','topnav','topvideos','topvideos-list','topnews','topnews-list','slideshow','slides','compactheader','compactnews','compactfeatures','article-type','contactlinks-header','banner-zone-k-dfp','footer-related','directory-services','also-in-section','featuredrelated1','featuredrelated2','featuredrelated3','featuredrelated4','advert2-dfp']), dict(id=['header','logo','header-right','sitesearch','rsslinks','topnav','topvideos','topvideos-list','topnews','topnews-list','slideshow','slides','compactheader','compactnews','compactfeatures','article-type','contactlinks-header','banner-zone-k-dfp','footer-related','directory-services','also-in-section','featuredrelated1','featuredrelated2','featuredrelated3','featuredrelated4','advert2-dfp']),
dict(name=['script', 'style'])] dict(name=['script', 'style'])]
feeds = [(u'News', u'http://www.siliconrepublic.com/feeds/')] feeds = [(u'News', u'http://www.siliconrepublic.com/feeds/')]

View File

@ -1,15 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1296179411(BasicNewsRecipe): class AdvancedUserRecipe1296179411(BasicNewsRecipe):
title = u'SPIN Magzine' title = u'SPIN Magzine'
__author__ = 'Quistopher' __author__ = 'Quistopher'
language = 'en' language = 'en'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
feeds = [ feeds = [
(u'Daily Noise Blog | SPIN.com', u'http://www.spin.com/blog/feed'), (u'Daily Noise Blog | SPIN.com', u'http://www.spin.com/blog/feed'),
(u'It Happened Last Night | SPIN.com', u'http://www.spin.com/it-happened-last-night/feed'), (u'It Happened Last Night | SPIN.com', u'http://www.spin.com/it-happened-last-night/feed'),
(u'Album Reviews | SPIN.com', u'http://www.spin.com/album-reviews/feed') (u'Album Reviews | SPIN.com', u'http://www.spin.com/album-reviews/feed')
] ]

View File

@ -1,18 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299054026(BasicNewsRecipe): class AdvancedUserRecipe1299054026(BasicNewsRecipe):
title = u'Thai Post Daily' title = u'Thai Post Daily'
__author__ = 'Chotechai P.' __author__ = 'Chotechai P.'
language = 'th' language = 'th'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png' cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'
feeds = [(u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'), (u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'), (u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'), (u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'), (u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'), (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'), (u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'), (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'), (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'), (u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'), (u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'), (u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', u'http://thaipost.net/taxonomy/term/65/all/feed'), (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'), (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'), (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'), (u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'), 
(u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'), (u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'), (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'), (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'), (u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed')] feeds = [(u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'), (u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'), (u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'), (u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'), (u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'), (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'), (u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'), (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'), (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'), (u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'), (u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'), (u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', 
u'http://thaipost.net/taxonomy/term/65/all/feed'), (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'), (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'), (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'), (u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'), (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'), (u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'), (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'), (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'), (u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed')]
def print_version(self, url): def print_version(self, url):
return url.replace(url, 'http://www.thaipost.net/print/' + url [32:]) return url.replace(url, 'http://www.thaipost.net/print/' + url [32:])
remove_tags = [] remove_tags = []
remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-logo'})) remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-logo'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-site_name'})) remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-site_name'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-breadcrumb'})) remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-breadcrumb'}))

View File

@ -1,27 +1,27 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313555075(BasicNewsRecipe): class AdvancedUserRecipe1313555075(BasicNewsRecipe):
news = True news = True
title = u'The Clinic' title = u'The Clinic'
__author__ = 'Alex Mitrani' __author__ = 'Alex Mitrani'
description = u'Online version of Chilean satirical weekly' description = u'Online version of Chilean satirical weekly'
publisher = u'The Clinic' publisher = u'The Clinic'
category = 'news, politics, Chile, rss' category = 'news, politics, Chile, rss'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
summary_length = 1000 summary_length = 1000
language = 'es_CL' language = 'es_CL'
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.theclinic.cl/wp-content/themes/tc12m/css/ui/mainLogoTC-top.png' masthead_url = 'http://www.theclinic.cl/wp-content/themes/tc12m/css/ui/mainLogoTC-top.png'
remove_tags_before = dict(name='article', attrs={'class':'scope bordered'}) remove_tags_before = dict(name='article', attrs={'class':'scope bordered'})
remove_tags_after = dict(name='div', attrs={'id':'commentsSection'}) remove_tags_after = dict(name='div', attrs={'id':'commentsSection'})
remove_tags = [dict(name='span', attrs={'class':'relTags'}) remove_tags = [dict(name='span', attrs={'class':'relTags'})
,dict(name='div', attrs={'class':'articleActivity hdcol'}) ,dict(name='div', attrs={'class':'articleActivity hdcol'})
,dict(name='div', attrs={'id':'commentsSection'}) ,dict(name='div', attrs={'id':'commentsSection'})
] ]
feeds = [(u'The Clinic Online', u'http://www.theclinic.cl/feed/')] feeds = [(u'The Clinic Online', u'http://www.theclinic.cl/feed/')]

View File

@ -1,63 +1,63 @@
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class TNR(BasicNewsRecipe): class TNR(BasicNewsRecipe):
title = 'The New Republic' title = 'The New Republic'
__author__ = 'Krittika Goyal' __author__ = 'Krittika Goyal'
description = '''The New Republic is a journal of opinion with an emphasis description = '''The New Republic is a journal of opinion with an emphasis
on politics and domestic and international affairs. It carries feature on politics and domestic and international affairs. It carries feature
articles by staff and contributing editors. The second half of each issue articles by staff and contributing editors. The second half of each issue
is devoted to book and the arts, theater, motion pictures, music and art.''' is devoted to book and the arts, theater, motion pictures, music and art.'''
language = 'en' language = 'en'
encoding = 'UTF-8' encoding = 'UTF-8'
needs_subscription = True needs_subscription = True
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''), (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''), (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
] ]
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser(self) br = BasicNewsRecipe.get_browser(self)
br.open('http://www.newrepublic.com/user') br.open('http://www.newrepublic.com/user')
br.select_form(nr=1) br.select_form(nr=1)
try: try:
br['user'] = self.username br['user'] = self.username
except: except:
br['name'] = self.username br['name'] = self.username
br['pass'] = self.password br['pass'] = self.password
self.log('Logging in...') self.log('Logging in...')
raw = br.submit().read() raw = br.submit().read()
if 'SIGN OUT' not in raw: if 'SIGN OUT' not in raw:
raise ValueError('Failed to log in to tnr.com, check your username and password') raise ValueError('Failed to log in to tnr.com, check your username and password')
self.log('Logged in successfully') self.log('Logged in successfully')
return br return br
def parse_index(self): def parse_index(self):
raw = self.index_to_soup('http://www.newrepublic.com/current-issue', raw=True) raw = self.index_to_soup('http://www.newrepublic.com/current-issue', raw=True)
# raw = self.index_to_soup(open('/t/raw.html').read().decode('utf-8'), raw=True) # raw = self.index_to_soup(open('/t/raw.html').read().decode('utf-8'), raw=True)
for pat, sub in self.preprocess_regexps: for pat, sub in self.preprocess_regexps:
raw = pat.sub(sub, raw) raw = pat.sub(sub, raw)
soup = self.index_to_soup(raw) soup = self.index_to_soup(raw)
feed_title = 'The New Republic Magazine Articles' feed_title = 'The New Republic Magazine Articles'
articles = [] articles = []
for div in soup.findAll('div', attrs={'class':lambda x: x and 'field-item' in x.split()}): for div in soup.findAll('div', attrs={'class':lambda x: x and 'field-item' in x.split()}):
a = div.find('a', href=True, attrs={'class':lambda x: x != 'author'}) a = div.find('a', href=True, attrs={'class':lambda x: x != 'author'})
if a is not None: if a is not None:
art_title = self.tag_to_string(a) art_title = self.tag_to_string(a)
url = a.get('href') url = a.get('href')
num = re.search(r'/(\d+)/', url) num = re.search(r'/(\d+)/', url)
if num is not None: if num is not None:
art = num.group(1) art = num.group(1)
url = 'http://www.newrepublic.com/node/%s/print'%art url = 'http://www.newrepublic.com/node/%s/print'%art
self.log.info('\tFound article:', art_title, 'at', url) self.log.info('\tFound article:', art_title, 'at', url)
article = {'title':art_title, 'url':url, 'description':'', 'date':''} article = {'title':art_title, 'url':url, 'description':'', 'date':''}
articles.append(article) articles.append(article)
return [(feed_title, articles)] return [(feed_title, articles)]

View File

@ -1,76 +1,76 @@
#!/usr/bin/env python #!/usr/bin/env python
__author__ = 'Darko Spasovski' __author__ = 'Darko Spasovski'
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>' __copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
''' '''
utrinski.com.mk utrinski.com.mk
''' '''
import re import re
import datetime import datetime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre import browser from calibre import browser
class UtrinskiVesnik(BasicNewsRecipe): class UtrinskiVesnik(BasicNewsRecipe):
INDEX = 'http://www.utrinski.com.mk/' INDEX = 'http://www.utrinski.com.mk/'
title = 'Utrinski Vesnik' title = 'Utrinski Vesnik'
description = 'Daily Macedonian newspaper' description = 'Daily Macedonian newspaper'
masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg' masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg'
language = 'mk' language = 'mk'
remove_javascript = True remove_javascript = True
publication_type = 'newspaper' publication_type = 'newspaper'
category = 'news, Macedonia' category = 'news, Macedonia'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[ [
## Remove anything before the start of the article. ## Remove anything before the start of the article.
(r'<body.*?Article start-->', lambda match: '<body>'), (r'<body.*?Article start-->', lambda match: '<body>'),
## Remove anything after the end of the article. ## Remove anything after the end of the article.
(r'<!--Article end.*?</body>', lambda match : '</body>'), (r'<!--Article end.*?</body>', lambda match : '</body>'),
] ]
] ]
extra_css = """ extra_css = """
body{font-family: Arial,Helvetica,sans-serif} body{font-family: Arial,Helvetica,sans-serif}
.WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none} .WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
""" """
conversion_options = { conversion_options = {
'comment' : description, 'comment' : description,
'tags' : category, 'tags' : category,
'language' : language, 'language' : language,
'linearize_tables' : True 'linearize_tables' : True
} }
def parse_index(self): def parse_index(self):
feeds = [] feeds = []
# open main page # open main page
soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(self.INDEX)
# find all anchors with class attribute equal to 'WB_UTRINSKIVESNIK_MainMenu' # find all anchors with class attribute equal to 'WB_UTRINSKIVESNIK_MainMenu'
for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_MainMenu'}): for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_MainMenu'}):
sectionTitle = section.contents[0].string sectionTitle = section.contents[0].string
sectionUrl = self.INDEX + section['href'].strip() sectionUrl = self.INDEX + section['href'].strip()
# open the anchor link # open the anchor link
raw = browser().open_novisit(sectionUrl).read() raw = browser().open_novisit(sectionUrl).read()
sectionSoup = BeautifulSoup(raw) sectionSoup = BeautifulSoup(raw)
# find all anchors with class attribute equal to 'WB_UTRINSKIVESNIK_ONLINEArticleTitle' # find all anchors with class attribute equal to 'WB_UTRINSKIVESNIK_ONLINEArticleTitle'
sectionArticles = sectionSoup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_ONLINEArticleTitle'}) sectionArticles = sectionSoup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_ONLINEArticleTitle'})
articles = [] articles = []
for sectionArticle in sectionArticles: for sectionArticle in sectionArticles:
# article title = anchor's contents, article url = anchor's href # article title = anchor's contents, article url = anchor's href
articleTitle = sectionArticle.contents[0].string.strip() articleTitle = sectionArticle.contents[0].string.strip()
articleUrl = self.INDEX + sectionArticle['href'].strip() articleUrl = self.INDEX + sectionArticle['href'].strip()
articleDate = datetime.datetime.today().strftime('%d.%m.%Y') articleDate = datetime.datetime.today().strftime('%d.%m.%Y')
articles.append({'title': articleTitle, 'url':articleUrl, 'description':'', 'date': articleDate}) articles.append({'title': articleTitle, 'url':articleUrl, 'description':'', 'date': articleDate})
if articles: if articles:
feeds.append((sectionTitle, articles)) feeds.append((sectionTitle, articles))
return feeds return feeds
def get_cover_url(self):
    """Return the cover image URL for the current local date.

    The file name is today's date formatted as DD_MM_YYYY with a ``.jpg``
    extension, appended to the site's ``WBStorage/Files/`` folder.
    """
    datum = datetime.datetime.today().strftime('%d_%m_%Y')
    return 'http://www.utrinski.com.mk/WBStorage/Files/' + datum + '.jpg'

View File

@ -1,19 +1,19 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
class AdvancedUserRecipe1350731826(BasicNewsRecipe):
    """Recipe for the Yazihane sports blog, built from the site's single feed."""

    # --- Identification / metadata ---
    title = u'Yazihane'
    __author__ = 'A Erdogan'
    description = 'Sports Blog'
    publisher = 'yazihaneden.com'
    category = 'sports, basketball, nba, cycling, euroleague'
    language = 'tr'
    masthead_url = 'http://www.yazihaneden.com/wp-content/uploads/Untitled-1.png'

    # --- Fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Content handling ---
    no_stylesheets = True
    use_embedded_content = False

    # Keep only the <div> whose id matches the pattern below.
    # NOTE(review): the empty alternatives in '(^|| )' and '($|| )' let both
    # groups match nothing, so when applied as a search this accepts any id
    # that merely contains 'post-'. Tighten it if only 'post-<n>' ids are meant.
    keep_only_tags = [dict(name='div', attrs={'id': re.compile('(^|| )post-($|| )', re.DOTALL)})]

    # Nothing after the post footer <div> is kept.
    remove_tags_after = dict(name='div', attrs={'class': 'post-footer clear'})

    feeds = [(u'Yazihane', u'http://www.yazihaneden.com/feed/')]

View File

@ -1,208 +1,208 @@
@echo OFF @echo OFF
REM Calibre-Portable.bat REM Calibre-Portable.bat
REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬ REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬¬
REM REM
REM Batch File to start a Calibre configuration on Windows REM Batch File to start a Calibre configuration on Windows
REM giving explicit control of the location of: REM giving explicit control of the location of:
REM - Calibre Program Files REM - Calibre Program Files
REM - Calibre Library Files REM - Calibre Library Files
REM - Calibre Config Files REM - Calibre Config Files
REM - Calibre Metadata database REM - Calibre Metadata database
REM - Calibre Source files REM - Calibre Source files
REM - Calibre Temp Files REM - Calibre Temp Files
REM By setting the paths correctly it can be used to run: REM By setting the paths correctly it can be used to run:
REM - A "portable calibre" off a USB stick. REM - A "portable calibre" off a USB stick.
REM - A network installation with local metadata database REM - A network installation with local metadata database
REM (for performance) and books stored on a network share REM (for performance) and books stored on a network share
REM - A local installation using customised settings REM - A local installation using customised settings
REM REM
REM If trying to run off a USB stick then the folder structure REM If trying to run off a USB stick then the folder structure
REM shown below is recommended (relative to the location of REM shown below is recommended (relative to the location of
REM this batch file). This structure can also be used REM this batch file). This structure can also be used
REM when running off a local hard disk if you want to get the REM when running off a local hard disk if you want to get the
REM level of control this batch file provides. REM level of control this batch file provides.
REM - Calibre2 Location of program files REM - Calibre2 Location of program files
REM - CalibreConfig Location of Configuration files REM - CalibreConfig Location of Configuration files
REM - CalibreLibrary Location of Books and metadata REM - CalibreLibrary Location of Books and metadata
REM - CalibreSource Location of Calibre Source files (Optional) REM - CalibreSource Location of Calibre Source files (Optional)
REM REM
REM This batch file is designed so that if you create the recommended REM This batch file is designed so that if you create the recommended
REM folder structure then it can be used 'as is' without modification. REM folder structure then it can be used 'as is' without modification.
REM REM
REM More information on the Environment Variables used by Calibre can REM More information on the Environment Variables used by Calibre can
REM be found at: REM be found at:
REM http://manual.calibre-ebook.com/customize.html#environment-variables REM http://manual.calibre-ebook.com/customize.html#environment-variables
REM REM
REM The documentation for this file in the Calibre manual can be found at: REM The documentation for this file in the Calibre manual can be found at:
REM http://manual.calibre-ebook.com/portable.html REM http://manual.calibre-ebook.com/portable.html
REM REM
REM CHANGE HISTORY REM CHANGE HISTORY
REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬ REM ¬¬¬¬¬¬¬¬¬¬¬¬¬¬
REM 22 Jan 2012 itimpi - Updated to keep it in line with the calibre-portable.sh REM 22 Jan 2012 itimpi - Updated to keep it in line with the calibre-portable.sh
REM file for Linux systems REM file for Linux systems
REM -------------------------------------
REM Set up Calibre Config folder
REM
REM This is where user specific settings
REM are stored.
REM
REM The SET assignment and the ECHO text are quoted so that a ")"
REM or "&" in the current directory (e.g. "Program Files (x86)")
REM cannot terminate the IF block early.
REM -------------------------------------

IF EXIST CalibreConfig (
    SET "CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig"
    ECHO CONFIG FILES: "%cd%\CalibreConfig"
)
REM --------------------------------------------------------------
REM Specify Location of ebooks
REM
REM Location where Book files are located
REM Either set explicit path, or if running from a USB stick
REM a relative path can be used to avoid need to know the
REM drive letter of the USB stick.
REM
REM Comment out any of the following that are not to be used
REM (although leaving them in does not really matter)
REM
REM If both folders exist, the relative CalibreLibrary folder wins
REM because it is tested last.
REM --------------------------------------------------------------

IF EXIST U:\eBooks\CalibreLibrary (
    SET "CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary"
    ECHO LIBRARY FILES: U:\eBOOKS\CalibreLibrary
)
REM Quoted so that a ")" or "&" in the current directory cannot
REM terminate the IF block early.
IF EXIST CalibreLibrary (
    SET "CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary"
    ECHO LIBRARY FILES: "%cd%\CalibreLibrary"
)
REM --------------------------------------------------------------
REM Specify Location of metadata database (optional)
REM
REM Location where the metadata.db file is located. If not set
REM the same location as Books files will be assumed. This
REM option is typically set to get better performance when the
REM Library is on a (slow) network drive. Putting the metadata.db
REM file locally then gives a big performance improvement.
REM
REM NOTE. If you use this option, then the ability to switch
REM libraries within Calibre will be disabled. Therefore
REM you do not want to set it if the metadata.db file
REM is at the same location as the book files.
REM
REM Another point to watch is that plugins can cause problems
REM as they often store absolute path information
REM
REM All paths are quoted so that spaces, ")" or "&" in the current
REM directory do not break the IF tests or close the blocks early.
REM --------------------------------------------------------------

IF EXIST "%cd%\CalibreMetadata\metadata.db" (
    IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" (
        SET "CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db"
        ECHO DATABASE: "%cd%\CalibreMetadata\metadata.db"
        ECHO '
        ECHO ***CAUTION*** Library Switching will be disabled
        ECHO '
    )
)
REM --------------------------------------------------------------
REM Specify Location of source (optional)
REM
REM It is easy to run Calibre from source
REM Just set the environment variable to where the source is located
REM When running from source the GUI will have a '*' after the version
REM number that is displayed at the bottom of the Calibre main screen.
REM
REM More information on setting up a development environment can
REM be found at:
REM http://manual.calibre-ebook.com/develop.html#develop
REM
REM The SET assignment and the ECHO text are quoted so that a ")"
REM or "&" in the current directory cannot close the block early.
REM --------------------------------------------------------------

IF EXIST CalibreSource\src (
    SET "CALIBRE_DEVELOP_FROM=%cd%\CalibreSource\src"
    ECHO SOURCE FILES: "%cd%\CalibreSource\src"
) ELSE (
    ECHO SOURCE FILES: *** Not being Used ***
)
REM --------------------------------------------------------------
REM Specify Location of calibre Windows binaries (optional)
REM
REM To avoid needing Calibre to be set in the search path, ensure
REM that Calibre Program Files is current directory when starting.
REM The following test falls back to using the search path.
REM This folder can be populated by copying the Calibre2 folder from
REM an existing installation or by installing direct to here.
REM
REM NOTE. Do not try and put both Windows and Linux binaries into
REM same folder as this can cause problems.
REM
REM %cd% inside a parenthesised block is expanded when the block is
REM parsed, i.e. before CD runs, so the target folder is echoed
REM explicitly instead of echoing %cd% after the CD.
REM --------------------------------------------------------------

IF EXIST "%cd%\Calibre2" (
    ECHO PROGRAM FILES: "%cd%\Calibre2"
    CD /D "%cd%\Calibre2"
) ELSE (
    ECHO PROGRAM FILES: *** Use System search PATH ***
)
REM --------------------------------------------------------------
REM Location of Calibre Temporary files (optional)
REM
REM Calibre creates a lot of temporary files while running
REM In theory these are removed when Calibre finishes, but
REM in practice files can be left behind (particularly if
REM any errors occur). Using this option allows some
REM explicit clean-up of these files.
REM If not set Calibre uses the normal system TEMP location
REM --------------------------------------------------------------

REM Quoted assignments keep trailing blanks and "&" out of the values.
SET "CALIBRE_TEMP_DIR=%TEMP%\CALIBRE_TEMP"
ECHO TEMPORARY FILES: %CALIBRE_TEMP_DIR%

REM Remove anything left over from a previous run, then recreate the folder.
IF EXIST "%CALIBRE_TEMP_DIR%" RMDIR /s /q "%CALIBRE_TEMP_DIR%"
MKDIR "%CALIBRE_TEMP_DIR%"

REM set the following for any components that do
REM not obey the CALIBRE_TEMP_DIR setting
SET "TMP=%CALIBRE_TEMP_DIR%"
SET "TEMP=%CALIBRE_TEMP_DIR%"
REM --------------------------------------------------------------
REM Set the Interface language (optional)
REM
REM If not set Calibre uses the language set in Preferences
REM --------------------------------------------------------------

SET "CALIBRE_OVERRIDE_LANG=EN"
ECHO INTERFACE LANGUAGE: %CALIBRE_OVERRIDE_LANG%
REM ----------------------------------------------------------
REM The following gives a chance to check the settings before
REM starting Calibre. It can be commented out if not wanted.
REM ----------------------------------------------------------

ECHO '
ECHO Press CTRL-C if you do not want to continue
PAUSE
REM --------------------------------------------------------
REM Start up the calibre program.
REM
REM The use of 'belownormal' priority helps keep the system
REM responsive while Calibre is running. Within Calibre itself
REM the background processes should be set to run with 'low' priority.

REM Using the START command starts up Calibre in a separate process.
REM If used without /WAIT option it launches Calibre and continues batch file.
REM Normally this would simply run off the end and close the Command window.
REM Use with /WAIT to wait until Calibre completes to run a task on exit
REM
REM --with-library is only passed when a library folder was found
REM above (or was already set in the environment); otherwise an
REM empty "" argument would be handed to Calibre.
REM --------------------------------------------------------

ECHO "Starting up Calibre"
ECHO OFF
ECHO %cd%
IF DEFINED CALIBRE_LIBRARY_DIRECTORY (
    START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%"
) ELSE (
    START /belownormal Calibre
)

View File

@ -1,10 +1,10 @@
/** /**
* Version: 1.0 Alpha-1 * Version: 1.0 Alpha-1
* Build Date: 13-Nov-2007 * Build Date: 13-Nov-2007
* Copyright (c) 2006-2007, Coolite Inc. (http://www.coolite.com/). All rights reserved. * Copyright (c) 2006-2007, Coolite Inc. (http://www.coolite.com/). All rights reserved.
* License: Licensed under The MIT License. See license.txt and http://www.datejs.com/license/. * License: Licensed under The MIT License. See license.txt and http://www.datejs.com/license/.
* Website: http://www.datejs.com/ or http://www.coolite.com/datejs/ * Website: http://www.datejs.com/ or http://www.coolite.com/datejs/
*/ */
// en-US culture data: names, format patterns and the regular expressions used
// to recognise date/time words.
// The US time-zone offsets below are the real UTC offsets: standard time is
// EST -5h, CST -6h, MST -7h, PST -8h and daylight time is one hour less.
// (The two tables previously held each other's values.)
Date.CultureInfo = {
    name: "en-US",
    englishName: "English (United States)",
    nativeName: "English (United States)",
    dayNames: ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"],
    abbreviatedDayNames: ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"],
    shortestDayNames: ["Su", "Mo", "Tu", "We", "Th", "Fr", "Sa"],
    firstLetterDayNames: ["S", "M", "T", "W", "T", "F", "S"],
    monthNames: ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"],
    abbreviatedMonthNames: ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    amDesignator: "AM",
    pmDesignator: "PM",
    firstDayOfWeek: 0,
    twoDigitYearMax: 2029,
    dateElementOrder: "mdy",
    formatPatterns: {
        shortDate: "M/d/yyyy",
        longDate: "dddd, MMMM dd, yyyy",
        shortTime: "h:mm tt",
        longTime: "h:mm:ss tt",
        fullDateTime: "dddd, MMMM dd, yyyy h:mm:ss tt",
        sortableDateTime: "yyyy-MM-ddTHH:mm:ss",
        universalSortableDateTime: "yyyy-MM-dd HH:mm:ssZ",
        rfc1123: "ddd, dd MMM yyyy HH:mm:ss GMT",
        monthDay: "MMMM dd",
        yearMonth: "MMMM, yyyy"
    },
    regexPatterns: {
        jan: /^jan(uary)?/i,
        feb: /^feb(ruary)?/i,
        mar: /^mar(ch)?/i,
        apr: /^apr(il)?/i,
        may: /^may/i,
        jun: /^jun(e)?/i,
        jul: /^jul(y)?/i,
        aug: /^aug(ust)?/i,
        sep: /^sep(t(ember)?)?/i,
        oct: /^oct(ober)?/i,
        nov: /^nov(ember)?/i,
        dec: /^dec(ember)?/i,
        sun: /^su(n(day)?)?/i,
        mon: /^mo(n(day)?)?/i,
        tue: /^tu(e(s(day)?)?)?/i,
        wed: /^we(d(nesday)?)?/i,
        thu: /^th(u(r(s(day)?)?)?)?/i,
        fri: /^fr(i(day)?)?/i,
        sat: /^sa(t(urday)?)?/i,
        future: /^next/i,
        past: /^last|past|prev(ious)?/i,
        add: /^(\+|after|from)/i,
        subtract: /^(\-|before|ago)/i,
        yesterday: /^yesterday/i,
        today: /^t(oday)?/i,
        tomorrow: /^tomorrow/i,
        now: /^n(ow)?/i,
        millisecond: /^ms|milli(second)?s?/i,
        second: /^sec(ond)?s?/i,
        minute: /^min(ute)?s?/i,
        hour: /^h(ou)?rs?/i,
        week: /^w(ee)?k/i,
        month: /^m(o(nth)?s?)?/i,
        day: /^d(ays?)?/i,
        year: /^y((ea)?rs?)?/i,
        shortMeridian: /^(a|p)/i,
        longMeridian: /^(a\.?m?\.?|p\.?m?\.?)/i,
        timezone: /^((e(s|d)t|c(s|d)t|m(s|d)t|p(s|d)t)|((gmt)?\s*(\+|\-)\s*\d\d\d\d?)|gmt)/i,
        ordinalSuffix: /^\s*(st|nd|rd|th)/i,
        timeContext: /^\s*(\:|a|p)/i
    },
    abbreviatedTimeZoneStandard: { GMT: "-000", EST: "-0500", CST: "-0600", MST: "-0700", PST: "-0800" },
    abbreviatedTimeZoneDST: { GMT: "-000", EDT: "-0400", CDT: "-0500", MDT: "-0600", PDT: "-0700" }
};
Date.getMonthNumberFromName=function(name){var n=Date.CultureInfo.monthNames,m=Date.CultureInfo.abbreviatedMonthNames,s=name.toLowerCase();for(var i=0;i<n.length;i++){if(n[i].toLowerCase()==s||m[i].toLowerCase()==s){return i;}} Date.getMonthNumberFromName=function(name){var n=Date.CultureInfo.monthNames,m=Date.CultureInfo.abbreviatedMonthNames,s=name.toLowerCase();for(var i=0;i<n.length;i++){if(n[i].toLowerCase()==s||m[i].toLowerCase()==s){return i;}}
return-1;};Date.getDayNumberFromName=function(name){var n=Date.CultureInfo.dayNames,m=Date.CultureInfo.abbreviatedDayNames,o=Date.CultureInfo.shortestDayNames,s=name.toLowerCase();for(var i=0;i<n.length;i++){if(n[i].toLowerCase()==s||m[i].toLowerCase()==s){return i;}} return-1;};Date.getDayNumberFromName=function(name){var n=Date.CultureInfo.dayNames,m=Date.CultureInfo.abbreviatedDayNames,o=Date.CultureInfo.shortestDayNames,s=name.toLowerCase();for(var i=0;i<n.length;i++){if(n[i].toLowerCase()==s||m[i].toLowerCase()==s){return i;}}
@ -101,4 +101,4 @@ return _.any.apply(null,rx);}else{return _get(fx);}};g._formats=g.formats(["yyyy
return g._start.call({},s);};}());Date._parse=Date.parse;Date.parse=function(s){var r=null;if(!s){return null;} return g._start.call({},s);};}());Date._parse=Date.parse;Date.parse=function(s){var r=null;if(!s){return null;}
try{r=Date.Grammar.start.call({},s);}catch(e){return null;} try{r=Date.Grammar.start.call({},s);}catch(e){return null;}
return((r[1].length===0)?r[0]:null);};Date.getParseFunction=function(fx){var fn=Date.Grammar.formats(fx);return function(s){var r=null;try{r=fn.call({},s);}catch(e){return null;} return((r[1].length===0)?r[0]:null);};Date.getParseFunction=function(fx){var fn=Date.Grammar.formats(fx);return function(s){var r=null;try{r=fn.call({},s);}catch(e){return null;}
return((r[1].length===0)?r[0]:null);};};Date.parseExact=function(s,fx){return Date.getParseFunction(fx)(s);}; return((r[1].length===0)?r[0]:null);};};Date.parseExact=function(s,fx){return Date.getParseFunction(fx)(s);};

View File

@ -1,215 +1,215 @@
/** /**
* jQuery.ScrollTo * jQuery.ScrollTo
* Copyright (c) 2007-2009 Ariel Flesler - aflesler(at)gmail(dot)com | http://flesler.blogspot.com * Copyright (c) 2007-2009 Ariel Flesler - aflesler(at)gmail(dot)com | http://flesler.blogspot.com
* Dual licensed under MIT and GPL. * Dual licensed under MIT and GPL.
* Date: 5/25/2009 * Date: 5/25/2009
* *
* @projectDescription Easy element scrolling using jQuery. * @projectDescription Easy element scrolling using jQuery.
* http://flesler.blogspot.com/2007/10/jqueryscrollto.html * http://flesler.blogspot.com/2007/10/jqueryscrollto.html
* Works with jQuery +1.2.6. Tested on FF 2/3, IE 6/7/8, Opera 9.5/6, Safari 3, Chrome 1 on WinXP. * Works with jQuery +1.2.6. Tested on FF 2/3, IE 6/7/8, Opera 9.5/6, Safari 3, Chrome 1 on WinXP.
* *
* @author Ariel Flesler * @author Ariel Flesler
* @version 1.4.2 * @version 1.4.2
* *
* @id jQuery.scrollTo * @id jQuery.scrollTo
* @id jQuery.fn.scrollTo * @id jQuery.fn.scrollTo
* @param {String, Number, DOMElement, jQuery, Object} target Where to scroll the matched elements. * @param {String, Number, DOMElement, jQuery, Object} target Where to scroll the matched elements.
* The different options for target are: * The different options for target are:
* - A number position (will be applied to all axes). * - A number position (will be applied to all axes).
* - A string position ('44', '100px', '+=90', etc ) will be applied to all axes * - A string position ('44', '100px', '+=90', etc ) will be applied to all axes
* - A jQuery/DOM element ( logically, child of the element to scroll ) * - A jQuery/DOM element ( logically, child of the element to scroll )
* - A string selector, that will be relative to the element to scroll ( 'li:eq(2)', etc ) * - A string selector, that will be relative to the element to scroll ( 'li:eq(2)', etc )
* - A hash { top:x, left:y }, x and y can be any kind of number/string like above. * - A hash { top:x, left:y }, x and y can be any kind of number/string like above.
* - A percentage of the container's dimension/s, for example: 50% to go to the middle. * - A percentage of the container's dimension/s, for example: 50% to go to the middle.
* - The string 'max' for go-to-end. * - The string 'max' for go-to-end.
* @param {Number} duration The OVERALL length of the animation, this argument can be the settings object instead. * @param {Number} duration The OVERALL length of the animation, this argument can be the settings object instead.
* @param {Object,Function} settings Optional set of settings or the onAfter callback. * @param {Object,Function} settings Optional set of settings or the onAfter callback.
* @option {String} axis Which axis must be scrolled, use 'x', 'y', 'xy' or 'yx'. * @option {String} axis Which axis must be scrolled, use 'x', 'y', 'xy' or 'yx'.
* @option {Number} duration The OVERALL length of the animation. * @option {Number} duration The OVERALL length of the animation.
* @option {String} easing The easing method for the animation. * @option {String} easing The easing method for the animation.
* @option {Boolean} margin If true, the margin of the target element will be deducted from the final position. * @option {Boolean} margin If true, the margin of the target element will be deducted from the final position.
* @option {Object, Number} offset Add/deduct from the end position. One number for both axes or { top:x, left:y }. * @option {Object, Number} offset Add/deduct from the end position. One number for both axes or { top:x, left:y }.
* @option {Object, Number} over Add/deduct the height/width multiplied by 'over', can be { top:x, left:y } when using both axes. * @option {Object, Number} over Add/deduct the height/width multiplied by 'over', can be { top:x, left:y } when using both axes.
* @option {Boolean} queue If true, and both axis are given, the 2nd axis will only be animated after the first one ends. * @option {Boolean} queue If true, and both axis are given, the 2nd axis will only be animated after the first one ends.
* @option {Function} onAfter Function to be called after the scrolling ends. * @option {Function} onAfter Function to be called after the scrolling ends.
* @option {Function} onAfterFirst If queuing is activated, this function will be called after the first scrolling ends. * @option {Function} onAfterFirst If queuing is activated, this function will be called after the first scrolling ends.
* @return {jQuery} Returns the same jQuery object, for chaining. * @return {jQuery} Returns the same jQuery object, for chaining.
* *
* @desc Scroll to a fixed position * @desc Scroll to a fixed position
* @example $('div').scrollTo( 340 ); * @example $('div').scrollTo( 340 );
* *
* @desc Scroll relatively to the actual position * @desc Scroll relatively to the actual position
* @example $('div').scrollTo( '+=340px', { axis:'y' } ); * @example $('div').scrollTo( '+=340px', { axis:'y' } );
* *
* @desc Scroll using a selector (relative to the scrolled element) * @desc Scroll using a selector (relative to the scrolled element)
* @example $('div').scrollTo( 'p.paragraph:eq(2)', 500, { easing:'swing', queue:true, axis:'xy' } ); * @example $('div').scrollTo( 'p.paragraph:eq(2)', 500, { easing:'swing', queue:true, axis:'xy' } );
* *
* @desc Scroll to a DOM element (same for jQuery object) * @desc Scroll to a DOM element (same for jQuery object)
* @example var second_child = document.getElementById('container').firstChild.nextSibling; * @example var second_child = document.getElementById('container').firstChild.nextSibling;
* $('#container').scrollTo( second_child, { duration:500, axis:'x', onAfter:function(){ * $('#container').scrollTo( second_child, { duration:500, axis:'x', onAfter:function(){
* alert('scrolled!!'); * alert('scrolled!!');
* }}); * }});
* *
* @desc Scroll on both axes, to different values * @desc Scroll on both axes, to different values
* @example $('div').scrollTo( { top: 300, left:'+=200' }, { axis:'xy', offset:-20 } ); * @example $('div').scrollTo( { top: 300, left:'+=200' }, { axis:'xy', offset:-20 } );
*/ */
;(function( $ ){ ;(function( $ ){
// Global shortcut: scrolls the window by delegating to $.fn.scrollTo.
// The result of that call is returned so $.scrollTo(...) can be chained,
// the same way $(...).scrollTo(...) can.
var $scrollTo = $.scrollTo = function( target, duration, settings ){
	return $(window).scrollTo( target, duration, settings );
};
// Default settings; $.fn.scrollTo merges the caller's settings over these.
$scrollTo.defaults = {
	axis:'xy',
	// 0 for jQuery 1.3 and later, 1 for older versions.
	// The version is compared component by component because parseFloat()
	// reads "1.10.2" as 1.1, which would be misclassified as older than 1.3.
	duration: (function( version ){
		var parts = String( version ).split('.'),
			major = parseInt( parts[0], 10 ) || 0,
			minor = parseInt( parts[1], 10 ) || 0;
		return major > 1 || ( major == 1 && minor >= 3 ) ? 0 : 1;
	})( $.fn.jquery )
};
// Returns the element that needs to be animated to scroll the window.
// Kept for backwards compatibility (specially for localScroll & serialScroll).
// The scope argument is accepted but not used.
$scrollTo.window = function( scope ){
	return $(window)._scrollable();
};
// Hack, hack, hack :)
// Returns the real elements to scroll (supports window/iframes, documents and regular nodes)
$.fn._scrollable = function(){
	return this.map(function(){
		var elem = this,
			// True when the element has no nodeName or is one of the
			// document-level nodes listed here.
			isWin = !elem.nodeName || $.inArray( elem.nodeName.toLowerCase(), ['iframe','#document','html','body'] ) != -1;

		// Regular nodes scroll themselves.
		if( !isWin )
			return elem;

		// Resolve the document: an iframe's contentWindow.document, a window's
		// document, a node's ownerDocument, or the element itself.
		var doc = (elem.contentWindow || elem).document || elem.ownerDocument || elem;

		// body when $.browser.safari is set or the document is in BackCompat
		// mode, documentElement otherwise.
		return $.browser.safari || doc.compatMode == 'BackCompat' ?
			doc.body :
			doc.documentElement;
	});
};
$.fn.scrollTo = function( target, duration, settings ){ $.fn.scrollTo = function( target, duration, settings ){
if( typeof duration == 'object' ){ if( typeof duration == 'object' ){
settings = duration; settings = duration;
duration = 0; duration = 0;
} }
if( typeof settings == 'function' ) if( typeof settings == 'function' )
settings = { onAfter:settings }; settings = { onAfter:settings };
if( target == 'max' ) if( target == 'max' )
target = 9e9; target = 9e9;
settings = $.extend( {}, $scrollTo.defaults, settings ); settings = $.extend( {}, $scrollTo.defaults, settings );
// Speed is still recognized for backwards compatibility // Speed is still recognized for backwards compatibility
duration = duration || settings.speed || settings.duration; duration = duration || settings.speed || settings.duration;
// Make sure the settings are given right // Make sure the settings are given right
settings.queue = settings.queue && settings.axis.length > 1; settings.queue = settings.queue && settings.axis.length > 1;
if( settings.queue ) if( settings.queue )
// Let's keep the overall duration // Let's keep the overall duration
duration /= 2; duration /= 2;
settings.offset = both( settings.offset ); settings.offset = both( settings.offset );
settings.over = both( settings.over ); settings.over = both( settings.over );
return this._scrollable().each(function(){ return this._scrollable().each(function(){
var elem = this, var elem = this,
$elem = $(elem), $elem = $(elem),
targ = target, toff, attr = {}, targ = target, toff, attr = {},
win = $elem.is('html,body'); win = $elem.is('html,body');
switch( typeof targ ){ switch( typeof targ ){
// A number will pass the regex // A number will pass the regex
case 'number': case 'number':
case 'string': case 'string':
if( /^([+-]=)?\d+(\.\d+)?(px|%)?$/.test(targ) ){ if( /^([+-]=)?\d+(\.\d+)?(px|%)?$/.test(targ) ){
targ = both( targ ); targ = both( targ );
// We are done // We are done
break; break;
} }
// Relative selector, no break! // Relative selector, no break!
targ = $(targ,this); targ = $(targ,this);
case 'object': case 'object':
// DOMElement / jQuery // DOMElement / jQuery
if( targ.is || targ.style ) if( targ.is || targ.style )
// Get the real position of the target // Get the real position of the target
toff = (targ = $(targ)).offset(); toff = (targ = $(targ)).offset();
} }
$.each( settings.axis.split(''), function( i, axis ){ $.each( settings.axis.split(''), function( i, axis ){
var Pos = axis == 'x' ? 'Left' : 'Top', var Pos = axis == 'x' ? 'Left' : 'Top',
pos = Pos.toLowerCase(), pos = Pos.toLowerCase(),
key = 'scroll' + Pos, key = 'scroll' + Pos,
old = elem[key], old = elem[key],
max = $scrollTo.max(elem, axis); max = $scrollTo.max(elem, axis);
if( toff ){// jQuery / DOMElement if( toff ){// jQuery / DOMElement
attr[key] = toff[pos] + ( win ? 0 : old - $elem.offset()[pos] ); attr[key] = toff[pos] + ( win ? 0 : old - $elem.offset()[pos] );
// If it's a dom element, reduce the margin // If it's a dom element, reduce the margin
if( settings.margin ){ if( settings.margin ){
attr[key] -= parseInt(targ.css('margin'+Pos)) || 0; attr[key] -= parseInt(targ.css('margin'+Pos)) || 0;
attr[key] -= parseInt(targ.css('border'+Pos+'Width')) || 0; attr[key] -= parseInt(targ.css('border'+Pos+'Width')) || 0;
} }
attr[key] += settings.offset[pos] || 0; attr[key] += settings.offset[pos] || 0;
if( settings.over[pos] ) if( settings.over[pos] )
// Scroll to a fraction of its width/height // Scroll to a fraction of its width/height
attr[key] += targ[axis=='x'?'width':'height']() * settings.over[pos]; attr[key] += targ[axis=='x'?'width':'height']() * settings.over[pos];
}else{ }else{
var val = targ[pos]; var val = targ[pos];
// Handle percentage values // Handle percentage values
attr[key] = val.slice && val.slice(-1) == '%' ? attr[key] = val.slice && val.slice(-1) == '%' ?
parseFloat(val) / 100 * max parseFloat(val) / 100 * max
: val; : val;
} }
// Number or 'number' // Number or 'number'
if( /^\d+$/.test(attr[key]) ) if( /^\d+$/.test(attr[key]) )
// Check the limits // Check the limits
attr[key] = attr[key] <= 0 ? 0 : Math.min( attr[key], max ); attr[key] = attr[key] <= 0 ? 0 : Math.min( attr[key], max );
// Queueing axes // Queueing axes
if( !i && settings.queue ){ if( !i && settings.queue ){
// Don't waste time animating, if there's no need. // Don't waste time animating, if there's no need.
if( old != attr[key] ) if( old != attr[key] )
// Intermediate animation // Intermediate animation
animate( settings.onAfterFirst ); animate( settings.onAfterFirst );
// Don't animate this axis again in the next iteration. // Don't animate this axis again in the next iteration.
delete attr[key]; delete attr[key];
} }
}); });
animate( settings.onAfter ); animate( settings.onAfter );
function animate( callback ){ function animate( callback ){
$elem.animate( attr, duration, settings.easing, callback && function(){ $elem.animate( attr, duration, settings.easing, callback && function(){
callback.call(this, target, settings); callback.call(this, target, settings);
}); });
}; };
}).end(); }).end();
}; };
// Max scrolling position, works on quirks mode // Max scrolling position, works on quirks mode
// It only fails (not too badly) on IE, quirks mode. // It only fails (not too badly) on IE, quirks mode.
$scrollTo.max = function( elem, axis ){ $scrollTo.max = function( elem, axis ){
var Dim = axis == 'x' ? 'Width' : 'Height', var Dim = axis == 'x' ? 'Width' : 'Height',
scroll = 'scroll'+Dim; scroll = 'scroll'+Dim;
if( !$(elem).is('html,body') ) if( !$(elem).is('html,body') )
return elem[scroll] - $(elem)[Dim.toLowerCase()](); return elem[scroll] - $(elem)[Dim.toLowerCase()]();
var size = 'client' + Dim, var size = 'client' + Dim,
html = elem.ownerDocument.documentElement, html = elem.ownerDocument.documentElement,
body = elem.ownerDocument.body; body = elem.ownerDocument.body;
return Math.max( html[scroll], body[scroll] ) return Math.max( html[scroll], body[scroll] )
- Math.min( html[size] , body[size] ); - Math.min( html[size] , body[size] );
}; };
function both( val ){ function both( val ){
return typeof val == 'object' ? val : { top:val, left:val }; return typeof val == 'object' ? val : { top:val, left:val };
}; };
})( jQuery ); })( jQuery );

View File

@ -1,67 +1,67 @@
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>' __copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, htmlentitydefs import re, htmlentitydefs
from future_builtins import map from future_builtins import map
# Lazily compiled default pattern, built on first use and cached across calls.
_ascii_pat = None


def clean_ascii_chars(txt, charlist=None):
    r'''
    Remove ASCII control chars.
    This is all control chars except \t, \n and \r

    :param txt: The text to clean. Any falsy value yields ``''``.
    :param charlist: Optional iterable of integer code points to strip instead
        of the default control-character set.
    :return: ``txt`` with every occurrence of the selected characters removed.
    '''
    if not txt:
        return ''
    global _ascii_pat
    if _ascii_pat is None:
        chars = set(xrange(32))
        chars.add(127)
        for x in (9, 10, 13):
            chars.remove(x)
        _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))

    if charlist is None:
        pat = _ascii_pat
    else:
        # Escape each character: the caller may pass code points that are regex
        # metacharacters (e.g. 46 '.' or 40 '('), which must match literally.
        pat = re.compile(u'|'.join(re.escape(unichr(c)) for c in charlist))
    return pat.sub('', txt)
def allowed(x):
    '''
    Return True if the single character ``x`` may be kept in XML text.

    Follows the XML 1.0 ``Char`` production (tab, LF, CR, U+0020-U+D7FF,
    U+E000-U+FFFD, U+10000-U+10FFFF) with the range end points included,
    and additionally rejects DEL (127).
    '''
    x = ord(x)
    if x in (9, 10, 13):
        return True
    if x == 127:
        return False
    return (0x20 <= x <= 0xd7ff) or (0xe000 <= x <= 0xfffd) or (0x10000 <= x <= 0x10ffff)
def clean_xml_chars(unicode_string):
    '''
    Return ``unicode_string`` with every character rejected by ``allowed()``
    removed.
    '''
    return u''.join(filter(allowed, unicode_string))
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @param rm If true, matches that are not returned as a decoded numeric
#           reference are replaced by rchar instead of being left in place.
# @param rchar Replacement string used when rm is true.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text, rm=False, rchar=u''):
    def fixup(m, rm=rm, rchar=rchar):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return unichr(int(text[3:-1], 16))
                else:
                    return unichr(int(text[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or code point out of range.
                # OverflowError: number too large for unichr() to accept at all.
                pass
        else:
            # named entity
            try:
                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        # NOTE(review): with rm=True this also replaces named entities that were
        # decoded successfully just above -- confirm that is intended.
        if rm:
            return rchar  # replace by char
        return text  # leave as is
    return re.sub(r"&#?\w+;", fixup, text)

View File

@ -1,25 +1,25 @@
Copyright (c) 2004-2011, CherryPy Team (team@cherrypy.org)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the CherryPy Team nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,175 +1,175 @@
from lxml import etree from lxml import etree
from html5lib.treebuilders.etree import tag_regexp from html5lib.treebuilders.etree import tag_regexp
from gettext import gettext from gettext import gettext
_ = gettext _ = gettext
import _base import _base
from html5lib.constants import voidElements from html5lib.constants import voidElements
from html5lib import ihatexml from html5lib import ihatexml
class Root(object):
    '''
    Synthetic document node wrapping an element tree.

    ``children`` holds an optional Doctype (when the tree's docinfo reports an
    internal DTD) followed by every top-level sibling of the root element, in
    document order.
    '''

    def __init__(self, et):
        self.elementtree = et
        self.children = []
        if et.docinfo.internalDTD:
            self.children.append(Doctype(self, et.docinfo.root_name,
                                         et.docinfo.public_id,
                                         et.docinfo.system_url))
        root = et.getroot()
        node = root

        # Rewind to the first top-level sibling that precedes the root element.
        while node.getprevious() is not None:
            node = node.getprevious()
        # Then collect every top-level sibling from there onwards.
        while node is not None:
            self.children.append(node)
            node = node.getnext()

        self.text = None
        self.tail = None

    def __getitem__(self, key):
        return self.children[key]

    def getnext(self):
        # The document node has no siblings.
        return None

    def __len__(self):
        # Always reports 1, regardless of how many children were collected.
        return 1
class Doctype(object):
    '''
    Stand-in node for the document type declaration, stored as the first
    child of a Root.
    '''

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name = name
        self.public_id = public_id
        self.system_id = system_id

        self.text = None
        self.tail = None

    def getnext(self):
        # Root puts the Doctype at index 0, so the next sibling is at index 1.
        return self.root_node.children[1]
class FragmentRoot(Root):
    '''
    Root node for a fragment given as a list of children; each child is
    wrapped in a FragmentWrapper. Root.__init__ is deliberately not called.
    '''

    def __init__(self, children):
        self.children = [FragmentWrapper(self, child) for child in children]
        self.text = self.tail = None

    def getnext(self):
        return None
class FragmentWrapper(object):
    '''
    Wraps one top-level item of a fragment (an element-like object or a
    string) so it can report its next sibling within the fragment.
    Attributes not defined here are forwarded to the wrapped object.
    '''

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        # Copy text/tail when the wrapped object has them; otherwise use None.
        if hasattr(self.obj, 'text'):
            self.text = self.obj.text
        else:
            self.text = None
        if hasattr(self.obj, 'tail'):
            self.tail = self.obj.tail
        else:
            self.tail = None
        self.isstring = isinstance(obj, basestring)

    def __getattr__(self, name):
        # Only reached for attributes not set on the wrapper itself.
        return getattr(self.obj, name)

    def getnext(self):
        siblings = self.root_node.children
        idx = siblings.index(self)
        if idx < len(siblings) - 1:
            return siblings[idx + 1]
        else:
            return None

    def __getitem__(self, key):
        return self.obj[key]

    def __nonzero__(self):
        return bool(self.obj)

    def getparent(self):
        return None

    def __str__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)
class TreeWalker(_base.NonRecursiveTreeWalker):
    '''
    Tree walker over lxml trees and fragment lists.

    Text is represented as ``(element, "text")`` or ``(element, "tail")``
    tuples; everything else is an element, Root, Doctype or FragmentWrapper.
    '''

    def __init__(self, tree):
        # Wrap whole trees and fragment lists so both have a single root node.
        if hasattr(tree, "getroot"):
            tree = Root(tree)
        elif isinstance(tree, list):
            tree = FragmentRoot(tree)
        _base.NonRecursiveTreeWalker.__init__(self, tree)
        self.filter = ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        '''Return a tuple whose first item is the _base node type for ``node``.'''
        if isinstance(node, tuple): # Text node
            node, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            return _base.TEXT, getattr(node, key)

        elif isinstance(node, Root):
            return (_base.DOCUMENT,)

        elif isinstance(node, Doctype):
            return _base.DOCTYPE, node.name, node.public_id, node.system_id

        elif isinstance(node, FragmentWrapper) and node.isstring:
            # Hand back the wrapped string itself, matching the text-node branch
            # above which also returns a plain string rather than a wrapper.
            return _base.TEXT, node.obj

        elif node.tag == etree.Comment:
            return _base.COMMENT, node.text

        else:
            #This is assumed to be an ordinary element
            match = tag_regexp.match(node.tag)
            if match:
                namespace, tag = match.groups()
            else:
                namespace = None
                tag = node.tag
            return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                    [(self.filter.fromXmlName(name), value) for
                     name,value in node.attrib.iteritems()],
                    len(node) > 0 or node.text)

    def getFirstChild(self, node):
        '''Return the leading text tuple if the node has text, else its first child.'''
        assert not isinstance(node, tuple), _("Text nodes have no children")

        assert len(node) or node.text, "Node has no children"
        if node.text:
            return (node, "text")
        else:
            return node[0]

    def getNextSibling(self, node):
        '''Return the node that follows ``node`` at the same level, or None.'''
        if isinstance(node, tuple): # Text node
            node, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            if key == "text":
                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
                # because node[0] might evaluate to False if it has no child element
                if len(node):
                    return node[0]
                else:
                    return None
            else: # tail
                return node.getnext()

        # An element is followed by its own tail text, if any, before its next sibling.
        return node.tail and (node, "tail") or node.getnext()

    def getParentNode(self, node):
        '''Return the parent of ``node``; leading text belongs to its own element.'''
        if isinstance(node, tuple): # Text node
            node, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            if key == "text":
                return node
            # else: fallback to "normal" processing

        return node.getparent()

View File

@ -1,81 +1,81 @@
ACKNOWLEDGMENTS
* RAR text compression algorithm is based on Dmitry Shkarin PPMII
and Dmitry Subbotin carryless rangecoder public domain source code.
You may find it in ftp.elf.stuba.sk/pub/pc/pack.
* RAR encryption includes parts of code from Szymon Stefanek
and Brian Gladman AES implementations also as Steve Reid SHA-1 source.
---------------------------------------------------------------------------
Copyright (c) 2002, Dr Brian Gladman < >, Worcester, UK.
All rights reserved.
LICENSE TERMS
The free distribution and use of this software in both source and binary
form is allowed (with or without changes) provided that:
1. distributions of this source code include the above copyright
notice, this list of conditions and the following disclaimer;
2. distributions in binary form include the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other associated materials;
3. the copyright holder's name is not used to endorse products
built using this software without specific written permission.
ALTERNATIVELY, provided that this notice is retained in full, this product
may be distributed under the terms of the GNU General Public License (GPL),
in which case the provisions of the GPL apply INSTEAD OF those given above.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Source code of this package also as other cryptographic technology
and computing project related links are available on Brian Gladman's
web site: http://www.gladman.me.uk
* RAR uses CRC32 function based on Intel Slicing-by-8 algorithm.
Original Intel Slicing-by-8 code is available here:
http://sourceforge.net/projects/slicing-by-8/
Original Intel Slicing-by-8 code is licensed under BSD License
available at http://www.opensource.org/licenses/bsd-license.html
Copyright (c) 2004-2006 Intel Corporation.
All Rights Reserved
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
* Useful hints provided by Alexander Khoroshev and Bulat Ziganshin allowed
to significantly improve RAR compression and speed.