Merge from trunk
12
COPYRIGHT
@ -9,6 +9,12 @@ License: GPL-2 or later
|
||||
The full text of the GPL is distributed as in
|
||||
/usr/share/common-licenses/GPL-2 on Debian systems.
|
||||
|
||||
Files: setup/iso_639/*
|
||||
Copyright: Various
|
||||
License: LGPL 2.1
|
||||
The full text of the LGPL is distributed as in
|
||||
/usr/share/common-licenses/LGPL-2.1 on Debian systems.
|
||||
|
||||
Files: src/calibre/ebooks/BeautifulSoup.py
|
||||
Copyright: Copyright (c) 2004-2007, Leonard Richardson
|
||||
License: BSD
|
||||
@ -28,6 +34,12 @@ License: other
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved.
|
||||
|
||||
Files: src/calibre/ebooks/readability/*
|
||||
Copyright: Unknown
|
||||
License: Apache 2.0
|
||||
The full text of the Apache 2.0 license is available at:
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Files: /src/cherrypy/*
|
||||
Copyright: Copyright (c) 2004-2007, CherryPy Team (team@cherrypy.org)
|
||||
Copyright: Copyright (C) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>
|
||||
|
247
Changelog.yaml
@ -19,6 +19,253 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.8.18
|
||||
date: 2011-09-09
|
||||
|
||||
new features:
|
||||
- title: "Kindle news download: On Kindle 3 and newer have the View Articles and Sections menu remember the current article."
|
||||
tickets: [748741]
|
||||
|
||||
- title: "Conversion: Add option to unsmarten punctuation under Look & Feel"
|
||||
|
||||
- title: "Driver for Motorola Ex124G and Pandigital Nova Tablet"
|
||||
|
||||
- title: "Allow downloading metadata from amazon.co.jp. To use it, configure the amazon metadata source to use the Japanese amazon site."
|
||||
tickets: [842447]
|
||||
|
||||
- title: "When automatically generating author sort for author name, ignore common prefixes like Mr. Dr. etc. Controllable via tweak. Also add a tweak to allow control of how a string is split up into multiple authors."
|
||||
tickets: [795984]
|
||||
|
||||
- title: "TXT Output: Preserve as much formatting as possible when generating Markdown output including various CSS styles"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix pubdate incorrect when used in save to disk template in timezones ahead of GMT."
|
||||
tickets: [844445]
|
||||
|
||||
- title: "When attempting to stop multiple device jobs at once, only show a single error message"
|
||||
tickets: [841588]
|
||||
|
||||
- title: "Fix conversion of large EPUB files to PDF erroring out on systems with a limited number of available file handles"
|
||||
tickets: [816616]
|
||||
|
||||
- title: "EPUB catalog generation: Fix some entries going off the left edge of the page for unread/wishlist items"
|
||||
|
||||
- title: "When setting language in an EPUB file always use the 2 letter language code in preference to the three letter code, when possible."
|
||||
tickets: [841201]
|
||||
|
||||
- title: "Content server: Fix --url-prefix not used for links in the book details view."
|
||||
|
||||
- title: "MOBI Input: When links in a MOBI file point to just before block elements, and there is a page break on the block element, the links can end up pointing to the wrong place on conversion. Adjust the location in such cases to point to the block element directly."
|
||||
|
||||
improved recipes:
|
||||
- Kopalnia Wiedzy
|
||||
- FilmWeb.pl
|
||||
- Philadelphia Inquirer
|
||||
- Honolulu Star Advertiser
|
||||
- Counterpunch
|
||||
- Philadelphia Inquirer
|
||||
|
||||
new recipes:
|
||||
- title: Various Polish news sources
|
||||
author: fenuks
|
||||
|
||||
- version: 0.8.17
|
||||
date: 2011-09-02
|
||||
|
||||
new features:
|
||||
- title: "Basic support for Amazon AZW4 format (PDF wrapped inside a MOBI)"
|
||||
|
||||
- title: "When showing the cover browser in a separate window, allow the use of the V, D shortcut keys to view the current book and send it to device respectively."
|
||||
tickets: [836402]
|
||||
|
||||
- title: "Add an option in Preferences->Miscellaneous to abort conversion jobs that take too long."
|
||||
tickets: [835233]
|
||||
|
||||
- title: "Driver for HTC Evo and HP TouchPad (with kindle app)"
|
||||
|
||||
- title: "Preferences->Adding books, detect when the user specifies a test expression with no file extension and popup a warning"
|
||||
|
||||
bug fixes:
|
||||
- title: "E-book viewer: Ensure toolbars are always visible"
|
||||
|
||||
- title: "Content server: Fix grouping of Tags/authors not working for some non english languages with Internet Explorer"
|
||||
tickets: [835238]
|
||||
|
||||
- title: "When downloading metadata from amazon, fix italics inside brackets getting lost."
|
||||
tickets: [836857]
|
||||
|
||||
- title: "Get Books: Add EscapeMagazine.pl and RW2010.pl stores"
|
||||
|
||||
- title: "Conversion pipeline: Fix conversion of cm/mm to pts. Fixes use of cm as a length unit when converting to MOBI."
|
||||
|
||||
- title: "When showing the cover browser in a separate window, focus the cover browser so that keyboard shortcuts work immediately."
|
||||
tickets: [835933]
|
||||
|
||||
- title: "HTMLZ Output: Fix special chars like ampersands, etc. not being converted to entities"
|
||||
|
||||
- title: "Keyboard shortcuts config: Fix clicking done in the shortcut editor with shortcuts set to default caused the displayed shortcut to be always set to None"
|
||||
|
||||
- title: "Fix bottom most entries in keyboard shortcuts not editable"
|
||||
|
||||
improved recipes:
|
||||
- Hacker News
|
||||
- Nikkei News
|
||||
|
||||
new recipes:
|
||||
- title: "Haber 7 and Hira"
|
||||
author: thomass
|
||||
|
||||
- title: "NTV and NTVSpor by A Erdogan"
|
||||
author: A Erdogan
|
||||
|
||||
|
||||
- version: 0.8.16
|
||||
date: 2011-08-26
|
||||
|
||||
new features:
|
||||
- title: "News download: Add algorithms to automatically clean up downloaded HTML"
|
||||
description: "Use the algorithms from the Readability project to automatically cleanup downloaded HTML. You can turn this on in your own recipes by adding auto_cleanup=True to the recipe. It is turned on by default for basic recipes created via the GUI. This makes it a little easier to develop recipes for beginners."
|
||||
type: major
|
||||
|
||||
- title: "Add an option to Preferences->Look and Feel->Cover Browser to show the cover browser full screen. When showing the cover browser in a separate window, you can make it fullscreen by pressing the F11 key."
|
||||
tickets: [829855]
|
||||
|
||||
- title: "Show the languages currently used at the top of the drop down list of languages"
|
||||
|
||||
- title: "When automatically computing author sort from author's name, if the name contains certain words like Inc., Company, Team, etc. use the author name as the sort string directly. The list of such words can be controlled via Preferences->Tweaks."
|
||||
tickets: [797895]
|
||||
|
||||
- title: "Add a search for individual tweaks to Preferences->Tweaks"
|
||||
|
||||
- title: "Drivers for a few new android phones"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix line unwrapping algorithms to account for some central European characters as well."
|
||||
tickets: [822744]
|
||||
|
||||
- title: "Switch to using more modern language names/translations from the iso-codes package"
|
||||
|
||||
- title: "Allow case insensitive entering of language names for convenience."
|
||||
tickets: [832761]
|
||||
|
||||
- title: "When adding a text indent to paragraphs as part of the remove spacing between paragraphs transformation, do not add an indent to paragraphs that are directly centered or right aligned."
|
||||
tickets: [830439]
|
||||
|
||||
- title: "Conversion pipeline: More robust handling of case insensitive tag and class css selectors"
|
||||
|
||||
- title: "MOBI Output: Add support for the start attribute on <ol> tags"
|
||||
|
||||
- title: "When adding books that have no language specified, do not automatically set the language to calibre's interface language."
|
||||
tickets: [830092]
|
||||
|
||||
- title: "Fix use of tag browser to search for languages when calibre is translated to a non English language"
|
||||
tickets: [830078]
|
||||
|
||||
- title: "When downloading news, set the language field correctly"
|
||||
|
||||
- title: "Fix languages field in the Edit metadata dialog too wide"
|
||||
tickets: [829912]
|
||||
|
||||
- title: "Fix setting of languages that have commas in their names broken"
|
||||
|
||||
- title: "FB2 Input: When converting FB2 files, read the cover from the FB2 file correctly."
|
||||
tickets: [829240]
|
||||
|
||||
improved recipes:
|
||||
- Politifact
|
||||
- Reuters
|
||||
- Sueddeutsche
|
||||
- CNN
|
||||
- Financial Times UK
|
||||
- MSDN Magazine
|
||||
- Houston Chronicle
|
||||
- Harvard Business Review
|
||||
|
||||
new recipes:
|
||||
- title: CBN News and Fairbanks Daily
|
||||
author: Roger
|
||||
|
||||
- title: Hacker News
|
||||
author: Tom Scholl
|
||||
|
||||
- title: Various Turkish news sources
|
||||
author: thomass
|
||||
|
||||
- title: Cvece Zla
|
||||
author: Darko Miletic
|
||||
|
||||
- title: Various Polish news sources
|
||||
author: fenuks
|
||||
|
||||
- title: Fluter
|
||||
author: Armin Geller
|
||||
|
||||
- title: Brasil de Fato
|
||||
author: Alex Mitrani
|
||||
|
||||
- version: 0.8.15
|
||||
date: 2011-08-19
|
||||
|
||||
new features:
|
||||
- title: "Add a 'languages' metadata field."
|
||||
type: major
|
||||
description: "This is useful if you have a multi-lingual book collection. You can now set one or more languages per book via the Edit Metadata dialog. If you want the languages
|
||||
column to be visible, then go to Preferences->Add your own columns and unhide the languages columns. You can also bulk set the languages on multiple books via the bulk edit metadata dialog. You can also have the languages show up in the book details panel on the right by going to Preferences->Look and Feel->Book details"
|
||||
|
||||
- title: "Get Books: Add XinXii store."
|
||||
|
||||
- title: "Metadata download plugin for ozon.ru, enabled only when user selects Russian as their language in the welcome wizard."
|
||||
|
||||
- title: "Bambook driver: Allow direct transfer of PDF files to Bambook devices"
|
||||
|
||||
- title: "Driver for Coby MID7015A and Asus EEE Note"
|
||||
|
||||
- title: "Edit metadata dialog: The keyboard shortcut Ctrl+D can now be used to trigger a metadata download. Also show the row number of the book being edited in the titlebar"
|
||||
|
||||
- title: "Add an option to not preserve the date when using the 'Copy to Library' function (found in Preferences->Adding books)"
|
||||
|
||||
bug fixes:
|
||||
- title: "Linux binary: Use readlink -f rather than readlink -e in the launcher scripts so that they work with recent releases of busybox"
|
||||
|
||||
- title: "When bulk downloading metadata for more than 100 books at a time, automatically split up the download into batches of 100."
|
||||
tickets: [828373]
|
||||
|
||||
- title: "When deleting books from the Kindle also delete 'sidecar' .apnx and .ph1 files as the kindle does not clean them up automatically"
|
||||
tickets: [827684]
|
||||
|
||||
- title: "Fix a subtle bug in the device drivers that caused calibre to lose track of some books on the device if you used author_sort in the send to device template and your books have author sort values that differ only in case."
|
||||
tickets: [825706]
|
||||
|
||||
- title: "Fix scene break character pattern not saved in conversion preferences"
|
||||
tickets: [826038]
|
||||
|
||||
- title: "Keyboard shortcuts: Fix a bug triggered by some third party plugins that made the keyboard preferences unusable in OS X."
|
||||
tickets: [826325]
|
||||
|
||||
- title: "Search box: Fix completion no longer working after using Tag Browser to do a search. Also ensure that completer popup is always hidden when a search is performed."
|
||||
|
||||
- title: "Fix pressing Enter in the search box causes the same search to be executed twice in the plugins and keyboard shortcuts preferences panels"
|
||||
|
||||
- title: "Catalog generation: Fix error creating epub/mobi catalogs on non UTF-8 windows systems when the metadata contained non ASCII characters"
|
||||
|
||||
improved recipes:
|
||||
- Financial Times UK
|
||||
- La Tercera
|
||||
- Folha de Sao Paulo
|
||||
- Metro Nieuws NL
|
||||
- La Nacion
|
||||
- Juventud Rebelde
|
||||
- Rzeczpospolita Online
|
||||
- Newsweek Polska
|
||||
- CNET news
|
||||
|
||||
new recipes:
|
||||
- title: El Mostrador and The Clinic
|
||||
author: Alex Mitrani
|
||||
|
||||
- title: Patente de Corso
|
||||
author: Oscar Megia Lopez
|
||||
|
||||
- version: 0.8.14
|
||||
date: 2011-08-12
|
||||
|
||||
|
98
imgsrc/languages.svg
Normal file
@ -0,0 +1,98 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="128" width="128" version="1.0" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 256 256">
|
||||
<defs>
|
||||
<linearGradient id="b" y2="158.07" gradientUnits="userSpaceOnUse" x2="141.27" gradientTransform="matrix(1.68, 0, 0, 1.68, -86.7, -86.7)" y1="70.428" x1="141.27">
|
||||
<stop stop-color="#FFF" offset="0"/>
|
||||
<stop stop-color="#00a200" offset="1"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="a" y2="158.07" gradientUnits="userSpaceOnUse" y1="70.428" gradientTransform="matrix(1.68, 0, 0, 1.68, -86.7, -86.7)" x2="141.27" x1="141.27">
|
||||
<stop stop-color="#FFF" offset="0"/>
|
||||
<stop stop-color="#00a100" offset="0.5"/>
|
||||
<stop stop-color="#000" offset="1"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="c" y2="397.34" gradientUnits="userSpaceOnUse" x2="12.991" gradientTransform="matrix(2.573, 0, 0, -2.573, 207.924, 1307.73)" y1="397.34" x1="-117">
|
||||
<stop stop-color="#0053BD" offset="0"/>
|
||||
<stop stop-color="#0032A4" offset="1"/>
|
||||
</linearGradient>
|
||||
<radialGradient id="d" gradientUnits="userSpaceOnUse" cy="439.63" cx="-57.022" gradientTransform="matrix(2.573, 0, 0, -2.573, 207.924, 1307.73)" r="98">
|
||||
<stop stop-color="#FFF" offset="0"/>
|
||||
<stop stop-color="#57ADFF" offset="0.6"/>
|
||||
<stop stop-color="#C9E6FF" offset="1"/>
|
||||
</radialGradient>
|
||||
<linearGradient id="e" y2="183.37" gradientUnits="userSpaceOnUse" x2="127.66" gradientTransform="matrix(2.573, 0, 0, 2.573, -251.365, -39.26)" y1="63.215" x1="127.66">
|
||||
<stop stop-color="#006a00" offset="0"/>
|
||||
<stop stop-color="#004000" offset="0.2"/>
|
||||
<stop stop-color="#00d000" offset="1"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="f" y2="361.42" gradientUnits="userSpaceOnUse" x2="-52.251" gradientTransform="matrix(2.573, 0, 0, -2.573, 207.924, 1307.73)" y1="457.03" x1="-52.251">
|
||||
<stop stop-color="#FFF" offset="0"/>
|
||||
<stop stop-color="#94CAFF" offset="1"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="g" y2="158.07" xlink:href="#a" gradientUnits="userSpaceOnUse" x2="141.27" gradientTransform="matrix(2.573, 0, 0, 2.573, -251.365, -39.26)" y1="70.428" x1="141.27"/>
|
||||
<linearGradient id="h" y2="130.03" xlink:href="#a" gradientUnits="userSpaceOnUse" x2="100.51" gradientTransform="matrix(2.573, 0, 0, 2.573, -251.365, -39.26)" y1="70.033" x1="100.51"/>
|
||||
<linearGradient id="i" y2="85.32" xlink:href="#b" gradientUnits="userSpaceOnUse" x2="120.48" gradientTransform="matrix(2.573, 0, 0, 2.573, -251.365, -39.26)" y1="68.117" x1="120.48"/>
|
||||
<linearGradient id="j" y2="79.161" xlink:href="#b" gradientUnits="userSpaceOnUse" x2="124.57" y1="73.444" x1="124.57"/>
|
||||
<linearGradient id="k" y2="73.865" xlink:href="#b" gradientUnits="userSpaceOnUse" x2="132.78" y1="67.756" x1="132.78"/>
|
||||
<linearGradient id="l" y2="323.36" gradientUnits="userSpaceOnUse" x2="258.77" gradientTransform="translate(5.58, -12.8322)" y1="408.7" x1="258.77">
|
||||
<stop stop-color="#3434ff" offset="0"/>
|
||||
<stop stop-color="#b9b9b9" offset="1"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="m" y2="85.792" gradientUnits="userSpaceOnUse" x2="-60.735" gradientTransform="translate(2.16, -1.33)" y1="171.13" x1="-60.735">
|
||||
<stop stop-color="#ffff01" offset="0"/>
|
||||
<stop stop-color="#b9b9b9" offset="1"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="n" y2="298.71" gradientUnits="userSpaceOnUse" x2="-105.42" y1="384.04" x1="-105.42">
|
||||
<stop stop-color="red" offset="0"/>
|
||||
<stop stop-color="#b9b9b9" offset="1"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="o" y2="408.7" gradientUnits="userSpaceOnUse" x2="32.595" gradientTransform="translate(-3.45, -0.43)" y1="494.61" x1="32.595">
|
||||
<stop stop-color="lime" offset="0"/>
|
||||
<stop stop-color="#b9b9b9" offset="1"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="p" y2="99.849" gradientUnits="userSpaceOnUse" x2="230.67" gradientTransform="translate(1.59, 1.61)" y1="171.13" x1="230.67">
|
||||
<stop stop-color="#F0F" offset="0"/>
|
||||
<stop stop-color="#b9b9b9" offset="1"/>
|
||||
</linearGradient>
|
||||
</defs>
|
||||
<g transform="translate(-3.417, 1.068)">
|
||||
<g transform="matrix(0.6, 0, 0, 0.6, 83.43, -47.62)">
|
||||
<path fill-opacity="0.3" d="M-39.634,171.47c-31.743,31.66-49.227,73.53-49.227,117.89,0,92.35,75.02,167.48,167.23,167.48,92.218,0,167.25-75.13,167.25-167.48,0-92.06-75.03-166.96-167.25-166.96-44.38,0.01-86.288,17.43-118.01,49.07z" fill="#000"/>
|
||||
<path d="M-43.9,167.2c-31.744,31.66-49.228,73.53-49.228,117.89,0,92.35,75.02,167.48,167.23,167.48,92.225,0,167.24-75.13,167.24-167.48,0-92.06-75.02-166.96-167.24-166.96-44.38,0.01-86.287,17.43-118,49.07z" fill="url(#c)"/>
|
||||
<path d="M-39.03,172.09c-30.439,30.35-47.207,70.49-47.207,113,0,88.55,71.929,160.59,160.34,160.59,88.42,0,160.35-72.04,160.35-160.59,0-88.25-71.93-160.06-160.35-160.06-42.533,0.01-82.714,16.72-113.13,47.06z" fill="#b0d9ff"/>
|
||||
<path d="M74.105,440.51c85.675,0,155.18-69.8,155.18-155.42,0-85.08-69.51-154.88-155.18-154.88-85.068,0-155.16,69.8-155.16,154.88,0,85.62,70.098,155.42,155.16,155.42z" fill="#FFF"/>
|
||||
<path d="M74.105,440.51c85.675,0,155.18-69.8,155.18-155.42,0-85.08-69.51-154.88-155.18-154.88-85.068,0-155.16,69.8-155.16,154.88,0,85.62,70.098,155.42,155.16,155.42z" fill="url(#d)"/>
|
||||
<path d="M22.564,147.28c-0.767,0-1.608,0.31-2.467,0.8,2.019-0.59,2.969-0.8,2.467-0.8m49.961,9.74l0.502-7.31-7.712,0.49,1.019,6.82h6.191m-87.044,126.31c-1.554-1.46-0.512-7.81-0.512-7.81s-23.183-12.18-48.417-19.5c-3.111-0.9-1.541-7.31,1.539-9.74l-1.022-6.84c-0.512-3.41,5.157-19.99,10.82-21.44,5.669-1.47-0.517,9.74-0.517,9.74l-5.661,3.41s6.693,7.8,8.244,7.8c1.542,0,4.117-3.9,4.117-3.9l-7.207-4.87,6.695-2.93,0.422-2.57,1.125-0.36,11.087-16.79c7.662-3.14,17.08-7.02,18.276-7.59,2.064-0.97,16.478-9.26,19.056-11.2,2.5833-1.97,8.2368-1.47,10.292-1.47,2.072,0,5.157-0.98,5.669-6.35,0.514-5.36,2.578-6.34,4.127-4.87,1.549,1.45-1.549,3.9,2.061,4.87,3.602,0.98,6.69,3.42,9.273,0.98,1.881-1.78-0.638-3.82-2.336-5.35h28.599l3.096-8.8-6.695-0.97-24.735-2.43v-2.93l-1.971,0.33c2.679-14.71,18.58-12.29,6.608-20.8-0.729-0.53-11.203,16.5-13.869,16.17-4.83-0.64-11.067-0.69-12.395,0.88-1.7595,2.08,3.95-7.13,8.862-9.92-7.845,2.31-32.626,10.79-58.82,38.7-25.046,26.67-35.032,62.62-35.032,63.88,0,2.43,5.156,3.41,5.668,6.34,0.518,2.91-9.785,12.68-9.785,17.55,0,2.25-2.426,32.3,6.17,57.06,10.051,28.89,31.505,53.03,34.001,54.62l5.146-2.44s-11.847-20.97-12.359-22.93c-0.509-1.94,13.391-30.23,20.093-29.24,6.693,0.95,5.152,2.92,9.271,0.48,4.122-2.43,6.6951-22.44,11.332-24.38,4.6392-1.96,9.7853-4.39,9.2733-9.27-0.5378-4.89-18.047-13.66-19.589-15.12m111.77-143.37l-13.905-4.88,2.578,7.81,11.327-2.93m-52.022,18.04c1.552,0,32.449-20.47,29.359-20.96-3.08-0.49-3.598,0-11.854-0.98-8.229-0.98-16.993,11.7-19.049,13.66-2.061,1.95-1.366,8.28,1.544,8.28m141.8,147.04l3.99-5.18-3.99-1.4-2.99,3.76-3.48,5.16,2.99,1.41,3.48-3.75m17.45,16.93l-1-7.53h-6.48l-0.5,5.64-5.99-0.93-1.48-6.12-2.99-1.88-3.5,4.23-3.48-0.94-1,3.29,3.99,0.95v30.54l14.11,3.4c-0.33,0.56-0.57,1.03-0.65,1.29-1.01,3.29,3.98,4.72,7.46,3.29,1.3-0.51,5.4-4.66,7.98-10.8,3.59-8.54,6.73-19.82,7.36-23.8l1.61-3.45-9.97,3.75-5.49-0.93h0.02m20.94-56.88l-2.61-5.04c-3.08-17.39-10.15-39.64-25.3-62.64-22.8-34.57-86.26-54.51-86.26-54.51l-3.48,4.22-2-3.28-4.99-1.89v4.24l4.
5,3.76-2.99,1.41-11.476,0.93-25.918,14.11,2.488,11.28-2.997,0.94-1.483,2.35,8.474,12.68,0.496,4.24-6.978,1.41v8.45l-3.986,0.94,0.5,6.58-33.906,23.52,0.999,13.14c2.488,3.29,21.935,23.04,21.935,23.04s22.429,0.92,27.413-1.88c4.987-2.82,1.493,2.82,2.995,4.23,1.488,1.42,1.989,11.28,3.482,12.22,1.495,0.93,0,6.57,1.994,8.46,1.991,1.87,1.991,24.44,1.991,24.44s11.967,20.2,11.967,25.37c0,5.18-0.504,4.7,8.97,4.24,9.48-0.47,11.47-4.24,13.45-5.64,2.01-1.41,2.01-4.7,4-7.52,2-2.83,5.48-13.63,9.98-17.39,4.48-3.77,16.45-6.59,17.43-13.16,1-6.58,5.49-11.75,5.49-11.75l21.55-22.8-0.6,3.06-0.5,11.74,6.48-2.34-0.49-12.7-2.33-2.46,0.33-0.36s-1.49-2.82-3.48-2.82-13.97,2.82-15.95,2.35c-2-0.47-10.48-23.03-11.97-23.96-1.5-0.94-10.97-16.46-10.97-16.46s21.93,26.32,25.43,36.66c2.02,6.02,9.63,0.41,15.82-5.87l1.64,4,3.98-0.95-0.5-4.7h4.48v7.05l-1.49,3.76-0.51,6.1,3.99,3.77,2-3.28,6.47-6.12,7.48-3.76,2,3.76,1,5.18-2,5.63-3.99,3.29-1.99,8.46v4.23l-4.48-2.82-0.49-8.93-6.49,0.48-2.99,7.98,4.49,6.59,10.46,1.41,8.48-8,1-15.49,3.77-4.98c2.45,6.31,4.21,12.92,4.21,19.08,0,6.74,3.08-4.68,0.46-25.26l1.52-2.02m-125.63-47.94l-26.917-0.95,11.46-9.39h5.983l9.474,6.57v3.77m32.913-3.29v4.23h-11.46l0.99,2.83-6.98,0.95-0.49,2.34-4.99-0.93-8.98-1.89,1.5-2.34,1.5-2.84,4.98-5.16,2,3.76,7.48-0.48,3.98-4.23,15.46,2.82-4.99,0.94m0.98-6.11l-5.97,0.94-1-4.24,7.47-0.93,1-4.23,5.5,5.65-7,2.8v0.01m28.93,146.16l-3.5,2.82,0.5,7.06h4.49v-6.1l3.99-5.18v-10.81l-2.5-0.48-2.98,12.69m-33.4-14.08s-3.49,0.91,0.49,2.33,19.94-23.01,19.94-23.01l-13.46,8.45-6.98,12.23h0.01m-27.14,90.07l-2.978-2.84-5.985-0.95-0.991,2.84-7.976-0.94-0.499-3.78h-5.98l-6.476,3.78h-11.458l-0.996-2.84-18.433-1.9-2.995,2.84-7.462-1.88-1.001-6.63-3.487-0.49-3.988,7.12-13.452-0.47c2.4088,1.13,22.491,13.12,53.301,15.61,40.856,3.31,60.296-6.62,60.296-6.62l-1.5-1.43-17.94-1.41v-0.01z" fill="url(#e)"/>
|
||||
<path d="M73.583,254c53.147,0,99.387-18.31,123.18-45.31-23.96-45.72-70.24-76.92-123.41-76.92-52.699,0-99.045,31.1-123.15,76.69,23.726,27.12,70.075,45.54,123.38,45.54z" fill="url(#f)"/>
|
||||
<path d="M141.67,229.84s3.61,4.33,8.13,10.17c19.53-7.86,35.75-18.61,46.96-31.32-7.94-15.15-18.35-28.7-30.64-40.06-25.84-16.6-54.87-25.73-54.87-25.73l-3.48,4.22-2-3.28-4.99-1.89v4.24l4.5,3.76-2.99,1.41-11.476,0.93-25.918,14.11,2.488,11.28-2.997,0.94-1.483,2.35,8.474,12.68,0.496,4.24-6.978,1.41v8.45l-3.986,0.94,0.5,6.58-33.906,23.52,0.803,10.55c14.209,3,29.423,4.66,45.276,4.66,27.537,0,53.187-4.94,74.847-13.44-3.12-4.76-6.76-10.72-6.76-10.72zm-41.883-12.69l-26.917-0.95,11.46-9.39h5.983l9.474,6.57v3.77zm32.913-3.29v4.23h-11.46l0.99,2.83-6.98,0.95-0.49,2.34-4.99-0.93-8.98-1.89,1.5-2.34,1.5-2.84,4.98-5.16,2,3.76,7.48-0.48,3.98-4.23,15.46,2.82-4.99,0.94zm0.98-6.12v0.01l-5.97,0.94-1-4.24,7.47-0.93,1-4.23,5.5,5.65-7,2.8z" fill="url(#g)"/>
|
||||
<path d="M-15.54,199.94c2.063-0.97,16.478-9.26,19.056-11.2,2.5833-1.97,8.2361-1.47,10.292-1.47,2.071,0,5.156-0.98,5.669-6.35,0.514-5.36,2.578-6.34,4.127-4.87,1.549,1.45-1.549,3.9,2.061,4.87,3.602,0.98,6.69,3.42,9.273,0.98,1.881-1.78-0.638-3.82-2.336-5.35h28.599l3.095-8.8-6.695-0.97-24.734-2.43v-2.93l-1.971,0.33c2.678-14.71,18.58-12.29,6.607-20.8-0.728-0.53-11.203,16.5-13.868,16.17-4.83-0.64-11.067-0.69-12.395,0.88-1.7598,2.08,3.95-7.13,8.862-9.92-6.106,1.8-22.478,7.38-41.781,22.9-11.17,10.8-20.701,23.45-28.121,37.48,2.789,3.19,5.918,6.25,9.309,9.18l6.674-10.11c7.663-3.13,17.083-7.02,18.277-7.59z" fill="url(#h)"/>
|
||||
<path d="M43.679,149.72c-2.059,1.95-1.364,8.28,1.546,8.28,1.552,0,32.449-20.47,29.359-20.96-3.08-0.49-3.598,0-11.854-0.98-8.232-0.98-16.996,11.7-19.051,13.66z" fill="url(#i)"/>
|
||||
<polygon points="126.07,73.444,123.08,73.631,123.47,76.284,125.88,76.284,125.88,76.285,126.07,73.444" transform="matrix(2.573, 0, 0, 2.573, -251.365, -39.26)" fill="url(#j)"/>
|
||||
<polygon points="135.49,69.653,130.08,67.756,131.08,70.792,135.49,69.653" transform="matrix(2.573, 0, 0, 2.573, -251.365, -39.26)" fill="url(#k)"/>
|
||||
</g>
|
||||
<g transform="matrix(0.6, 0, 0, 0.6, 83.43, -47.62)">
|
||||
<path d="M247.4,375.48l15.77,2.64c-2.02,5.79-5.23,10.19-9.6,13.21-4.38,3.02-9.85,4.54-16.42,4.54-10.4,0-18.1-3.4-23.09-10.2-3.95-5.44-5.92-12.32-5.92-20.61,0-9.92,2.59-17.68,7.78-23.3,5.18-5.61,11.73-8.42,19.65-8.42,8.9,0,15.92,2.94,21.07,8.82,5.14,5.88,7.6,14.88,7.38,27.01h-39.66c0.12,4.69,1.39,8.34,3.83,10.95,2.45,2.61,5.49,3.92,9.13,3.92,2.48,0,4.56-0.68,6.25-2.03s2.97-3.53,3.83-6.53zm0.9-16c-0.11-4.58-1.29-8.07-3.55-10.45-2.25-2.38-4.99-3.58-8.22-3.58-3.45,0-6.31,1.26-8.56,3.78-2.25,2.51-3.36,5.93-3.32,10.25h23.65zm-20.22-32.11l7.78-16.84h17.74l-15.49,16.84h-10.03z" stroke="#000064" stroke-width="10" fill="none"/>
|
||||
<path fill-opacity="0.3" d="M236.03,308.41l-1.72,3.71-7.78,16.85-3.06,6.62c-2.91,1.64-5.67,3.57-8,6.1-6.46,6.99-9.47,16.59-9.47,27.62,0,9.31,2.28,17.68,7.12,24.38l0.04,0.03c6.25,8.5,16.45,12.81,28.25,12.81,7.55,0,14.45-1.78,20.06-5.65,5.56-3.85,9.61-9.57,12-16.38l2.53-7.16-1.38-0.25,0.07-3.53c0.24-13.05-2.36-23.79-8.97-31.34-4.14-4.73-9.57-7.82-15.66-9.5l12.5-13.6,9.88-10.71h-36.41zm-2.75,36.59c-2.2,1.01-4.2,2.42-5.81,4.22-3.48,3.88-5.02,9.15-4.97,14.59l0.03,3.22h-0.47l0.16,6.56c0.14,5.78,1.85,11.19,5.56,15.16,1.5,1.6,3.33,2.86,5.28,3.84-4.13-1.16-7.22-3.21-9.59-6.43-3.04-4.2-4.66-9.56-4.66-16.85,0-8.8,2.16-14.7,6.07-18.93,2.5-2.72,5.24-4.41,8.4-5.38zm15.94,0.59c0.6,0.25,1.1,0.64,1.66,0.94-0.53-0.36-1.1-0.63-1.66-0.94zm1.72,0.97c1.9,1.05,3.63,2.35,5.15,4.1,2.93,3.34,4.95,8.71,5.57,16.37h-2.6l-0.09-3.44c-0.14-5.6-1.66-10.85-5.28-14.68-0.83-0.88-1.77-1.66-2.75-2.35zm-10.13,9.56c1.43,0.01,2.18,0.34,3.13,1.22h-6.44c0.91-0.86,1.7-1.21,3.31-1.22zm-3.78,23.72h7.75c-0.38,0.77-0.75,1.3-0.94,1.44-0.49,0.4-0.93,0.63-2.25,0.63-2.11,0-3.11-0.43-4.47-1.88-0.04-0.04-0.05-0.14-0.09-0.19zm18.53,7.07l1.97,0.31c-1,1.18-2.09,2.26-3.34,3.12-1.14,0.79-2.44,1.38-3.85,1.91,0.51-0.31,1.04-0.6,1.5-0.97,1.54-1.23,2.75-2.72,3.72-4.37z" fill="#000"/>
|
||||
<path d="M247.4,375.48l15.77,2.64c-2.02,5.79-5.23,10.19-9.6,13.21-4.38,3.02-9.85,4.54-16.42,4.54-10.4,0-18.1-3.4-23.09-10.2-3.95-5.44-5.92-12.32-5.92-20.61,0-9.92,2.59-17.68,7.78-23.3,5.18-5.61,11.73-8.42,19.65-8.42,8.9,0,15.92,2.94,21.07,8.82,5.14,5.88,7.6,14.88,7.38,27.01h-39.66c0.12,4.69,1.39,8.34,3.83,10.95,2.45,2.61,5.49,3.92,9.13,3.92,2.48,0,4.56-0.68,6.25-2.03s2.97-3.53,3.83-6.53zm0.9-16c-0.11-4.58-1.29-8.07-3.55-10.45-2.25-2.38-4.99-3.58-8.22-3.58-3.45,0-6.31,1.26-8.56,3.78-2.25,2.51-3.36,5.93-3.32,10.25h23.65zm-20.22-32.11l7.78-16.84h17.74l-15.49,16.84h-10.03z" fill="url(#l)"/>
|
||||
</g>
|
||||
<g transform="matrix(0.6, 0, 0, 0.6, 83.43, -47.62)">
|
||||
<path fill-opacity="0.3" d="M25.844,80.688c-3.427,0.201-6.099,1.831-7.969,3.968-0.97,1.099-1.8,2.437-2.344,4-2.276,0.558-4.235,1.611-5.656,3.032l-0.0312-0.032c-0.1083,0.101-0.2094,0.206-0.3126,0.313-0.0412,0.042-0.0845,0.081-0.125,0.125l-0.1562,0.156-0.125,0.156c-1.8144,2.203-3.1928,5.222-2.5625,9.034,0.4099,2.47,1.6306,4.23,2.9063,5.62-0.4704-0.02-0.9399-0.09-1.4063-0.09h-14.75c-0.0153-1.66-0.0326-3.43-0.0937-6.13v-2.871c0.0081-0.22,0.0081-0.218,0-0.438v-0.219c-0.2142-2.996-1.4385-6.385-4.5628-8.781-2.742-2.103-5.854-2.492-8.312-2.406v-0.063h-0.313c-2.23,0.081-5.351,0.632-8,2.782-3.122,2.534-4.187,5.874-4.187,9.125-0.004,0.146-0.004,0.135,0,0.281v0.188c0.148,2.732,0.27,5.532,0.344,8.342,0.001,0.07-0.002,0.12,0,0.19h-17.563c-2.952,0-6.189,0.93-8.719,3.78-2.285,2.57-2.885,5.66-2.968,7.91v0.03c-0.077,2.15,0.238,5.34,2.343,8.12,2.478,3.27,5.856,4.32,9,4.31h15.875c-1.561,6.33-4.261,11.82-8.281,16.69-3.494,4.25-8.049,8.02-13.844,11.28-2.61,1.47-5.285,3.91-6.437,7.66-0.904,2.94-0.651,5.87,0.594,8.59,0.075,0.17,0.135,0.34,0.218,0.5,0.133,0.32,0.25,0.63,0.25,0.63l0.063,0.09c0.007,0.01,0.024,0.02,0.031,0.03l0.031,0.07c-0.541-0.91-0.521-0.79,0.157,0.28l3.093,4.87,1.563-0.78c0.986,0.54,1.933,1.15,3.062,1.41,3.446,0.79,6.763,0.03,9.406-1.38l0.219-0.12,0.125-0.1c0.021-0.01,0.042-0.02,0.063-0.03,0.111-0.07,0.235-0.14,0.343-0.22v-0.03c10.4-6.22,18.697-14.33,24.75-24.03h0.376l1.812-3.34c3.595-6.66,6.063-14.07,7.6875-22.07h9.5625c-0.44,5.74-0.9,11.66-1.1562,14.1v0.09l-0.03,0.22c-0.5064,5.19-1.1158,9.15-1.5625,10.94-0.2162,0.78-0.4454,1.14-0.625,1.47h-6.4688c-2.9742,0-6.5122,0.82-9.3122,3.62-2.488,2.49-3.5,5.52-3.5,8.63-0.011,0.25-0.011,0.24,0,0.5v0.25c0.061,0.73,0.386,1.33,0.562,2l-1.344,1.34,4.532,4.53c1.718,1.72,1.831,1.74,0.187,0.16-0.202-0.2-0.188-0.18,0.063,0.06,2.71,2.56,6.053,3.5,9.156,3.5l7.2812,0.13h0.1876c6.6615,0,12.942-2.73,17.25-7.57h0.719l1.812-3.15,0.031-0.03c2.821-4.91,4.094-11.02,5.313-19.07l0.156-0.31,0.125-1.06c0.479-4.07,0.943-9.08,1.406-15.09,0.467-5.21,0.696-8.
98,0.781-11.72,0.104-1.87-0.192-3.7-0.687-5.5,2.84,0.2,5.131-0.73,7.031-2l0.032,0.06c0.073-0.04,0.146-0.06,0.218-0.1l0.688-0.37,0.625-0.53c1.368-1.22,2.599-2.87,3.343-4.94,1.367-0.35,2.474-0.98,3.532-1.69l0.031,0.07c0.073-0.04,0.146-0.06,0.219-0.1l0.687-0.37,0.625-0.53c2.042-1.82,3.909-4.57,4.032-8.32,0.116-3.585-1.544-6.502-3.188-8.34l-0.062-0.062-0.094-0.094c-0.072-0.075-0.144-0.147-0.219-0.219-1.61-1.691-2.862-2.91-4.094-4.063l-0.093-0.093-0.094-0.094c-1.897-1.724-3.728-3.203-5.625-4.469-1.827-1.279-4.511-2.402-7.625-2.218zm-17.75,37.752c0.1539,0.02,0.9063,0.12,0.9062,0.12-0.0002,0,0.3432,0.11,0.625,0.19-0.3653-0.07-1.4337-0.29-1.5312-0.31zm-4.2188,1.34h0.1875c-0.3096,0.23-0.3931,0.27-0.625,0.44,0.0494-0.07,0.25-0.35,0.25-0.34,0,0,0.1622-0.09,0.1875-0.1zm8.063,0.78c0.02,0.01,0.042,0.02,0.062,0.03l-0.625,0.53,0.563-0.56zm0.843,0.53c0.027,0.03,0.037,0.07,0.063,0.1l-0.938,0.65,0.875-0.75zm0.5,0.69c0.093,0.16,0.184,0.31,0.25,0.5-0.048-0.08-0.113-0.26-0.25-0.5zm-11.594,1.03c-0.0233,0.11-0.0516,0.24-0.0937,0.44,0.0065-0.07,0.0312-0.34,0.0312-0.34s0.0587-0.09,0.0625-0.1zm11.906,4.28c-0.003,0.06-0.028,0.16-0.032,0.22-0.137,0.23-1,1.72-1,1.72,0.001,0-0.369,0.28-0.593,0.44,0.331-0.49,1.323-1.94,1.625-2.38zm-10.938,1.6c0.1258,0.16,0.316,0.33,0.4688,0.5l-0.0625,0.06c-0.0913-0.1-0.2038-0.23-0.25-0.28-0.1353-0.16-0.1402-0.2-0.1563-0.22-0.004,0-0.0291-0.03-0.0312-0.03l0.0312-0.03zm0.6876,0.75c0.1283,0.12,0.1857,0.25,0.3437,0.37-0.1548-0.12-0.3158-0.25-0.4063-0.34l0.0626-0.03zm0.5312,0.53c0.2412,0.19,0.5718,0.42,1.25,0.72-0.1829-0.08-0.3407-0.14-0.5938-0.28-0.0098-0.01-0.0212-0.03-0.0312-0.03,0.0002,0-0.4694-0.29-0.625-0.41zm7.031,0.25c-0.319,0.23-0.75,0.53-0.75,0.53s-0.3648,0.06-0.531,0.09c0.399-0.19,0.73-0.35,1.281-0.62z" fill="#000"/>
|
||||
<path d="M21.968,82.795c-1.392,0.082-2.666,0.824-3.531,1.812-0.889,1.008-1.555,2.502-1.312,4,0.242,1.499,1.174,2.603,2.218,3.438,1.879,1.503,3.31,2.692,4.219,3.531,1.214,1.143,2.159,2.174,2.906,3.125,0.021,0.032,0.041,0.063,0.063,0.094,0.933,1.088,2.154,1.985,3.687,2.185,1.534,0.21,3.014-0.43,4.094-1.341,0.021-0.01,0.042-0.021,0.063-0.032,1.018-0.905,1.857-2.235,1.906-3.75,0.049-1.514-0.646-2.818-1.563-3.843-0.02-0.021-0.041-0.042-0.062-0.063-1.559-1.636-2.918-2.965-4.094-4.062-0.01-0.011-0.021-0.021-0.031-0.032-1.741-1.582-3.356-2.893-4.875-3.906-1.063-0.744-2.266-1.24-3.688-1.156zm-45.968,5.406c-1.526,0.055-2.976,0.36-4.188,1.344-1.228,0.997-1.844,2.662-1.844,4.156-0.001,0.042-0.001,0.084,0,0.125,0.151,2.794,0.269,5.661,0.344,8.534,0.055,2.48,0.026,4.61,0,6.75h-23.969c-1.377,0-2.914,0.51-3.906,1.62-0.992,1.12-1.323,2.52-1.375,3.91-0.05,1.4,0.145,2.82,1.063,4.03,0.917,1.21,2.503,1.78,3.875,1.78h23.5c-1.118,10.52-4.697,19.55-10.969,27.16-4.045,4.91-9.235,9.18-15.625,12.78-1.585,0.89-2.932,2.22-3.469,3.97-0.472,1.53-0.261,3.32,0.563,4.72,0.003,0-0.004,0.02,0,0.03,0.023,0.04,0.037,0.08,0.062,0.12l0.063-0.03c0.814,1.38,2.219,2.37,3.718,2.72,1.664,0.38,3.407,0.04,4.938-0.78,0.032-0.02,0.063-0.04,0.094-0.06,10.928-6.45,19.303-14.87,24.937-25.19h0.031c3.976-7.36,6.576-15.91,8-25.44h20.688c0.9846,0,0.952,0.2,0.875,0.09,0.0205,0.03,0.0413,0.05,0.0625,0.07-0.1075-0.15,0.1515,0.24,0.0937,1.28-0.6844,9.66-1.2515,16.24-1.625,19.75v0.03c-0.5275,5.5-1.1278,9.6-1.75,12.09-0.76,2.76-1.7441,4.35-2.5937,5.07-0.021,0.01-0.0418,0.02-0.0625,0.03-0.8088,0.72-2.0336,1.22-4.125,1.22h-6.5309c-1.748,0-3.534,0.5-4.782,1.75-1.062,1.06-1.625,2.63-1.625,4.09-0.003,0.07-0.003,0.15,0,0.22,0.123,1.47,0.862,2.86,1.907,3.84l-0.032,0.03c0.027,0.03,0.067,0.04,0.094,0.07,0.011,0.01,0.021,0.02,0.031,0.03,1.262,1.19,3.032,1.75,4.75,1.75l7.4066,0.12h0.0313c6.2648,0,11.418-2.61,14.281-7.53h0.0313c2.1206-3.69,3.5126-9.5,4.7496-17.94,0.011-0.02,0.022-0.04,0.032-0.06,0.462-3.93,0.916-8.88,1.375-14.84,0.459-5.1
3,0.702-8.88,0.781-11.41,0.184-3.32-0.967-6.4-3.406-8.44v-0.03c-2.262-1.85-5.2882-2.62-8.7191-2.62h-21.188c0.036-1.59,0.094-3.12,0.094-4.88,0-1.65-0.049-4.14-0.125-7.498v-3.031c0.002-0.062,0.002-0.125,0-0.187-0.11-1.546-0.755-3.154-2.062-4.157-1.29-0.989-2.871-1.245-4.438-1.156h-0.062zm37.406,2.094c-1.368,0.121-2.581,0.835-3.4689,1.781-0.0431,0.04-0.0848,0.082-0.125,0.125-0.8348,1.014-1.4285,2.481-1.1875,3.938,0.2411,1.457,1.1663,2.537,2.1874,3.343,1.792,1.498,3.121,2.698,4.156,3.658,0.011,0.01,0.021,0.02,0.032,0.03,1.298,1.15,2.297,2.17,3,3.06,0.02,0.03,0.041,0.07,0.062,0.1,0.934,1.09,2.159,2,3.688,2.21,1.529,0.22,3.029-0.42,4.125-1.34,0.021-0.01,0.041-0.02,0.062-0.03,1.026-0.91,1.875-2.27,1.906-3.78,0.032-1.52-0.683-2.8-1.593-3.814-0.021-0.021-0.042-0.042-0.063-0.062-1.547-1.625-2.9-2.98-4.094-4.094-0.01-0.011-0.02-0.021-0.031-0.031-1.727-1.57-3.347-2.912-4.937-4-1.059-0.725-2.349-1.215-3.719-1.094z" stroke="#3c3c00" stroke-width="10" fill="none"/>
|
||||
<path d="M21.968,82.795c-1.392,0.082-2.666,0.824-3.531,1.812-0.889,1.008-1.555,2.502-1.312,4,0.242,1.499,1.174,2.603,2.218,3.438,1.879,1.503,3.31,2.692,4.219,3.531,1.214,1.143,2.159,2.174,2.906,3.125,0.021,0.032,0.041,0.063,0.063,0.094,0.933,1.088,2.154,1.985,3.687,2.185,1.534,0.21,3.014-0.43,4.094-1.341,0.021-0.01,0.042-0.021,0.063-0.032,1.018-0.905,1.857-2.235,1.906-3.75,0.049-1.514-0.646-2.818-1.563-3.843-0.02-0.021-0.041-0.042-0.062-0.063-1.559-1.636-2.918-2.965-4.094-4.062-0.01-0.011-0.021-0.021-0.031-0.032-1.741-1.582-3.356-2.893-4.875-3.906-1.063-0.744-2.266-1.24-3.688-1.156zm-45.968,5.406c-1.526,0.055-2.976,0.36-4.188,1.344-1.228,0.997-1.844,2.662-1.844,4.156-0.001,0.042-0.001,0.084,0,0.125,0.151,2.794,0.269,5.661,0.344,8.534,0.055,2.48,0.026,4.61,0,6.75h-23.969c-1.377,0-2.914,0.51-3.906,1.62-0.992,1.12-1.323,2.52-1.375,3.91-0.05,1.4,0.145,2.82,1.063,4.03,0.917,1.21,2.503,1.78,3.875,1.78h23.5c-1.118,10.52-4.697,19.55-10.969,27.16-4.045,4.91-9.235,9.18-15.625,12.78-1.585,0.89-2.932,2.22-3.469,3.97-0.472,1.53-0.261,3.32,0.563,4.72,0.003,0-0.004,0.02,0,0.03,0.023,0.04,0.037,0.08,0.062,0.12l0.063-0.03c0.814,1.38,2.219,2.37,3.718,2.72,1.664,0.38,3.407,0.04,4.938-0.78,0.032-0.02,0.063-0.04,0.094-0.06,10.928-6.45,19.303-14.87,24.937-25.19h0.031c3.976-7.36,6.576-15.91,8-25.44h20.688c0.9846,0,0.952,0.2,0.875,0.09,0.0205,0.03,0.0413,0.05,0.0625,0.07-0.1075-0.15,0.1515,0.24,0.0937,1.28-0.6844,9.66-1.2515,16.24-1.625,19.75v0.03c-0.5275,5.5-1.1278,9.6-1.75,12.09-0.76,2.76-1.7441,4.35-2.5937,5.07-0.021,0.01-0.0418,0.02-0.0625,0.03-0.8088,0.72-2.0336,1.22-4.125,1.22h-6.5309c-1.748,0-3.534,0.5-4.782,1.75-1.062,1.06-1.625,2.63-1.625,4.09-0.003,0.07-0.003,0.15,0,0.22,0.123,1.47,0.862,2.86,1.907,3.84l-0.032,0.03c0.027,0.03,0.067,0.04,0.094,0.07,0.011,0.01,0.021,0.02,0.031,0.03,1.262,1.19,3.032,1.75,4.75,1.75l7.4066,0.12h0.0313c6.2648,0,11.418-2.61,14.281-7.53h0.0313c2.1206-3.69,3.5126-9.5,4.7496-17.94,0.011-0.02,0.022-0.04,0.032-0.06,0.462-3.93,0.916-8.88,1.375-14.84,0.459-5.1
3,0.702-8.88,0.781-11.41,0.184-3.32-0.967-6.4-3.406-8.44v-0.03c-2.262-1.85-5.2882-2.62-8.7191-2.62h-21.188c0.036-1.59,0.094-3.12,0.094-4.88,0-1.65-0.049-4.14-0.125-7.498v-3.031c0.002-0.062,0.002-0.125,0-0.187-0.11-1.546-0.755-3.154-2.062-4.157-1.29-0.989-2.871-1.245-4.438-1.156h-0.062zm37.406,2.094c-1.368,0.121-2.581,0.835-3.4689,1.781-0.0431,0.04-0.0848,0.082-0.125,0.125-0.8348,1.014-1.4285,2.481-1.1875,3.938,0.2411,1.457,1.1663,2.537,2.1874,3.343,1.792,1.498,3.121,2.698,4.156,3.658,0.011,0.01,0.021,0.02,0.032,0.03,1.298,1.15,2.297,2.17,3,3.06,0.02,0.03,0.041,0.07,0.062,0.1,0.934,1.09,2.159,2,3.688,2.21,1.529,0.22,3.029-0.42,4.125-1.34,0.021-0.01,0.041-0.02,0.062-0.03,1.026-0.91,1.875-2.27,1.906-3.78,0.032-1.52-0.683-2.8-1.593-3.814-0.021-0.021-0.042-0.042-0.063-0.062-1.547-1.625-2.9-2.98-4.094-4.094-0.01-0.011-0.02-0.021-0.031-0.031-1.727-1.57-3.347-2.912-4.937-4-1.059-0.725-2.349-1.215-3.719-1.094z" fill="url(#m)"/>
|
||||
</g>
|
||||
<g transform="matrix(0.6, 0, 0, 0.6, 83.43, -47.62)">
|
||||
<path fill-opacity="0.3" d="M-61.025,286.89c-3.782,0-7.29,2.17-8.968,5.56-2.725,5.45-4,11.45-4,17.66,0.002,2.9,1.311,5.59,3.437,7.47l1,6.03c1.254,7.88,2.268,14.86,3.125,21.22,0.957,7.16,1.294,11.61,1.406,14.46-0.503,0.33-0.753,0.54-2.125,1.1-3.979,1.6-8.262,2.44-13.218,2.44-4.428,0-6.878-0.95-7.75-1.57-1.089-0.76-0.594,0.48-0.594-1.93,0-1.31,0.182-2.78,0.656-4.47,0.403-1.41,1.087-3.24,2.094-5.41,0.778-1.65,1.762-3.55,3.031-5.84,2.602-4.68,0.949-10.72-3.656-13.44l-1.688-1.03c-2.324-1.4-5.099-1.77-7.718-1.06-2.619,0.7-4.847,2.43-6.157,4.81-1.99,3.6-3.62,6.73-4.91,9.56-1.75,3.83-3.1,7.47-4.03,10.97-1.11,4.12-1.68,8.22-1.68,12.28,0,7.05,2.85,13.72,7.9,18.47v0.62l2.81,1.88c6.143,4.11,13.616,5.53,21.785,5.53,6.2,0,11.614-0.55,16.532-1.75h0.437l0.656-0.19,0.125-0.03,0.063-0.03c6.188-1.71,11.792-4.84,15.781-9.78l0.031-0.03c4.435-5.56,6.282-12.64,6.282-20.03,0-4.72-0.518-10.68-1.407-18.38-0.561-4.86-1.368-11.12-2.5-18.75v-0.03c-0.239-1.59-0.264-1.84-0.375-2.59,0.733-1.01,1.337-2.11,1.657-3.35,0.59-2.28,1.016-4.12,1.281-5.81,0.335-2.15,0.406-4.35,0.406-7.09-0.003-3.79-2.172-7.29-5.562-8.97-0.744-0.37-1.617-0.81-2.657-1.31-0.48-0.24-0.886-0.39-1.343-0.6-1.431-3.8-4.989-6.58-9.188-6.59h-0.969zm2.5,11.25h0.032c-0.007,0.01-0.025,0.02-0.032,0.03v-0.03zm1,6.37c0.79,0.35,1.549,0.68,2.407,1.1h0.031v0.03c0.334,0.16,0.597,0.28,0.906,0.44-0.038,1.29-0.061,2.76-0.156,3.37-0.103,0.66-0.436,1.97-0.719,3.16l-3.187,1.09-0.157-0.97-0.531-3.09-2.094-1.13c0.071-1.36,0.191-2.7,0.438-3.97l3.062-0.03zm4.938,2.35h0.031v0.03c-0.013-0.01-0.018-0.03-0.031-0.03zm-47.344,59.28c1.065,2.28,2.593,4.26,4.438,5.56,3.942,2.78,8.992,3.94,15.125,3.94,6.437,0,12.488-1.14,18.031-3.38,3.08-1.24,5.327-2.45,7.312-4.37,0.407-0.4,0.767-1.03,1.157-1.53-0.585,2.41-1.505,4.43-2.782,6.03-1.939,2.4-4.919,4.22-9.062,5.4l-0.188,0.07c-3.645,0.99-8.417,1.53-14.375,1.53-6.53,0-11.23-1.15-14.437-3.22l-0.219-0.16c-3.183-2.14-4.887-5.42-5-9.87z" fill="#000"/>
|
||||
<path d="M-63.656,295.12c-1.366,0.01-2.613,0.78-3.219,2-2.252,4.51-3.344,9.49-3.344,14.82,0.001,1.34,0.748,2.57,1.938,3.18l0.937,0.5,1.469,8.76c1.262,7.93,2.292,15.03,3.156,21.43,1.113,8.33,1.656,14.29,1.657,17.41-0.001,0.55-0.107,1.01-1.032,1.9-0.924,0.9-2.69,1.99-5.281,3.04-4.761,1.91-9.928,2.9-15.625,2.9-5.28,0-9.03-1.05-11.438-2.75-2.331-1.64-3.312-3.57-3.312-7.15,0-1.96,0.297-4.01,0.906-6.19,0.512-1.79,1.306-3.94,2.438-6.38,0.868-1.84,1.945-3.88,3.25-6.25,0.948-1.7,0.365-3.85-1.313-4.84l-1.719-1.03c-0.836-0.5-1.842-0.65-2.785-0.39-0.943,0.25-1.743,0.88-2.215,1.73-1.942,3.52-3.502,6.53-4.692,9.13-1.62,3.55-2.83,6.87-3.65,9.97-0.98,3.63-1.47,7.17-1.47,10.62,0,6.5,2.76,12.18,7.88,15.63-0.01,0.01-0.01,0.02,0,0.03,4.669,3.13,10.796,4.44,18.214,4.43,6.324,0,11.633-0.54,16.062-1.74h0.032c5.224-1.45,9.549-3.97,12.531-7.66,3.326-4.17,4.875-9.67,4.875-16.03,0-4.25-0.468-10.07-1.344-17.66-0.55-4.76-1.375-10.95-2.5-18.53-0.547-3.63-0.617-4.16-0.875-6,1.142-0.39,2.01-1.33,2.313-2.5,0.563-2.18,0.948-3.89,1.156-5.22,0.239-1.53,0.312-3.47,0.312-6.09-0.001-1.37-0.776-2.62-2-3.22-0.754-0.38-1.612-0.81-2.594-1.28-1.378-0.67-2.668-1.25-3.874-1.75h-0.032c0.001-0.03-0.201-0.54-0.312-1.6-0.193-1.82-1.728-3.21-3.563-3.22h-0.937z" transform="translate(-1.63, -6.1)" stroke="#510000" stroke-width="10" fill="none"/>
|
||||
<path transform="translate(-1.63, -6.1)" d="M-63.656,295.12c-1.366,0.01-2.613,0.78-3.219,2-2.252,4.51-3.344,9.49-3.344,14.82,0.001,1.34,0.748,2.57,1.938,3.18l0.937,0.5,1.469,8.76c1.262,7.93,2.292,15.03,3.156,21.43,1.113,8.33,1.656,14.29,1.657,17.41-0.001,0.55-0.107,1.01-1.032,1.9-0.924,0.9-2.69,1.99-5.281,3.04-4.761,1.91-9.928,2.9-15.625,2.9-5.28,0-9.03-1.05-11.438-2.75-2.331-1.64-3.312-3.57-3.312-7.15,0-1.96,0.297-4.01,0.906-6.19,0.512-1.79,1.306-3.94,2.438-6.38,0.868-1.84,1.945-3.88,3.25-6.25,0.948-1.7,0.365-3.85-1.313-4.84l-1.719-1.03c-0.836-0.5-1.842-0.65-2.785-0.39-0.943,0.25-1.743,0.88-2.215,1.73-1.942,3.52-3.502,6.53-4.692,9.13-1.62,3.55-2.83,6.87-3.65,9.97-0.98,3.63-1.47,7.17-1.47,10.62,0,6.5,2.76,12.18,7.88,15.63-0.01,0.01-0.01,0.02,0,0.03,4.669,3.13,10.796,4.44,18.214,4.43,6.324,0,11.633-0.54,16.062-1.74h0.032c5.224-1.45,9.549-3.97,12.531-7.66,3.326-4.17,4.875-9.67,4.875-16.03,0-4.25-0.468-10.07-1.344-17.66-0.55-4.76-1.375-10.95-2.5-18.53-0.547-3.63-0.617-4.16-0.875-6,1.142-0.39,2.01-1.33,2.313-2.5,0.563-2.18,0.948-3.89,1.156-5.22,0.239-1.53,0.312-3.47,0.312-6.09-0.001-1.37-0.776-2.62-2-3.22-0.754-0.38-1.612-0.81-2.594-1.28-1.378-0.67-2.668-1.25-3.874-1.75h-0.032c0.001-0.03-0.201-0.54-0.312-1.6-0.193-1.82-1.728-3.21-3.563-3.22h-0.937z" fill="url(#n)"/>
|
||||
</g>
|
||||
<g transform="matrix(0.6, 0, 0, 0.6, 83.43, -47.62)">
|
||||
<path fill-opacity="0.3" d="M84.844,406.06c-1.134,0.12-2.236,0.56-3.25,1.25-2.029,1.38-3.196,3.74-3.063,6.19,0.174,5.03,0.324,11.84,0.5,18.62h-3.937l-1.406-1.24c0.945-0.89,1.658-2.03,2-3.32,0.671-2.53-0.141-5.28-2.094-7.03l-0.094-0.09-0.125-0.1-6.156-5.06c-1.462-1.3-3.212-1.76-5.157-1.56-0.608-0.95-1.353-1.81-2.374-2.38l-7.032-4.25c-1.169-0.73-2.433-1.03-3.812-1h-0.032c-3.379,0.1-6.28,2.69-6.75,6.03,0.078-0.54-0.176,0.75-0.812,2.35-0.636,1.59-1.628,3.76-2.906,6.44-2.398,5.01-6.437,11.81-12.094,20.06-1.892,2.72-1.551,6.46,0.75,8.84l0.031,0.03,0.031,0.04,1.094,1.09c1.083,1.09,2.488,1.66,3.938,1.87-0.042,8.6-0.07,18.19-0.219,22.94v0.03c-0.029,1.03,0.291,1.99,0.687,2.91-0.999,3.22-2.13,5.28-2.093,5.25-1.466,1.29-3.047,2.89-3.907,5.47-1.038,3.11-0.225,6.71,2.126,9.06,2.968,2.97,8.227,3.76,11.937,1.91,1.539-0.77,2.807-1.85,3.875-3.07,2.082,1.47,4.625,1.82,6.625,1.54,1.928-0.28,3.638-1.36,5.094-2.69,0.346,0.2,0.676,0.45,1.031,0.59-0.108,1.44,0.166,2.93,0.969,4.25l0.062,0.09,0.063,0.1,0.718,1.09c2.045,3.15,6.379,4.08,9.563,2.1,7.964-4.74,14.225-11.13,18.937-18.88v2.44c0,3.22,0.735,6.66,3.094,9.47,2.405,2.86,5.991,4.2,9.282,4.4h0.252c0.12,0.01,0.23,0.01,0.34,0h11.06c5.8,0,11.08-4.17,13.29-9.31,0.95-2.06,0.78-4.28-0.35-6.25-0.64-1.12-1.61-1.95-2.72-2.56-0.19-2.55-0.31-5.72-0.31-9.84,0.02-1.89-0.73-3.64-2.06-4.97-1.34-1.34-3.18-2.09-5.06-2.07h-1.72c-2.79-0.03-5.22,1.65-6.35,4.07v-22.94h12.13c2.86,0.05,5.41-1.69,6.5-4.35,1.09-2.65,0.44-5.73-1.63-7.71l-6.03-6.07c-0.43-0.43-0.98-0.69-1.5-1-0.14-2.99-1.36-5.84-3.06-7.96-3-3.75-7.01-5.8-12.69-8.38-0.143-0.07-0.294-0.04-0.436-0.09-0.658-0.82-1.432-1.58-2.406-2.03l-0.126-0.07-0.124-0.03-8.688-3.62c-1.113-0.52-2.272-0.72-3.406-0.6zm-5.813,42.6c-0.196,5.17-0.706,10.05-1.75,14.46-0.072-4-0.164-8.36-0.187-14.12,0.509-0.04,0.999-0.09,1.5-0.25l0.062-0.03h0.063c0.085-0.03,0.225-0.04,0.312-0.06z" fill="#000"/>
|
||||
<path d="M44.734,408.22c-0.301,0.01-0.552,0.23-0.593,0.53-0.21,1.47-1.637,5.13-4.282,10.66-2.628,5.5-6.819,12.48-12.593,20.91-0.169,0.24-0.143,0.56,0.062,0.78l1.094,1.09c0.229,0.23,0.598,0.25,0.844,0.03,1.939-1.74,3.586-3.5,5.062-5.25-0.009,15.45-0.083,27.42-0.312,34.75-0.007,0.22,0.104,0.43,0.291,0.54,0.187,0.12,0.421,0.13,0.615,0.02l4.687-2.53c0.204-0.1,0.336-0.3,0.344-0.53v-2.62h20.438v3.97c-0.06-0.03-0.097-0.07-0.157-0.1-0.286-0.15-0.641-0.05-0.812,0.22l-0.719,1.09c-0.152,0.24-0.126,0.55,0.063,0.75,1.673,1.92,2.84,3.45,3.531,4.6,0.687,1.14,1.345,2.7,1.937,4.59,0.325,1.04,0.759,1.82,1.469,2.22s1.604,0.24,2.375-0.25c0.324-0.21,0.57-0.47,0.813-0.75-2.695,3.53-5.749,6.78-9.313,9.69-0.253,0.19-0.32,0.54-0.156,0.81l0.719,1.09c0.183,0.29,0.558,0.37,0.843,0.19,17.903-10.64,26.682-29.9,26.344-57.4h2.375v46.37c0,2.26,0.507,4.08,1.594,5.38,1.086,1.29,2.737,1.99,4.781,2.12h11.252c3.35,0,5.9-1.92,7.4-5.44,0.09-0.18,0.08-0.39-0.02-0.56-0.1-0.18-0.28-0.29-0.48-0.31-0.95-0.11-1.59-0.47-2.09-1.13s-0.82-1.66-0.94-3.03c-0.24-2.87-0.37-6.56-0.37-11.12,0-0.17-0.07-0.33-0.18-0.45-0.12-0.12-0.28-0.18-0.45-0.18h-1.81c-0.33,0-0.61,0.26-0.62,0.6-0.25,5.29-0.55,9.38-0.91,12.25-0.17,1.37-0.53,2.34-1,2.93s-1,0.85-1.81,0.85h-4.03c-1.827,0.22-2.995-0.07-3.661-0.78-0.666-0.72-0.949-2.02-0.718-3.97,0.001-0.02,0.001-0.05,0-0.07v-43.46h18.529c0.26,0,0.49-0.15,0.58-0.39,0.1-0.23,0.04-0.5-0.14-0.68l-6.13-6.15c-0.11-0.12-0.27-0.19-0.43-0.19-0.17,0-0.32,0.07-0.44,0.19l-4.16,4.15h-16.122v-19.31l2.969-1.97c0.179-0.13,0.278-0.34,0.258-0.56-0.019-0.22-0.153-0.41-0.352-0.5l-8.687-3.63c-0.197-0.09-0.427-0.07-0.606,0.05s-0.281,0.33-0.269,0.55c0.235,6.82,0.482,15.34,0.718,25.37h-12c-0.238,0-0.457,0.13-0.562,0.35l-5.188-4.53c-0.128-0.11-0.292-0.16-0.457-0.14-0.164,0.02-0.315,0.1-0.418,0.23l-2.718,3.37h-7.094c2.095-2.77,3.962-5.22,5.312-6.84,1.548-1.86,2.694-2.98,3.094-3.22,1.15-0.69,2.306-1.15,3.438-1.37,0.226-0.05,0.406-0.22,0.465-0.45,0.06-0.22-0.012-0.46-0.184-0.62l-6.156-5.06c-0.13-0.11-0.302-0.17-0.474-0
.15-0.173,0.02-0.33,0.11-0.433,0.25l-2.343,3h-8.688c0.965-1.64,1.886-3,2.688-3.85,0.961-1.02,1.763-1.4,2.343-1.4,0.284,0,0.534-0.19,0.608-0.46s-0.048-0.56-0.295-0.7l-7.219-4.34c-0.103-0.07-0.222-0.1-0.344-0.1zm48.094,5.78c-0.154,0.01-0.299,0.08-0.406,0.19l-1.094,1.09c-0.119,0.13-0.179,0.3-0.168,0.47,0.012,0.17,0.096,0.33,0.231,0.44,3.795,3.08,6.048,6.14,6.843,9.09,0.432,1.61,0.907,2.79,1.5,3.6,0.296,0.4,0.626,0.72,1.036,0.9,0.4,0.19,0.88,0.23,1.31,0.1,1.44-0.44,2.42-1.73,2.94-3.53,0.54-1.9-0.04-3.85-1.57-5.75-1.59-1.99-5.003-4.11-10.341-6.53-0.087-0.05-0.184-0.07-0.281-0.07zm-47.75,8.13h9.531l-5.562,11.4h-8.094l-2.562-1.71c2.227-2.93,4.462-6.15,6.687-9.69zm24.438,13.81l2.281,2.28c0.162,0.17,0.404,0.23,0.625,0.16,2.098-0.7,4.468-1.06,7.062-1.06h1.907c0.622,18.59-3.482,33.69-12.282,45.37,0.239-0.33,0.435-0.72,0.563-1.12,0.278-0.89,0.285-1.89,0.094-3.04-0.221-1.32-1.188-2.59-2.782-3.9-1.344-1.11-3.247-2.29-5.531-3.5l4.875-2.06c0.236-0.11,0.386-0.34,0.375-0.6-0.238-6.18-0.342-16.38-0.344-30.37l2.969-2c0.072-0.04,0.136-0.1,0.188-0.16zm-29.563,0.66h7.438v11.75h-7.438v-11.75zm13.031,0h7.407v11.75h-7.407v-11.75zm-13.031,14.81h7.438v12.12h-7.438v-12.12zm13.031,0h7.407v12.12h-7.407v-12.12zm-1.812,20.09c-0.089,0.02-0.175,0.05-0.25,0.1l-1.094,0.72c-0.282,0.18-0.365,0.55-0.187,0.84,0.953,1.67,1.786,3.33,2.5,5,0.7,1.63,1.238,3.49,1.593,5.62,0.193,1.16,0.497,2.05,1.157,2.57,0.659,0.52,1.595,0.48,2.437,0.06,1.68-0.84,2.471-2.67,2.344-5.09-0.072-1.37-0.845-2.79-2.188-4.38s-3.29-3.37-5.843-5.31c-0.133-0.11-0.302-0.15-0.469-0.13zm-8.656,1.44c-0.144,0.02-0.276,0.08-0.375,0.19l-0.719,0.72c-0.167,0.17-0.216,0.43-0.125,0.65,0.466,1.17,0.894,2.59,1.25,4.25,0.345,1.62,0.431,3.56,0.312,5.82-0.064,1.22,0.113,2.2,0.657,2.9,0.543,0.7,1.46,0.95,2.437,0.81,0.993-0.14,1.811-0.67,2.344-1.5,0.532-0.82,0.811-1.91,0.875-3.25,0.137-2.88-2.012-6.26-6.156-10.4-0.13-0.14-0.314-0.21-0.5-0.19zm-6.875,0.72c-0.256,0.04-0.455,0.24-0.5,0.5-1.192,5.24-2.85,8.69-4.719,10.34-1.067,0.94-1.765,1.8-2.063,2.69s-0.075,
1.86,0.594,2.53c1.23,1.23,2.914,1.5,4.531,0.69,1.597-0.8,2.767-2.25,3.532-4.16,0.816-2.04,1.023-5.81,0.781-11.62-0.016-0.27-0.206-0.5-0.469-0.56l-1.437-0.38c-0.081-0.03-0.166-0.04-0.25-0.03z" stroke="#003c00" stroke-width="10" fill="none"/>
|
||||
<path d="M44.734,408.22c-0.301,0.01-0.552,0.23-0.593,0.53-0.21,1.47-1.637,5.13-4.282,10.66-2.628,5.5-6.819,12.48-12.593,20.91-0.169,0.24-0.143,0.56,0.062,0.78l1.094,1.09c0.229,0.23,0.598,0.25,0.844,0.03,1.939-1.74,3.586-3.5,5.062-5.25-0.009,15.45-0.083,27.42-0.312,34.75-0.007,0.22,0.104,0.43,0.291,0.54,0.187,0.12,0.421,0.13,0.615,0.02l4.687-2.53c0.204-0.1,0.336-0.3,0.344-0.53v-2.62h20.438v3.97c-0.06-0.03-0.097-0.07-0.157-0.1-0.286-0.15-0.641-0.05-0.812,0.22l-0.719,1.09c-0.152,0.24-0.126,0.55,0.063,0.75,1.673,1.92,2.84,3.45,3.531,4.6,0.687,1.14,1.345,2.7,1.937,4.59,0.325,1.04,0.759,1.82,1.469,2.22s1.604,0.24,2.375-0.25c0.324-0.21,0.57-0.47,0.813-0.75-2.695,3.53-5.749,6.78-9.313,9.69-0.253,0.19-0.32,0.54-0.156,0.81l0.719,1.09c0.183,0.29,0.558,0.37,0.843,0.19,17.903-10.64,26.682-29.9,26.344-57.4h2.375v46.37c0,2.26,0.507,4.08,1.594,5.38,1.086,1.29,2.737,1.99,4.781,2.12h11.252c3.35,0,5.9-1.92,7.4-5.44,0.09-0.18,0.08-0.39-0.02-0.56-0.1-0.18-0.28-0.29-0.48-0.31-0.95-0.11-1.59-0.47-2.09-1.13s-0.82-1.66-0.94-3.03c-0.24-2.87-0.37-6.56-0.37-11.12,0-0.17-0.07-0.33-0.18-0.45-0.12-0.12-0.28-0.18-0.45-0.18h-1.81c-0.33,0-0.61,0.26-0.62,0.6-0.25,5.29-0.55,9.38-0.91,12.25-0.17,1.37-0.53,2.34-1,2.93s-1,0.85-1.81,0.85h-4.03c-1.827,0.22-2.995-0.07-3.661-0.78-0.666-0.72-0.949-2.02-0.718-3.97,0.001-0.02,0.001-0.05,0-0.07v-43.46h18.529c0.26,0,0.49-0.15,0.58-0.39,0.1-0.23,0.04-0.5-0.14-0.68l-6.13-6.15c-0.11-0.12-0.27-0.19-0.43-0.19-0.17,0-0.32,0.07-0.44,0.19l-4.16,4.15h-16.122v-19.31l2.969-1.97c0.179-0.13,0.278-0.34,0.258-0.56-0.019-0.22-0.153-0.41-0.352-0.5l-8.687-3.63c-0.197-0.09-0.427-0.07-0.606,0.05s-0.281,0.33-0.269,0.55c0.235,6.82,0.482,15.34,0.718,25.37h-12c-0.238,0-0.457,0.13-0.562,0.35l-5.188-4.53c-0.128-0.11-0.292-0.16-0.457-0.14-0.164,0.02-0.315,0.1-0.418,0.23l-2.718,3.37h-7.094c2.095-2.77,3.962-5.22,5.312-6.84,1.548-1.86,2.694-2.98,3.094-3.22,1.15-0.69,2.306-1.15,3.438-1.37,0.226-0.05,0.406-0.22,0.465-0.45,0.06-0.22-0.012-0.46-0.184-0.62l-6.156-5.06c-0.13-0.11-0.302-0.17-0.474-0
.15-0.173,0.02-0.33,0.11-0.433,0.25l-2.343,3h-8.688c0.965-1.64,1.886-3,2.688-3.85,0.961-1.02,1.763-1.4,2.343-1.4,0.284,0,0.534-0.19,0.608-0.46s-0.048-0.56-0.295-0.7l-7.219-4.34c-0.103-0.07-0.222-0.1-0.344-0.1zm48.094,5.78c-0.154,0.01-0.299,0.08-0.406,0.19l-1.094,1.09c-0.119,0.13-0.179,0.3-0.168,0.47,0.012,0.17,0.096,0.33,0.231,0.44,3.795,3.08,6.048,6.14,6.843,9.09,0.432,1.61,0.907,2.79,1.5,3.6,0.296,0.4,0.626,0.72,1.036,0.9,0.4,0.19,0.88,0.23,1.31,0.1,1.44-0.44,2.42-1.73,2.94-3.53,0.54-1.9-0.04-3.85-1.57-5.75-1.59-1.99-5.003-4.11-10.341-6.53-0.087-0.05-0.184-0.07-0.281-0.07zm-47.75,8.13h9.531l-5.562,11.4h-8.094l-2.562-1.71c2.227-2.93,4.462-6.15,6.687-9.69zm24.438,13.81l2.281,2.28c0.162,0.17,0.404,0.23,0.625,0.16,2.098-0.7,4.468-1.06,7.062-1.06h1.907c0.622,18.59-3.482,33.69-12.282,45.37,0.239-0.33,0.435-0.72,0.563-1.12,0.278-0.89,0.285-1.89,0.094-3.04-0.221-1.32-1.188-2.59-2.782-3.9-1.344-1.11-3.247-2.29-5.531-3.5l4.875-2.06c0.236-0.11,0.386-0.34,0.375-0.6-0.238-6.18-0.342-16.38-0.344-30.37l2.969-2c0.072-0.04,0.136-0.1,0.188-0.16zm-29.563,0.66h7.438v11.75h-7.438v-11.75zm13.031,0h7.407v11.75h-7.407v-11.75zm-13.031,14.81h7.438v12.12h-7.438v-12.12zm13.031,0h7.407v12.12h-7.407v-12.12zm-1.812,20.09c-0.089,0.02-0.175,0.05-0.25,0.1l-1.094,0.72c-0.282,0.18-0.365,0.55-0.187,0.84,0.953,1.67,1.786,3.33,2.5,5,0.7,1.63,1.238,3.49,1.593,5.62,0.193,1.16,0.497,2.05,1.157,2.57,0.659,0.52,1.595,0.48,2.437,0.06,1.68-0.84,2.471-2.67,2.344-5.09-0.072-1.37-0.845-2.79-2.188-4.38s-3.29-3.37-5.843-5.31c-0.133-0.11-0.302-0.15-0.469-0.13zm-8.656,1.44c-0.144,0.02-0.276,0.08-0.375,0.19l-0.719,0.72c-0.167,0.17-0.216,0.43-0.125,0.65,0.466,1.17,0.894,2.59,1.25,4.25,0.345,1.62,0.431,3.56,0.312,5.82-0.064,1.22,0.113,2.2,0.657,2.9,0.543,0.7,1.46,0.95,2.437,0.81,0.993-0.14,1.811-0.67,2.344-1.5,0.532-0.82,0.811-1.91,0.875-3.25,0.137-2.88-2.012-6.26-6.156-10.4-0.13-0.14-0.314-0.21-0.5-0.19zm-6.875,0.72c-0.256,0.04-0.455,0.24-0.5,0.5-1.192,5.24-2.85,8.69-4.719,10.34-1.067,0.94-1.765,1.8-2.063,2.69s-0.075,
1.86,0.594,2.53c1.23,1.23,2.914,1.5,4.531,0.69,1.597-0.8,2.767-2.25,3.532-4.16,0.816-2.04,1.023-5.81,0.781-11.62-0.016-0.27-0.206-0.5-0.469-0.56l-1.437-0.38c-0.081-0.03-0.166-0.04-0.25-0.03z" fill="url(#o)"/>
|
||||
</g>
|
||||
<g transform="matrix(0.6, 0, 0, 0.6, 83.43, -47.62)">
|
||||
<path fill-opacity="0.3" d="M208.28,99.312c-9.25,0.001-16.95,1.548-22.87,5.718-5.68,4-9.54,10.17-11.6,17.53l-1.87,6.78,6.9,1.25,7.72,1.41c-0.33,0.15-0.77,0.25-1.09,0.41-4.18,2.02-7.7,5.14-10.09,9.06-2.4,3.92-3.57,8.51-3.57,13.22,0,7.06,2.76,13.73,7.81,18.5,5.22,4.91,12.4,7.19,20.29,7.19,4.61,0,9.11-0.88,13.28-2.66,1.26-0.54,2.42-1.31,3.62-2l1.25,3.19h31.69l-4.47-9.19c-1.33-2.73-2.17-5.18-2.59-7.25-0.38-1.87-0.69-5.87-0.69-11.41l0.16-20.12v-0.06c0-7.95-0.41-13.62-3.1-18.54-2.31-4.23-6.15-7.42-10.78-9.65-5.31-2.57-11.79-3.377-20-3.378zm0,12.808c7.2,0.01,12.2,1.02,14.44,2.1,2.92,1.41,4.36,2.86,5.12,4.25,0.39,0.7,1.53,5.34,1.54,12.41l-0.19,20.12v0.06c0,5.95,0.24,10.39,0.97,13.97,0.07,0.36,0.22,0.73,0.31,1.09h-3.41c-0.06-0.22-0.09-0.26-0.15-0.5-0.31-1.08-0.44-1.65-0.75-2.5l-2.6-6.9c0.2-0.49,0.54-0.9,0.69-1.41,0.94-3.07,0.94-5.84,0.94-10.22v-16.43c0-4.38-1.17-8.91-4.69-11.94-3.86-3.33-8.22-3.69-13.44-3.69-3.91,0-7.68,0.76-10.87,3-1.72,1.21-3.04,2.73-4.13,4.44l-3.03-0.56c1.08-1.63,2.34-2.91,3.75-3.91,2.69-1.89,7.78-3.37,15.5-3.38zm-1.22,13.22c1.61,0,2.63,0.2,3.5,0.38-2.14,0.54-5.01,1.15-8.03,1.75,0.47-0.85,0.89-1.39,1-1.47,0.26-0.18,1.3-0.66,3.53-0.66zm-13.47,17.66c-2.66,2.57-4.5,6.2-4.5,9.91,0,4.01,1.78,8.05,4.63,10.84,1.89,1.84,4.29,3.08,6.84,3.78-0.22,0.01-0.42,0.06-0.65,0.06-5.57,0-8.97-1.36-11.5-3.75-2.69-2.53-3.82-5.06-3.82-9.15,0-2.67,0.59-4.71,1.72-6.57,1.14-1.86,2.57-3.13,4.75-4.18,0.61-0.3,1.67-0.63,2.53-0.94zm18.5,6.44c-0.03,0.33-0.1,1.75-0.09,1.72v0.03l-0.03,0.03c-0.26,0.88-0.62,1.51-1.78,2.37-1.93,1.4-3.38,1.82-5.1,1.82-1.44,0-1.81-0.21-2.43-0.82-0.76-0.74-0.78-0.86-0.78-1.68,0-0.51-0.31-0.18,0.68-0.88-0.33,0.22,2.55-1,7.06-1.97,1.09-0.23,1.51-0.41,2.47-0.62z" fill="#000"/>
|
||||
<path d="M191.4,122.86l-15.68-2.83c1.76-6.31,4.8-10.99,9.1-14.02,4.31-3.04,10.7-4.55,19.19-4.55,7.71,0,13.45,0.91,17.22,2.73,3.77,1.83,6.43,4.14,7.97,6.95,1.53,2.81,2.3,7.97,2.3,15.47l-0.18,20.17c0,5.74,0.28,9.98,0.83,12.7,0.55,2.73,1.59,5.65,3.1,8.77h-17.09c-0.45-1.15-1.01-2.85-1.66-5.11-0.29-1.02-0.5-1.7-0.62-2.03-2.95,2.87-6.11,5.03-9.47,6.46-3.36,1.44-6.95,2.15-10.76,2.15-6.73,0-12.03-1.82-15.9-5.47-3.88-3.65-5.81-8.26-5.81-13.84,0-3.69,0.88-6.98,2.64-9.87,1.77-2.89,4.24-5.1,7.41-6.64,3.18-1.54,7.76-2.88,13.75-4.03,8.08-1.52,13.67-2.93,16.79-4.24v-1.73c0-3.32-0.82-5.68-2.46-7.1-1.64-1.41-4.74-2.12-9.29-2.12-3.07,0-5.47,0.6-7.19,1.81-1.73,1.21-3.12,3.33-4.19,6.37zm23.13,14.02c-2.22,0.74-5.72,1.62-10.52,2.65-4.8,1.02-7.93,2.03-9.41,3.01-2.25,1.6-3.38,3.63-3.38,6.09,0,2.42,0.9,4.51,2.71,6.27,1.8,1.76,4.1,2.65,6.88,2.65,3.12,0,6.09-1.03,8.92-3.08,2.09-1.56,3.47-3.46,4.12-5.72,0.45-1.48,0.68-4.28,0.68-8.42v-3.45z" stroke="#500050" stroke-width="10" fill="none"/>
|
||||
<path d="M191.4,122.86l-15.68-2.83c1.76-6.31,4.8-10.99,9.1-14.02,4.31-3.04,10.7-4.55,19.19-4.55,7.71,0,13.45,0.91,17.22,2.73,3.77,1.83,6.43,4.14,7.97,6.95,1.53,2.81,2.3,7.97,2.3,15.47l-0.18,20.17c0,5.74,0.28,9.98,0.83,12.7,0.55,2.73,1.59,5.65,3.1,8.77h-17.09c-0.45-1.15-1.01-2.85-1.66-5.11-0.29-1.02-0.5-1.7-0.62-2.03-2.95,2.87-6.11,5.03-9.47,6.46-3.36,1.44-6.95,2.15-10.76,2.15-6.73,0-12.03-1.82-15.9-5.47-3.88-3.65-5.81-8.26-5.81-13.84,0-3.69,0.88-6.98,2.64-9.87,1.77-2.89,4.24-5.1,7.41-6.64,3.18-1.54,7.76-2.88,13.75-4.03,8.08-1.52,13.67-2.93,16.79-4.24v-1.73c0-3.32-0.82-5.68-2.46-7.1-1.64-1.41-4.74-2.12-9.29-2.12-3.07,0-5.47,0.6-7.19,1.81-1.73,1.21-3.12,3.33-4.19,6.37zm23.13,14.02c-2.22,0.74-5.72,1.62-10.52,2.65-4.8,1.02-7.93,2.03-9.41,3.01-2.25,1.6-3.38,3.63-3.38,6.09,0,2.42,0.9,4.51,2.71,6.27,1.8,1.76,4.1,2.65,6.88,2.65,3.12,0,6.09-1.03,8.92-3.08,2.09-1.56,3.47-3.46,4.12-5.72,0.45-1.48,0.68-4.28,0.68-8.42v-3.45z" fill="url(#p)"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 41 KiB |
38
recipes/adventure_zone_pl.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Adventure_zone(BasicNewsRecipe):
    """Recipe for the Adventure Zone (adventure-zone.info) news feed.

    Besides the declarative settings, the recipe looks up a cover image on
    the site's news page, swaps news pages that only link to a full article
    for that article's print view, and rewrites article URLs to the print
    view.
    """

    title = u'Adventure Zone'
    __author__ = 'fenuks'
    description = 'Adventure zone - adventure games from A to Z'
    category = 'games'
    language = 'pl'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_tags_before = dict(name='td', attrs={'class': 'main-bg'})
    remove_tags_after = dict(name='td', attrs={'class': 'main-body middle-border'})
    extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
    feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]

    def get_cover_url(self):
        """Return the cover image URL scraped from the site's news page.

        The image is taken from the element with id ``box_OstatninumerAZ``.
        If that element (or the ``center > a > img`` chain inside it) is
        missing, the previously configured ``cover_url`` (or ``None``) is
        returned instead of raising.
        """
        soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
        box = soup.find(id='box_OstatninumerAZ')
        try:
            src = box.center.a.img['src']
        except (AttributeError, KeyError, TypeError):
            # Page layout differs from what this recipe expects; fall back
            # to whatever cover_url is already set rather than crashing.
            return getattr(self, 'cover_url', None)
        self.cover_url = 'http://www.adventure-zone.info/fusion/' + src
        return self.cover_url

    def skip_ad_pages(self, soup):
        """Replace a teaser page by the print view of the linked article.

        Scans every ``<a>`` in the page body. For the first link that points
        at ``articles.php?...`` and whose ``<strong>`` label contains either
        'zapowied' or 'recenzj' (Polish word stems, presumably "preview" and
        "review"), the article's print page is fetched and returned raw.

        Returns ``None`` when no such link exists.
        """
        body = soup.body
        if body is None:
            return None
        for link in body.findAll(name='a'):
            # Anchors without an href must not abort the scan.
            href = link.get('href') or ''
            if 'articles.php?' not in href or link.strong is None:
                continue
            word = link.strong.string
            if word is None:
                # <strong> holds nested markup instead of plain text.
                continue
            # Both stems have to be tested explicitly:
            # ('zapowied' or 'recenzj') evaluates to 'zapowied' alone.
            if 'zapowied' in word or 'recenzj' in word:
                # Keep everything after '_id' (i.e. '=<number>') and append
                # it to the print URL's 'item_id' parameter.
                item = href[href.find('_id') + 3:]
                return self.index_to_soup(
                    'http://www.adventure-zone.info/fusion/print.php?type=A&item_id' + item,
                    raw=True)
        return None

    def print_version(self, url):
        """Map a news URL to the corresponding print-view URL."""
        return url.replace('news.php?readmore', 'print.php?type=N&item_id')
|
||||
|
12
recipes/android_com_pl.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Android_com_pl(BasicNewsRecipe):
    """Recipe settings for the Android.com.pl front-page RSS feed."""

    # Identification / metadata.
    title = u'Android.com.pl'
    __author__ = 'fenuks'
    description = 'Android.com.pl - biggest polish Android site'
    category = 'Android, mobile'
    language = 'pl'

    # Fixed cover image.
    cover_url = u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'

    # Download limits.
    oldest_article = 8
    max_articles_per_feed = 100

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Android', u'http://android.com.pl/component/content/frontpage/frontpage.feed?type=rss'),
    ]
|
18
recipes/astro_news_pl.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AstroNEWS(BasicNewsRecipe):
    """Recipe settings for the AstroNEWS (news.astronet.pl) RSS feed."""

    # Identification / metadata.
    title = u'AstroNEWS'
    __author__ = 'fenuks'
    description = 'AstroNEWS- astronomy every day'
    category = 'astronomy, science'
    language = 'pl'

    # Fixed cover image.
    cover_url = 'http://news.astronet.pl/img/logo_news.jpg'

    # Download limits and clean-up.
    oldest_article = 8
    max_articles_per_feed = 100
    auto_cleanup = True
    # no_stylesheets= True

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Wiadomości', u'http://news.astronet.pl/rss.cgi'),
    ]

    def print_version(self, url):
        """Return *url* rewritten to go through the site's print.cgi script."""
        printable = url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
        return printable
|
||||
|
15
recipes/astronomia_pl.recipe
Normal file
@ -0,0 +1,15 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Astronomia_pl(BasicNewsRecipe):
    """Recipe settings for the Astronomia.pl RSS feed."""

    # Identification / metadata.
    title = u'Astronomia.pl'
    __author__ = 'fenuks'
    description = 'Astronomia - polish astronomy site'
    category = 'astronomy, science'
    language = 'pl'

    # Fixed cover image.
    cover_url = 'http://www.astronomia.pl/grafika/logo.gif'

    # Download limits.
    oldest_article = 8
    max_articles_per_feed = 100
    #no_stylesheets=True

    # Content selection: <div> elements with id 'a1' or 'h2'.
    remove_tags_before = dict(name='div', attrs={'id': 'a1'})
    keep_only_tags = [
        dict(name='div', attrs={'id': ['a1', 'h2']}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/'),
    ]
|
15
recipes/bash_org_pl.recipe
Normal file
@ -0,0 +1,15 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Bash_org_pl(BasicNewsRecipe):
    """Recipe settings for the Bash.org.pl quotes RSS feed."""

    # Identification / metadata.
    title = u'Bash.org.pl'
    __author__ = 'fenuks'
    description = 'Bash.org.pl - funny quotations from IRC discussions'
    category = 'funny quotations, humour'
    language = 'pl'

    # Fixed cover image.
    cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png'

    # Download limits and styling.
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True

    # Content selection: the quote container <div>.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'quote post-content post-body'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Cytaty', u'http://bash.org.pl/rss'),
    ]
|
@ -36,8 +36,9 @@ class BBC(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper', \
|
||||
'story-feature wide ', 'story-feature narrow']})
|
||||
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
|
||||
'story-feature wide ', 'story-feature narrow']}),
|
||||
dict(id=['hypertab', 'comment-form']),
|
||||
]
|
||||
|
||||
remove_attributes = ['width','height']
|
||||
|
31
recipes/brasil_de_fato.recipe
Normal file
@ -0,0 +1,31 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BrasilDeFato(BasicNewsRecipe):
    """Recipe settings for the Brasil de Fato RSS feeds."""

    # Identification / metadata.
    news = True
    title = u'Brasil de Fato'
    __author__ = 'Alex Mitrani'
    description = u'Uma visão popular do Brasil e do mundo.'
    publisher = u'SOCIEDADE EDITORIAL BRASIL DE FATO'
    category = 'news, politics, Brazil, rss, Portuguese'
    language = 'pt_BR'
    masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg'

    # Download limits.
    oldest_article = 10
    max_articles_per_feed = 100
    summary_length = 1000

    # Fetch / clean-up switches.
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    # Content selection: keep <div id="main">; the <div class="links">
    # element is used both as a removal target and as the cut-off marker.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'main'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'links'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'links'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Nacional', u'http://www.brasildefato.com.br/rss_nacional'),
        (u'Internacional', u'http://www.brasildefato.com.br/rss_internacional'),
        (u'Entrevista', u'http://www.brasildefato.com.br/rss_entrevista'),
        (u'Cultura', u'http://www.brasildefato.com.br/rss_cultura'),
        (u'Análise', u'http://www.brasildefato.com.br/rss_analise'),
    ]
|
57
recipes/bugun_gazetesi.recipe
Normal file
@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Bugun(BasicNewsRecipe):
    """Recipe settings for the BUGÜN Gazetesi (bugun.com.tr) RSS feeds."""

    # Identification / metadata.
    title = u'BUGÜN Gazetesi'
    __author__ = u'thomass'
    publisher = 'thomass'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    # NOTE(review): the value carries a trailing space - confirm whether
    # that is intended.
    publication_type = 'newspaper '
    encoding = 'UTF-8'

    # Download limits and fetch switches.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    #no_stylesheets = True
    #delay = 1
    #remove_attributes = ['width','height']

    # Extra stylesheet (split across literals; the joined value is unchanged).
    extra_css = (
        ' div{font-size: small}'
        ' h2{font-size: small;font-weight: bold}'
        ' #ctl00_ortayer_haberBaslik{font-size:20px;font-weight: bold} '
    )

    # Conversion options reuse the metadata attributes defined above.
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # NOTE(review): sibling recipes in this change set name the cover
    # attribute 'cover_url'; confirm that 'cover_img_url' is actually read
    # by anything.
    cover_img_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'
    masthead_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'

    # Content selection: headline, summary and article-text containers.
    # (Commented-out selectors that targeted medya.zaman.com.tr, i.e. a
    # different site, have been dropped.)
    keep_only_tags = [
        dict(name='h1', attrs={'class': ['haberBaslik']}),
        dict(name='h2', attrs={'class': ['haberOzet']}),
        dict(name='div', attrs={'class': ['haberGriDivvvv']}),
        dict(name='div', attrs={'id': ['haberTextDiv']}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Son Dakika', u'http://www.bugun.com.tr/haberler.xml'),
        (u'Yazarlar', u'http://www.bugun.com.tr/rss/yazarlar.xml'),
        (u'Gündem', u'http://www.bugun.com.tr/rss/gundem.xml'),
        (u'Ekonomi', u'http://www.bugun.com.tr/rss/ekonomi.xml'),
        (u'Spor', u'http://www.bugun.com.tr/rss/spor.xml'),
        (u'Magazin', u'http://www.bugun.com.tr/rss/magazin.xml'),
        (u'Teknoloji', u'http://www.bugun.com.tr/rss/teknoloji.xml'),
        (u'Yaşam', u'http://www.bugun.com.tr/rss/yasam.xml'),
        (u'Medya', u'http://www.bugun.com.tr/rss/medya.xml'),
        (u'Dünya', u'http://www.bugun.com.tr/rss/dunya.xml'),
        (u'Politika', u'http://www.bugun.com.tr/rss/politika.xml'),
        (u'Sağlık', u'http://www.bugun.com.tr/rss/saglik.xml'),
        (u'Tarifler', u'http://www.bugun.com.tr/rss/yemek-tarifi.xml'),
    ]
|
@ -1,93 +1,105 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
__copyright__ = '2008 Kovid Goyal kovid@kovidgoyal.net, 2010 Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
businessweek.com
|
||||
www.businessweek.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BusinessWeek(BasicNewsRecipe):
|
||||
title = 'Business Week'
|
||||
description = 'Business News, Stock Market and Financial Advice'
|
||||
__author__ = 'ChuckEggDotCom and Sujata Raman'
|
||||
language = 'en'
|
||||
title = 'Business Week'
|
||||
__author__ = 'Kovid Goyal and Darko Miletic'
|
||||
description = 'Read the latest international business news & stock market news. Get updated company profiles, financial advice, global economy and technology news.'
|
||||
publisher = 'Bloomberg L.P.'
|
||||
category = 'Business, business news, stock market, stock market news, financial advice, company profiles, financial advice, global economy, technology news'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'magazine'
|
||||
cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'
|
||||
masthead_url = 'http://assets.businessweek.com/images/bw-logo.png'
|
||||
extra_css = """
|
||||
body{font-family: Helvetica,Arial,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
.tagline{color: gray; font-style: italic}
|
||||
.photoCredit{font-size: small; color: gray}
|
||||
"""
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
recursions = 1
|
||||
match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
|
||||
extra_css = '''
|
||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
|
||||
.news_story_title{font-family :Arial,Helvetica,sans-serif; font-size:large;font-weight:bold;}
|
||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium;color:#666666;}
|
||||
h3{text-transform:uppercase;font-family :Arial,Helvetica,sans-serif; font-size:large;font-weight:bold;}
|
||||
h4{font-family :Arial,Helvetica,sans-serif; font-size:small;font-weight:bold;}
|
||||
p{font-family :Arial,Helvetica,sans-serif; }
|
||||
#lede600{font-size:x-small;}
|
||||
#storybody{font-size:x-small;}
|
||||
p{font-family :Arial,Helvetica,sans-serif;}
|
||||
.strap{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#064599;}
|
||||
.byline{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
||||
.postedBy{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
|
||||
.trackback{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
|
||||
.date{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
|
||||
.wrapper{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
||||
.photoCredit{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
|
||||
.tagline{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
|
||||
.pageCount{color:#666666;font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
||||
.note{font-family :Arial,Helvetica,sans-serif; font-size:small;color:#666666;font-style:italic;}
|
||||
.highlight{font-family :Arial,Helvetica,sans-serif; font-size:small;background-color:#FFF200;}
|
||||
.annotation{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
|
||||
'''
|
||||
|
||||
remove_tags = [ dict(name='div', attrs={'id':["log","feedback","footer","secondarynav","secondnavbar","header","email","bw2-header","column2","wrapper-bw2-footer","wrapper-mgh-footer","inset","commentForm","commentDisplay","bwExtras","bw2-umbrella","readerComments","leg","rightcol"]}),
|
||||
dict(name='div', attrs={'class':["menu",'sponsorbox smallertext',"TopNavTile","graybottom leaderboard"]}),
|
||||
dict(name='img', alt ="News"),
|
||||
dict(name='td', width ="1"),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(attrs={'class':'inStory'})
|
||||
,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
|
||||
,dict(attrs={'id':['inset','videoDisplay']})
|
||||
]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody','article_body','articleBody']})]
|
||||
remove_attributes = ['lang']
|
||||
match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
|
||||
|
||||
feeds = [
|
||||
(u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
|
||||
(u'Top News', u'http://www.businessweek.com/rss/bwdaily.rss'),
|
||||
(u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
|
||||
(u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
|
||||
(u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
|
||||
(u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
|
||||
(u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
|
||||
(u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
|
||||
(u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
|
||||
(u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
|
||||
(u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
|
||||
(u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
|
||||
(u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
|
||||
(u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
|
||||
(u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
|
||||
(u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
|
||||
(u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
|
||||
(u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
|
||||
(u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
|
||||
(u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
|
||||
(u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ),
|
||||
(u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
|
||||
(u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
|
||||
(u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
|
||||
(u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
|
||||
(u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
|
||||
(u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
|
||||
(u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
|
||||
(u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
|
||||
(u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
|
||||
(u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
|
||||
(u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
|
||||
(u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
|
||||
(u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
|
||||
(u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
|
||||
(u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
|
||||
(u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
|
||||
(u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
|
||||
(u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
|
||||
url = article.get('guid', None)
|
||||
if 'podcasts' in url:
|
||||
return None
|
||||
if 'surveys' in url:
|
||||
return None
|
||||
if 'images' in url:
|
||||
return None
|
||||
if 'feedroom' in url:
|
||||
return None
|
||||
if '/magazine/toc/' in url:
|
||||
return None
|
||||
rurl, sep, rest = url.rpartition('?')
|
||||
if rurl:
|
||||
return rurl
|
||||
return rest
|
||||
|
||||
if 'podcasts' in url or 'surveys' in url:
|
||||
url = None
|
||||
|
||||
return url
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
|
||||
for tag in soup.findAll(name=['ul','li','table','td','tr','span']):
|
||||
tag.name = 'div'
|
||||
for tag in soup.findAll(name= 'div',attrs={ 'id':'pageNav'}):
|
||||
tag.extract()
|
||||
return soup
|
||||
def print_version(self, url):
|
||||
if '/news/' in url or '/blog/' in url:
|
||||
return url
|
||||
if '/magazine' in url:
|
||||
rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/printer/')
|
||||
else:
|
||||
rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/')
|
||||
return rurl.replace('/investing/','/investor/')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
@ -4,95 +4,73 @@ __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
www.businessworld.in
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BusinessWorldMagazine(BasicNewsRecipe):
|
||||
title = 'Business World Magazine'
|
||||
__author__ = 'Darko Miletic'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'News from India'
|
||||
publisher = 'ABP Pvt Ltd Publication'
|
||||
category = 'news, politics, finances, India, Asia'
|
||||
delay = 1
|
||||
no_stylesheets = True
|
||||
INDEX = 'http://www.businessworld.in/bw/Magazine_Current_Issue'
|
||||
INDEX = 'http://www.businessworld.in/businessworld/magazine_latest_issue.php'
|
||||
ROOT = 'http://www.businessworld.in'
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = 'en_IN'
|
||||
extra_css = """
|
||||
img{display: block; margin-bottom: 0.5em}
|
||||
body{font-family: Arial,Helvetica,sans-serif}
|
||||
h2{color: gray; display: block}
|
||||
"""
|
||||
auto_cleanup = True
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
def is_in_list(self,linklist,url):
|
||||
for litem in linklist:
|
||||
if litem == url:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
br = self.browser
|
||||
br.open(self.ROOT)
|
||||
raw = br.open(br.click_link(text_regex=re.compile('Current.*Issue',
|
||||
re.I))).read()
|
||||
soup = self.index_to_soup(raw)
|
||||
mc = soup.find(attrs={'class':'mag_cover'})
|
||||
if mc is not None:
|
||||
img = mc.find('img', src=True)
|
||||
if img is not None:
|
||||
self.cover_url = img['src']
|
||||
|
||||
feeds = []
|
||||
current_section = None
|
||||
articles = []
|
||||
linklist = []
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
|
||||
tough = soup.find('div', attrs={'id':'tough'})
|
||||
if tough:
|
||||
for item in tough.findAll('h1'):
|
||||
description = ''
|
||||
title_prefix = ''
|
||||
feed_link = item.find('a')
|
||||
if feed_link and feed_link.has_key('href'):
|
||||
url = self.ROOT + feed_link['href']
|
||||
if not self.is_in_list(linklist,url):
|
||||
title = title_prefix + self.tag_to_string(feed_link)
|
||||
date = strftime(self.timefmt)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':description
|
||||
})
|
||||
linklist.append(url)
|
||||
|
||||
for item in soup.findAll('div', attrs={'class':'nametitle'}):
|
||||
description = ''
|
||||
title_prefix = ''
|
||||
feed_link = item.find('a')
|
||||
if feed_link and feed_link.has_key('href'):
|
||||
url = self.ROOT + feed_link['href']
|
||||
if not self.is_in_list(linklist,url):
|
||||
title = title_prefix + self.tag_to_string(feed_link)
|
||||
date = strftime(self.timefmt)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':description
|
||||
})
|
||||
linklist.append(url)
|
||||
return [(soup.head.title.string, articles)]
|
||||
for tag in soup.findAll(['h3', 'h2']):
|
||||
inner_a = tag.find('a')
|
||||
if tag.name == 'h3' and inner_a is not None:
|
||||
continue
|
||||
if tag.name == 'h2' and (inner_a is None or current_section is
|
||||
None):
|
||||
continue
|
||||
|
||||
if tag.name == 'h3':
|
||||
if current_section is not None and articles:
|
||||
feeds.append((current_section, articles))
|
||||
current_section = self.tag_to_string(tag)
|
||||
self.log('Found section:', current_section)
|
||||
articles = []
|
||||
elif tag.name == 'h2':
|
||||
url = inner_a.get('href', None)
|
||||
if url is None: continue
|
||||
if url.startswith('/'): url = self.ROOT + url
|
||||
title = self.tag_to_string(inner_a)
|
||||
h1 = tag.findPreviousSibling('h1')
|
||||
if h1 is not None:
|
||||
title = self.tag_to_string(h1) + title
|
||||
self.log('\tFound article:', title)
|
||||
articles.append({'title':title, 'url':url, 'date':'',
|
||||
'description':''})
|
||||
|
||||
if current_section and articles:
|
||||
feeds.append((current_section, articles))
|
||||
|
||||
return feeds
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'printwrapper'})]
|
||||
remove_tags = [dict(name=['object','link','meta','base','iframe','link','table'])]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('/bw/','/bw/storyContent/')
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
cover_item = soup.find('img',attrs={'class':'toughbor'})
|
||||
if cover_item:
|
||||
cover_url = self.ROOT + cover_item['src']
|
||||
return cover_url
|
||||
|
73
recipes/cbn.recipe
Normal file
@ -0,0 +1,73 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CBN(BasicNewsRecipe):
    """Recipe for CBN News (The Christian Broadcasting Network).

    Articles come from the RSS feeds listed in ``feeds``; only the title,
    author, date and text containers of each article page are kept.
    """

    title = u'CBN News'
    __author__ = 'Roger'
    description = 'The Christian Broadcasting Network'
    publisher = 'http://www.cbn.com/'
    category = 'news, religion, spiritual, christian'
    language = 'en'
    encoding = 'iso-8859-1'

    # TODO: this downloads 25+ articles, while the online site publishes at
    # most 7 articles daily. This needs to be fixed so that it only
    # downloads at most 7 articles.
    oldest_article = 7
    max_articles_per_feed = 100

    # Make article titles, author and date bold, italic or small font.
    # TODO: Could use a smaller title text
    # TODO: Italicize Author and Publisher?
    #
    # The site pulls in http://www.cbn.com/App_Themes/Common/base.css,
    # http://www.cbn.com/App_Themes/CBNNews/article.css
    # ... and many more style sheets.
    #extra_css = '''
    #    .story_item_headline { font-size: medium; font-weight: bold; }
    #    .story_item_author { font-size: small; font-style:italic; }
    #    .signature_line { font-size: small; }
    #    '''

    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    conversion_options = {'linearize_tables':True}

    # TODO: CBN has no dedicated masthead image; this one was found in a
    # news article. (There is a better, higher contrast blue-on-white image,
    # but it cannot be fetched or is too big -- embedded into a larger jpeg?)
    masthead_url = 'http://www.cbn.com/templates/images/cbn_com_logo.jpg'

    keep_only_tags = [
        dict(name='h1', attrs={'id':'articleTitle'}),
        dict(name='div', attrs={'class':'articleAuthor'}),
        dict(name='div', attrs={'class':'articleDate'}),
        dict(name='div', attrs={'class':'articleText'}),
    ]

    remove_tags = [
        # The article image is usually an Adobe Flash Player image. The
        # snapshot .jpg files of the video live in a URL folder named
        # "PageFiles_Files". Filter the whole image container for now.
        # (The majority of images seem to be Adobe Flash.)
        dict(name='div', attrs={'class':'articleImage'}),
    ]

    # Comment out or uncomment any of the following RSS feeds according to
    # your liking.
    # A full list can be found here: http://www.cbn.com/rss.aspx
    feeds = [
        (u'World', u'http://www.cbn.com/cbnnews/world/feed/'),
        (u'US', u'http://www.cbn.com/cbnnews/us/feed/'),
        (u'Inside Israel', u'http://www.cbn.com/cbnnews/insideisrael/feed/'),
        (u'Politics', u'http://www.cbn.com/cbnnews/politics/feed/'),
        (u'Christian World News', u'http://www.cbn.com/cbnnews/shows/cwn/feed/'),
        (u'Health and Science', u'http://www.cbn.com/cbnnews/healthscience/feed/'),
        (u'Finance', u'http://www.cbn.com/cbnnews/finance/feed/'),
    ]
|
||||
|
16
recipes/cd_action.recipe
Normal file
@ -0,0 +1,16 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CD_Action(BasicNewsRecipe):
    """Recipe for CD-Action (cdaction.pl), built from its 'Newsy' RSS feed."""

    title = u'CD-Action'
    __author__ = 'fenuks'
    description = 'cdaction.pl - polish magazine about games site'
    category = 'games'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'

    # keep_only_tags is documented as a list of tag specifications, so the
    # single spec is wrapped in a list rather than passed as a bare dict.
    keep_only_tags = [dict(id='news_content')]
    remove_tags_after = dict(name='div', attrs={'class':'tresc'})

    feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]
|
128
recipes/cio_magazine.recipe
Normal file
@ -0,0 +1,128 @@
|
||||
# The first comments describe the difficulties I have had with Python
|
||||
# When a UTF-8 error is raised, check the comments (accented characters). In Notepad++ use Search > Go to > position to find it.
|
||||
# Edit with Notepad++. If it shows a - where it should not, the indentation is wrong... use Edit > Blank Operations > TAB to Space
|
||||
# I have understood what 'from' means... the names are paths inside pylib.zip...
|
||||
# 'from' imports a single symbol... 'import' brings in the whole library
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
# sys is not needed... I tried to use it to write to stderr
|
||||
from calibre import strftime
|
||||
# Used to format the article's timestamp
|
||||
import string, re
|
||||
# Needed for regular expressions
|
||||
# Seen in pylib.zip... the first letter is uppercase
|
||||
# These last two were a vague attempt to set a cookie (not used)
|
||||
|
||||
class CIO_Magazine(BasicNewsRecipe):
    """Recipe that downloads the most recent issue of CIO Magazine.

    The current issue is located by scraping http://www.cio.com/magazine in
    ``parse_index``; every article is then fetched through its print view
    (see ``print_version``).
    """

    title = 'CIO Magazine'
    __author__ = 'Julio Map'
    description = 'CIO is the leading information brand for today-s busy Chief information Officer - CIO Magazine bi-monthly '
    language = 'en'
    encoding = 'utf8'
    cover_url = 'http://www.cio.com/homepage/images/hp-cio-logo-linkedin.png'

    oldest_article = 14
    max_articles_per_feed = 100
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    # Original author's note: strictly unnecessary, because the print
    # version of each article is used instead (see print_version below).
    remove_tags_before = dict(name='div', attrs={'id':'container'})

    # Page layout notes from the original author:
    #   Within a given issue page,
    #     issue_details holds the title and the sections of that issue;
    #     DetailModule (inside issue_details) holds the urls and summaries.
    #   Within a given article page,
    #     Article-default-body holds the text, but the print version is
    #     used instead.

    def print_version(self, url):
        """Return the print-view URL for an article URL.

        Rewrites ``http://www.cio.com/article/<num>/<title>`` to
        ``http://www.cio.com/article/print/<num>#``. According to the
        original author's notes, that page holds all pages of the article
        inside ``div id=container``. Site-relative URLs are made absolute
        first.

        The original author notes that the framework calls this method, so
        recipes should not call it themselves (it would run twice).
        """
        if url.startswith('/'):
            url = 'http://www.cio.com' + url
        segments = url.split('/')
        # For an absolute article URL, segments[0:4] is
        # ['http:', '', 'www.cio.com', 'article'] and segments[4] is <num>.
        return '/'.join(segments[0:4]) + '/print/' + segments[4] + '#'

    def parse_index(self):
        """Build the feed list for the latest issue.

        Returns a list of ``(section title, articles)`` tuples in the order
        the sections appear on the issue page. Each article is a dict with
        the keys ``title``, ``url``, ``date``, ``description`` and
        ``content``. Sections that end up with no articles are omitted.
        """
        # First find out which issue is the latest one: it is the first
        # link inside the element with class content_body.
        landing_soup = self.index_to_soup('http://www.cio.com/magazine')
        issue_link = landing_soup.find(
            True, attrs={'class':'content_body'}).find('a', href=True)
        index_url = re.sub(r'\?.*', '', issue_link['href'])
        # cio.com uses relative links, so prepend the domain name (guarded,
        # in case the site stops using them).
        if index_url.startswith('/'):
            index_url = 'http://www.cio.com' + index_url
        self.log('Issue index:', index_url)

        soup = self.index_to_soup(index_url)

        articles = {}        # section title -> list of article dicts
        section_order = []   # section titles in first-seen order
        key = None           # title of the section currently being filled

        # Only two kinds of element matter: 'heading' supplies the section
        # names and 'issue_item' supplies the urls and summaries.
        for div in soup.findAll(True,
                attrs={'class':['heading', 'issue_item']}):
            css_class = div['class']

            if css_class == 'heading':
                key = string.capwords(self.tag_to_string(div.span))
                self.log('Found section:', key)
                # Register each section once, so a repeated heading neither
                # discards collected articles nor duplicates the feed.
                if key not in articles:
                    articles[key] = []
                    section_order.append(key)

            elif css_class == 'issue_item':
                a = div.find('a', href=True)
                if not a:
                    continue
                url = re.sub(r'\?.*', '', a['href'])
                if 'podcasts' in url:
                    continue

                # Possible refinement noted by the original author: strip
                # the last two words of the title.
                title = self.tag_to_string(a, use_alt=True).strip()
                # This is the collection date, not the publication date.
                pubdate = strftime('%a, %d %b')
                # The only paragraph inside an issue_item is the summary;
                # without one the description stays blank.
                summary = div.find('p')
                description = (self.tag_to_string(summary, use_alt=False)
                               if summary else '')

                # Items seen before any heading go into a fallback section,
                # which must be registered too or it would never be
                # returned.
                feed = key if key is not None else 'Uncategorized'
                if feed not in articles:
                    articles[feed] = []
                    section_order.append(feed)

                self.log('\tFound article:', title, 'at', url)
                articles[feed].append(
                    dict(title=title, url=url, date=pubdate,
                         description=description,
                         content=''))

        return [(name, articles[name]) for name in section_order
                if articles[name]]
|
@ -28,11 +28,12 @@ class CNN(BasicNewsRecipe):
|
||||
(re.compile(r'<style.*?</style>', re.DOTALL), lambda m: ''),
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(id='cnnContentContainer')]
|
||||
keep_only_tags = [dict(id=['cnnContentContainer', 'storycontent'])]
|
||||
remove_tags = [
|
||||
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
|
||||
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
|
||||
'cnn_strycntntrgt']},
|
||||
'cnn_strycntntrgt', 'hed_side', 'foot']},
|
||||
dict(id=['ie_column']),
|
||||
]
|
||||
|
||||
|
||||
|
@ -1,40 +1,10 @@
|
||||
import re
|
||||
from lxml.html import parse
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Counterpunch(BasicNewsRecipe):
|
||||
'''
|
||||
Parses counterpunch.com for articles
|
||||
'''
|
||||
title = 'Counterpunch'
|
||||
description = 'Daily political opinion from www.Counterpunch.com'
|
||||
language = 'en'
|
||||
__author__ = 'O. Emmerson'
|
||||
keep_only_tags = [dict(name='td', attrs={'width': '522'})]
|
||||
max_articles_per_feed = 10
|
||||
title = u'Counterpunch'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
title, url = 'Counterpunch', 'http://www.counterpunch.com'
|
||||
articles = self.parse_page(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
return feeds
|
||||
|
||||
def parse_page(self, url):
|
||||
parsed_page = parse(url).getroot()
|
||||
articles = []
|
||||
unwanted_text = re.compile('Website\ of\ the|I\ urge\ you|Subscribe\ now|DONATE|\@asis\.com|donation\ button|click\ over\ to\ our')
|
||||
parsed_articles = [a for a in parsed_page.cssselect("html>body>table tr>td>p[class='style2']") if not unwanted_text.search(a.text_content())]
|
||||
for art in parsed_articles:
|
||||
try:
|
||||
author = art.text
|
||||
title = art.cssselect("a")[0].text + ' by {0}'.format(author)
|
||||
art_url = 'http://www.counterpunch.com/' + art.cssselect("a")[0].attrib['href']
|
||||
articles.append({'title': title, 'url': art_url})
|
||||
except Exception as e:
|
||||
e
|
||||
#print('Handler Error: ', e, 'title :', a.text_content())
|
||||
pass
|
||||
return articles
|
||||
feeds = [(u'Counterpunch', u'http://www.counterpunch.org/category/article/feed/')]
|
||||
|
||||
|
47
recipes/cvecezla.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
cvecezla.wordpress.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CveceZla(BasicNewsRecipe):
    """Recipe for the blog at cvecezla.wordpress.com, read from its feed."""

    # Identity and descriptive metadata.
    title = 'Cvece zla i naopakog'
    __author__ = 'Darko Miletic'
    description = 'Haoticnost razmisljanja poradja haoticnost pisanja. Muzika, stripovi, igre, knjige, generalno glupiranje...'
    language = 'sr'
    publication_type = 'blog'

    # Download behaviour.
    encoding = 'utf-8'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # The stylesheet is one string; the pieces below are joined by implicit
    # literal concatenation.
    extra_css = (
        ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif}'
        ' .article_description{font-family: sans1, sans-serif}'
        ' img{display: block } '
    )

    conversion_options = dict(
        comment=description,
        tags='igre, muzika, film, blog, Srbija',
        publisher='Mehmet Krljic',
        language=language,
    )

    # Replace every U+0110 in the downloaded markup with U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda _found: u'\u00D0')]

    remove_tags_before = dict(attrs={'class':'navigation'})
    remove_tags_after = dict(attrs={'class':'commentlist'})
    remove_tags = [
        dict(attrs={'class':[
            'postmetadata alt',
            'sharedaddy sharedaddy-dark sd-like-enabled sd-sharing-enabled',
            'reply',
            'navigation',
        ]}),
        dict(attrs={'id':'respond'}),
    ]

    feeds = [(u'Clanci', u'http://cvecezla.wordpress.com/feed/')]

    def preprocess_html(self, soup):
        """Remove the ``style`` attribute from every tag that has one.

        Returns the same soup object, modified in place.
        """
        styled_tags = soup.findAll(style=True)
        for node in styled_tags:
            del node['style']
        return soup
|
||||
|
||||
|
21
recipes/dobreprogamy.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Dobreprogramy_pl(BasicNewsRecipe):
    """Recipe for dobreprogramy.pl, covering its news and reader-blog feeds."""

    title = 'Dobreprogramy.pl'
    __author__ = 'fenuks'
    # Spelled __license__ to match the convention used by the other recipes.
    __license__ = 'GPL v3'
    category = 'IT'
    language = 'pl'
    description = u'Aktualności i blogi z dobreprogramy.pl'
    cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
    encoding = 'utf-8'

    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    extra_css = '.title {font-size:22px;}'

    keep_only_tags = [
        dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']}),
    ]

    feeds = [
        (u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
        ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow'),
    ]
|
@ -77,32 +77,23 @@ class Economist(BasicNewsRecipe):
|
||||
continue
|
||||
self.log('Found section: %s'%section_title)
|
||||
articles = []
|
||||
for h5 in section.findAll('h5'):
|
||||
article_title = self.tag_to_string(h5).strip()
|
||||
if not article_title:
|
||||
continue
|
||||
data = h5.findNextSibling(attrs={'class':'article'})
|
||||
if data is None: continue
|
||||
a = data.find('a', href=True)
|
||||
if a is None: continue
|
||||
url = a['href']
|
||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
||||
url += '/print'
|
||||
article_title += ': %s'%self.tag_to_string(a).strip()
|
||||
articles.append({'title':article_title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
if not articles:
|
||||
# We have last or first section
|
||||
for art in section.findAll(attrs={'class':'article'}):
|
||||
a = art.find('a', href=True)
|
||||
if a is not None:
|
||||
url = a['href']
|
||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
||||
url += '/print'
|
||||
title = self.tag_to_string(a)
|
||||
if title:
|
||||
articles.append({'title':title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
subsection = ''
|
||||
for node in section.findAll(attrs={'class':'article'}):
|
||||
subsec = node.findPreviousSibling('h5')
|
||||
if subsec is not None:
|
||||
subsection = self.tag_to_string(subsec)
|
||||
prefix = (subsection+': ') if subsection else ''
|
||||
a = node.find('a', href=True)
|
||||
if a is not None:
|
||||
url = a['href']
|
||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
||||
url += '/print'
|
||||
title = self.tag_to_string(a)
|
||||
if title:
|
||||
title = prefix + title
|
||||
self.log('\tFound article:', title)
|
||||
articles.append({'title':title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
|
||||
if articles:
|
||||
if section_title not in feeds:
|
||||
|
@ -69,32 +69,23 @@ class Economist(BasicNewsRecipe):
|
||||
continue
|
||||
self.log('Found section: %s'%section_title)
|
||||
articles = []
|
||||
for h5 in section.findAll('h5'):
|
||||
article_title = self.tag_to_string(h5).strip()
|
||||
if not article_title:
|
||||
continue
|
||||
data = h5.findNextSibling(attrs={'class':'article'})
|
||||
if data is None: continue
|
||||
a = data.find('a', href=True)
|
||||
if a is None: continue
|
||||
url = a['href']
|
||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
||||
url += '/print'
|
||||
article_title += ': %s'%self.tag_to_string(a).strip()
|
||||
articles.append({'title':article_title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
if not articles:
|
||||
# We have last or first section
|
||||
for art in section.findAll(attrs={'class':'article'}):
|
||||
a = art.find('a', href=True)
|
||||
if a is not None:
|
||||
url = a['href']
|
||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
||||
url += '/print'
|
||||
title = self.tag_to_string(a)
|
||||
if title:
|
||||
articles.append({'title':title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
subsection = ''
|
||||
for node in section.findAll(attrs={'class':'article'}):
|
||||
subsec = node.findPreviousSibling('h5')
|
||||
if subsec is not None:
|
||||
subsection = self.tag_to_string(subsec)
|
||||
prefix = (subsection+': ') if subsection else ''
|
||||
a = node.find('a', href=True)
|
||||
if a is not None:
|
||||
url = a['href']
|
||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
||||
url += '/print'
|
||||
title = self.tag_to_string(a)
|
||||
if title:
|
||||
title = prefix + title
|
||||
self.log('\tFound article:', title)
|
||||
articles.append({'title':title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
|
||||
if articles:
|
||||
if section_title not in feeds:
|
||||
|
40
recipes/el_mostrador.recipe
Normal file
@ -0,0 +1,40 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1313609361(BasicNewsRecipe):
|
||||
news = True
|
||||
title = u'El Mostrador'
|
||||
__author__ = 'Alex Mitrani'
|
||||
description = u'Chilean online newspaper'
|
||||
publisher = u'La Plaza S.A.'
|
||||
category = 'news, rss'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
summary_length = 1000
|
||||
language = 'es_CL'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.elmostrador.cl/assets/img/logo-elmostrador-m.jpg'
|
||||
remove_tags_before = dict(name='div', attrs={'class':'news-heading cf'})
|
||||
remove_tags_after = dict(name='div', attrs={'class':'footer-actions cf'})
|
||||
remove_tags = [dict(name='div', attrs={'class':'footer-actions cb cf'})
|
||||
,dict(name='div', attrs={'class':'news-aside fl'})
|
||||
,dict(name='div', attrs={'class':'footer-actions cf'})
|
||||
,dict(name='div', attrs={'class':'user-bar','id':'top'})
|
||||
,dict(name='div', attrs={'class':'indicators'})
|
||||
,dict(name='div', attrs={'id':'header'})
|
||||
]
|
||||
|
||||
|
||||
feeds = [(u'Temas Destacados'
|
||||
, u'http://www.elmostrador.cl/destacado/feed/')
|
||||
, (u'El D\xeda', u'http://www.elmostrador.cl/dia/feed/')
|
||||
, (u'Pa\xeds', u'http://www.elmostrador.cl/noticias/pais/feed/')
|
||||
, (u'Mundo', u'http://www.elmostrador.cl/noticias/mundo/feed/')
|
||||
, (u'Negocios', u'http://www.elmostrador.cl/noticias/negocios/feed/')
|
||||
, (u'Cultura', u'http://www.elmostrador.cl/noticias/cultura/feed/')
|
||||
, (u'Vida en L\xednea', u'http://www.elmostrador.cl/vida-en-linea/feed/')
|
||||
, (u'Opini\xf3n & Blogs', u'http://www.elmostrador.cl/opinion/feed/')
|
||||
]
|
||||
|
15
recipes/elektroda_pl.recipe
Normal file
@ -0,0 +1,15 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Elektroda(BasicNewsRecipe):
|
||||
title = u'Elektroda'
|
||||
oldest_article = 8
|
||||
__author__ = 'fenuks'
|
||||
description = 'Elektroda.pl'
|
||||
cover_url = 'http://demotywatory.elektroda.pl/Thunderpic/logo.gif'
|
||||
category = 'electronics'
|
||||
language = 'pl'
|
||||
max_articles_per_feed = 100
|
||||
remove_tags_before=dict(name='span', attrs={'class':'postbody'})
|
||||
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
|
||||
remove_tags=[dict(name='a', attrs={'href':'#top'})]
|
||||
feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
|
113
recipes/fairbanks_daily.recipe
Normal file
@ -0,0 +1,113 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class FairbanksDailyNewsminer(BasicNewsRecipe):
|
||||
title = u'Fairbanks Daily News-miner'
|
||||
__author__ = 'Roger'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
|
||||
description = 'The voice of interior Alaska since 1903'
|
||||
publisher = 'http://www.newsminer.com/'
|
||||
category = 'news, Alaska, Fairbanks'
|
||||
language = 'en'
|
||||
|
||||
# Make article titles, author and date bold, italic or small font.
|
||||
# http://assets.matchbin.com/sites/635/stylesheets/newsminer.com.css
|
||||
# (signature_line contains date, views, comments)
|
||||
extra_css = '''
|
||||
.story_item_headline { font-size: medium; font-weight: bold; }
|
||||
.story_item_author { font-size: small; font-style:italic; }
|
||||
.signature_line { font-size: small; }
|
||||
'''
|
||||
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'en'
|
||||
encoding = 'utf8'
|
||||
conversion_options = {'linearize_tables':True}
|
||||
|
||||
# TODO: The News-miner cover image seems a bit small. Can this be enlarged by 10-30%?
|
||||
masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'
|
||||
|
||||
|
||||
# In order to omit seeing number of views, number of posts and the pipe
|
||||
# symbol for divider after the title and date of the article, a regex or
|
||||
# manual processing is needed to get just the "story_item_date updated"
|
||||
# (which contains the date). Everything else on this line is pretty much not needed.
|
||||
#
|
||||
# Currently, you will see the following:
|
||||
# | Aug 24, 2011 | 654 views | 6 | |
|
||||
# (ie. 6 comments)
|
||||
#
|
||||
# HTML line containing story_item_date:
|
||||
# <div class="signature_line"><span title="2011-08-22T23:37:14Z" class="story_item_date updated">Aug 22, 2011</span> | 2370 views | 52 <a href="/pages/full_story/push?article-Officials+tout+new+South+Cushman+homeless+living+facility%20&id=15183753#comments_15183753"><img alt="52 comments" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/comments-icon.gif" title="52 comments" /></a> | <span id="number_recommendations_15183753" class="number_recommendations">9</span> <a href="#1" id="recommend_link_15183753" onclick="Element.remove('recommend_link_15183753'); new Ajax.Request('/community/content/recommend/15183753', {asynchronous:true, evalScripts:true}); return false;"><img alt="9 recommendations" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/thumbs-up-icon.gif" title="9 recommendations" /></a> | <a href="#1" onclick="$j.facebox({ajax: '/community/content/email_friend_pane/15183753'}); return false;"><span style="position: relative;"><img alt="email to a friend" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/email-this.gif" title="email to a friend" /></span></a> | <span><a href="/printer_friendly/15183753" target="_blank"><img alt="print" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/print_icon.gif" title="print" /></a></span><span id="email_content_message_15183753" class="signature_email_message"></span></div>
|
||||
|
||||
# The following was suggested, but it looks like I also need to define self & soup
|
||||
# (as well as bring in extra soup depends?)
|
||||
#date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
|
||||
|
||||
#preprocess_regexps = [(re.compile(r'<span[^>]*addthis_separator*>'), lambda match: '') ]
|
||||
#preprocess_regexps = [(re.compile(r'span class="addthis_separator">|</span>'), lambda match: '') ]
|
||||
|
||||
#preprocess_regexps = [
|
||||
# (re.compile(r'<start>.*?<end>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
# ]
|
||||
|
||||
#def get_browser(self):
|
||||
#def preprocess_html(soup, first_fetch):
|
||||
# date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
|
||||
# return
|
||||
|
||||
#preprocess_regexps = [(re.compile(r' |.*?', re.DOTALL), lambda m: '')]
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
#dict(name='div', attrs={'class':'hnews hentry item'}),
|
||||
dict(name='div', attrs={'class':'story_item_headline entry-title'}),
|
||||
#dict(name='div', attrs={'class':'story_item_author'}),
|
||||
#dict(name='span', attrs={'class':'story_item_date updated'}),
|
||||
#dict(name='div', attrs={'class':'story_item_author'}),
|
||||
dict(name='div', attrs={'class':'full_story'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
# Try getting rid of some signature_line (date line) stuff
|
||||
#dict(name='img', attrs={'alt'}),
|
||||
dict(name='img', attrs={'class':'dont_touch_me'}),
|
||||
dict(name='span', attrs={'class':'number_recommendations'}),
|
||||
#dict(name='div', attrs={'class':'signature_line'}),
|
||||
|
||||
# Removes div within <!-- AddThis Button BEGIN --> <!-- AddThis Button END -->
|
||||
dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}),
|
||||
|
||||
dict(name='div', attrs={'class':'related_content'}),
|
||||
dict(name='div', attrs={'id':'comments_container'})
|
||||
]
|
||||
|
||||
|
||||
# Comment-out or uncomment any of the following RSS feeds according to your
|
||||
# liking.
|
||||
#
|
||||
# TODO: Some random bits of text might be trailing the last page (or TOC on
|
||||
# MOBI files), these are bits of public posts and comments and need to also
|
||||
# be removed.
|
||||
#
|
||||
feeds = [
|
||||
(u'Alaska News', u'http://newsminer.com/rss/rss_feeds/alaska_news?content_type=article&tags=alaska_news&page_name=rss_feeds&instance=alaska_news'),
|
||||
(u'Local News', u'http://newsminer.com/rss/rss_feeds/local_news?content_type=article&tags=local_news&page_name=rss_feeds&offset=0&instance=local_news'),
|
||||
(u'Business', u'http://newsminer.com/rss/rss_feeds/business_news?content_type=article&tags=business_news&page_name=rss_feeds&instance=business_news'),
|
||||
(u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'),
|
||||
(u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'),
|
||||
(u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),
|
||||
#(u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),
|
||||
(u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
|
||||
#(u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),
|
||||
#(u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),
|
||||
(u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
|
||||
(u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
|
||||
#(u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),
|
||||
(u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'),
|
||||
#(u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin')
|
||||
]
|
||||
|
40
recipes/film_web.recipe
Normal file
@ -0,0 +1,40 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Filmweb_pl(BasicNewsRecipe):
|
||||
title = u'FilmWeb'
|
||||
__author__ = 'fenuks'
|
||||
description = 'FilmWeb - biggest polish movie site'
|
||||
cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
|
||||
category = 'movies'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
extra_css = '.hdrBig {font-size:22px;}'
|
||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
||||
keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
|
||||
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
||||
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
||||
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
||||
(u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
|
||||
(u'News / Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'),
|
||||
(u'News / Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'),
|
||||
(u'News / Dystrybucja dvd / blu-ray', u'http://www.filmweb.pl/feed/news/category/video'),
|
||||
(u'News / Dystrybucja kinowa', u'http://www.filmweb.pl/feed/news/category/cinema'),
|
||||
(u'News / off', u'http://www.filmweb.pl/feed/news/category/off'),
|
||||
(u'News / Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'),
|
||||
(u'News / Organizacje branżowe', u'http://www.filmweb.pl/feed/news/category/organizations'),
|
||||
(u'News / Internet', u'http://www.filmweb.pl/feed/news/category/internet'),
|
||||
(u'News / Różne', u'http://www.filmweb.pl/feed/news/category/other'),
|
||||
(u'News / Kino polskie', u'http://www.filmweb.pl/feed/news/category/polish.cinema'),
|
||||
(u'News / Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'),
|
||||
(u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'),
|
||||
(u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
skip_tag = soup.find('a', attrs={'class':'welcomeScreenButton'})
|
||||
if skip_tag is not None:
|
||||
self.log.warn('skip_tag')
|
||||
self.log.warn(skip_tag)
|
||||
return self.index_to_soup(skip_tag['href'], raw=True)
|
||||
|
@ -5,6 +5,7 @@ www.ft.com/uk-edition
|
||||
'''
|
||||
|
||||
import datetime
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -22,8 +23,11 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
needs_subscription = True
|
||||
encoding = 'utf8'
|
||||
publication_type = 'newspaper'
|
||||
articles_are_obfuscated = True
|
||||
temp_files = []
|
||||
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
||||
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
||||
LOGIN2 = 'http://media.ft.com/h/subs3.html'
|
||||
INDEX = 'http://www.ft.com/uk-edition'
|
||||
PREFIX = 'http://www.ft.com'
|
||||
|
||||
@ -39,14 +43,19 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br.open(self.INDEX)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.LOGIN)
|
||||
br.open(self.LOGIN2)
|
||||
br.select_form(name='loginForm')
|
||||
br['username'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']})
|
||||
,dict(name='div', attrs={'class':'standfirst'})
|
||||
,dict(name='div', attrs={'id' :'storyContent'})
|
||||
,dict(name='div', attrs={'class':['ft-story-body','index-detail']})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'floating-con'})
|
||||
,dict(name=['meta','iframe','base','object','embed','link'])
|
||||
@ -68,18 +77,23 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
|
||||
def get_artlinks(self, elem):
|
||||
articles = []
|
||||
count = 0
|
||||
for item in elem.findAll('a',href=True):
|
||||
count = count + 1
|
||||
if self.test and count > 2:
|
||||
return articles
|
||||
rawlink = item['href']
|
||||
if rawlink.startswith('http://'):
|
||||
url = rawlink
|
||||
else:
|
||||
url = self.PREFIX + rawlink
|
||||
urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
|
||||
title = self.tag_to_string(item)
|
||||
date = strftime(self.timefmt)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'url' :urlverified
|
||||
,'description':''
|
||||
})
|
||||
return articles
|
||||
@ -96,7 +110,11 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
st = wide.find('h4',attrs={'class':'section-no-arrow'})
|
||||
if st:
|
||||
strest.insert(0,st)
|
||||
count = 0
|
||||
for item in strest:
|
||||
count = count + 1
|
||||
if self.test and count > 2:
|
||||
return feeds
|
||||
ftitle = self.tag_to_string(item)
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
||||
feedarts = self.get_artlinks(item.parent.ul)
|
||||
@ -135,4 +153,19 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
if cdate.isoweekday() == 7:
|
||||
cdate -= datetime.timedelta(days=1)
|
||||
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
|
||||
|
||||
def get_obfuscated_article(self, url):
    """Download ``url`` and return the path of a temp file holding its body.

    The download is attempted up to ten times. If every attempt fails, the
    error from the last attempt is re-raised, instead of falling through to
    the write below with ``html`` unbound.

    :param url: address of the article to fetch with ``self.browser``.
    :return: ``name`` of the ``PersistentTemporaryFile`` the page was
        written to; the file object is also appended to ``self.temp_files``.
    """
    max_attempts = 10
    for attempt in range(1, max_attempts + 1):
        try:
            html = self.browser.open(url).read()
            break
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate immediately so the user can abort.
            if attempt == max_attempts:
                raise
            self.log.warn('Retrying download...')

    self.temp_files.append(PersistentTemporaryFile('_fa.html'))
    article_file = self.temp_files[-1]
    article_file.write(html)
    article_file.close()
    return article_file.name
|
||||
|
39
recipes/fluter_de.recipe
Normal file
@ -0,0 +1,39 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
'''
|
||||
Fetch fluter.de
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1313693926(BasicNewsRecipe):
|
||||
|
||||
title = u'Fluter'
|
||||
description = 'fluter.de Magazin der Bundeszentrale für politische Bildung/bpb'
|
||||
language = 'de'
|
||||
encoding = 'UTF-8'
|
||||
|
||||
__author__ = 'Armin Geller' # 2011-08-19
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 50
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':["comments"]}),
|
||||
dict(attrs={'class':['commentlink']}),
|
||||
]
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':["grid_8 articleText"]}),
|
||||
dict(name='div', attrs={'class':["articleTextInnerText"]}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Inhalt:', u'http://www.fluter.de/de/?tpl=907'),
|
||||
]
|
||||
|
||||
extra_css = '.cs_img {margin-right: 10pt;}'
|
||||
|
@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from datetime import datetime, timedelta
|
||||
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
|
||||
@ -16,7 +17,7 @@ class FolhaOnline(BasicNewsRecipe):
|
||||
news = True
|
||||
|
||||
title = u'Folha de S\xE3o Paulo'
|
||||
__author__ = 'Euler Alves'
|
||||
__author__ = 'Euler Alves and Alex Mitrani'
|
||||
description = u'Brazilian news from Folha de S\xE3o Paulo'
|
||||
publisher = u'Folha de S\xE3o Paulo'
|
||||
category = 'news, rss'
|
||||
@ -62,37 +63,50 @@ class FolhaOnline(BasicNewsRecipe):
|
||||
,dict(name='div',
|
||||
attrs={'class':[
|
||||
'openBox adslibraryArticle'
|
||||
,'toolbar'
|
||||
]})
|
||||
|
||||
,dict(name='a')
|
||||
,dict(name='iframe')
|
||||
,dict(name='link')
|
||||
,dict(name='script')
|
||||
,dict(name='li')
|
||||
]
|
||||
remove_tags_after = dict(name='div',attrs={'id':'articleEnd'})
|
||||
|
||||
feeds = [
|
||||
(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
|
||||
,(u'Cotidiano', u'http://feeds.folha.uol.com.br/folha/cotidiano/rss091.xml')
|
||||
,(u'Brasil', u'http://feeds.folha.uol.com.br/folha/brasil/rss091.xml')
|
||||
,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
|
||||
,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
|
||||
,(u'Mercado', u'http://feeds.folha.uol.com.br/folha/dinheiro/rss091.xml')
|
||||
,(u'Saber', u'http://feeds.folha.uol.com.br/folha/educacao/rss091.xml')
|
||||
,(u'Tec', u'http://feeds.folha.uol.com.br/folha/informatica/rss091.xml')
|
||||
,(u'Ilustrada', u'http://feeds.folha.uol.com.br/folha/ilustrada/rss091.xml')
|
||||
,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
|
||||
,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
|
||||
,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
|
||||
,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
|
||||
,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
|
||||
,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
|
||||
,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
|
||||
,(u'Pelo Mundo', u'http://feeds.folha.uol.com.br/pelomundo.folha.rssblog.uol.com.br/')
|
||||
,(u'Circuito integrado', u'http://feeds.folha.uol.com.br/circuitointegrado.folha.rssblog.uol.com.br/')
|
||||
,(u'Blog do Fred', u'http://feeds.folha.uol.com.br/blogdofred.folha.rssblog.uol.com.br/')
|
||||
,(u'Maria In\xEAs Dolci', u'http://feeds.folha.uol.com.br/mariainesdolci.folha.blog.uol.com.br/')
|
||||
,(u'Eduardo Ohata', u'http://feeds.folha.uol.com.br/folha/pensata/eduardoohata/rss091.xml')
|
||||
,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/folha/pensata/kennedyalencar/rss091.xml')
|
||||
,(u'Eliane Catanh\xEAde', u'http://feeds.folha.uol.com.br/folha/pensata/elianecantanhede/rss091.xml')
|
||||
,(u'Fernado Canzian', u'http://feeds.folha.uol.com.br/folha/pensata/fernandocanzian/rss091.xml')
|
||||
,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/folha/pensata/gilbertodimenstein/rss091.xml')
|
||||
,(u'H\xE9lio Schwartsman', u'http://feeds.folha.uol.com.br/folha/pensata/helioschwartsman/rss091.xml')
|
||||
,(u'Jo\xE3o Pereira Coutinho', u'http://http://feeds.folha.uol.com.br/folha/pensata/joaopereiracoutinho/rss091.xml')
|
||||
,(u'Luiz Caversan', u'http://http://feeds.folha.uol.com.br/folha/pensata/luizcaversan/rss091.xml')
|
||||
,(u'S\xE9rgio Malbergier', u'http://http://feeds.folha.uol.com.br/folha/pensata/sergiomalbergier/rss091.xml')
|
||||
,(u'Valdo Cruz', u'http://http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml')
|
||||
,(u'Esporte', u'http://feeds.folha.uol.com.br/folha/esporte/rss091.xml')
|
||||
,(u'Zapping', u'http://feeds.folha.uol.com.br/colunas/zapping/rss091.xml')
|
||||
,(u'Cida Santos', u'http://feeds.folha.uol.com.br/colunas/cidasantos/rss091.xml')
|
||||
,(u'Clóvis Rossi', u'http://feeds.folha.uol.com.br/colunas/clovisrossi/rss091.xml')
|
||||
,(u'Eliane Cantanhêde', u'http://feeds.folha.uol.com.br/colunas/elianecantanhede/rss091.xml')
|
||||
,(u'Fernando Canzian', u'http://feeds.folha.uol.com.br/colunas/fernandocanzian/rss091.xml')
|
||||
,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/colunas/gilbertodimenstein/rss091.xml')
|
||||
,(u'Hélio Schwartsman', u'http://feeds.folha.uol.com.br/colunas/helioschwartsman/rss091.xml')
|
||||
,(u'Humberto Luiz Peron', u'http://feeds.folha.uol.com.br/colunas/futebolnarede/rss091.xml')
|
||||
,(u'João Pereira Coutinho', u'http://feeds.folha.uol.com.br/colunas/joaopereiracoutinho/rss091.xml')
|
||||
,(u'José Antonio Ramalho', u'http://feeds.folha.uol.com.br/colunas/canalaberto/rss091.xml')
|
||||
,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/colunas/kennedyalencar/rss091.xml')
|
||||
,(u'Luiz Caversan', u'http://feeds.folha.uol.com.br/colunas/luizcaversan/rss091.xml')
|
||||
,(u'Luiz Rivoiro', u'http://feeds.folha.uol.com.br/colunas/paiepai/rss091.xml')
|
||||
,(u'Marcelo Leite', u'http://feeds.folha.uol.com.br/colunas/marceloleite/rss091.xml')
|
||||
,(u'Sérgio Malbergier', u'http://feeds.folha.uol.com.br/colunas/sergiomalbergier/rss091.xml')
|
||||
,(u'Sylvia Colombo', u'http://feeds.folha.uol.com.br/colunas/sylviacolombo/rss091.xml')
|
||||
,(u'Valdo Cruz', u'http://feeds.folha.uol.com.br/colunas/valdocruz/rss091.xml')
|
||||
]
|
||||
|
||||
|
||||
|
26
recipes/gildia_pl.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Gildia(BasicNewsRecipe):
|
||||
title = u'Gildia.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Gildia - cultural site'
|
||||
cover_url = 'http://www.film.gildia.pl/_n_/portal/redakcja/logo/logo-gildia.pl-500.jpg'
|
||||
category = 'culture'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets=True
|
||||
remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
|
||||
keep_only_tags=dict(name='div', attrs={'class':'widetext'})
|
||||
feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), (u'Literatura', u'http://www.literatura.gildia.pl/rss'), (u'Film', u'http://www.film.gildia.pl/rss'), (u'Horror', u'http://www.horror.gildia.pl/rss'), (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), (u'Techno', u'http://www.techno.gildia.pl/rss'), (u'Historia', u'http://www.historia.gildia.pl/rss'), (u'Magia', u'http://www.magia.gildia.pl/rss'), (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), (u'RPG', u'http://www.rpg.gildia.pl/rss'), (u'LARP', u'http://www.larp.gildia.pl/rss'), (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), (u'Nauka', u'http://www.nauka.gildia.pl/rss')]
|
||||
|
||||
|
||||
def skip_ad_pages(self, soup):
    """Follow the first review link found in the page's ``news`` block.

    Looks up ``<div class="news">`` in ``soup`` and scans its anchors for
    an ``href`` containing ``'recenzja'``. The first match is logged and
    fetched with ``self.index_to_soup(..., raw=True)``.

    :param soup: parsed page to inspect.
    :return: the result of ``self.index_to_soup`` for the first matching
        link, or ``None`` when the page has no ``news`` block or no
        matching link.
    """
    content = soup.find('div', attrs={'class':'news'})
    if content is None:
        # Page has no news block at all; nothing to follow.
        return None
    # href=True skips anchors without an href, which would otherwise
    # raise KeyError on the subscript below.
    for link in content.findAll(name='a', href=True):
        if 'recenzja' in link['href']:
            self.log.warn('odnosnik')
            self.log.warn(link['href'])
            return self.index_to_soup(link['href'], raw=True)
    return None
|
16
recipes/gram_pl.recipe
Normal file
@ -0,0 +1,16 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Gram_pl(BasicNewsRecipe):
|
||||
title = u'Gram.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Gram.pl - site about computer games'
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
||||
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
||||
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
||||
feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
|
||||
(u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
38
recipes/gry_online_pl.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Gry_online_pl(BasicNewsRecipe):
|
||||
title = u'Gry-Online.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Gry-Online.pl - computer games'
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
oldest_article = 13
|
||||
INDEX= 'http://www.gry-online.pl/'
|
||||
cover_url='http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
extra_css = 'p.wn1{font-size:22px;}'
|
||||
remove_tags_after= [dict(name='div', attrs={'class':['tresc-newsa']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['txthead']}), dict(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}), dict(name='a', attrs={'class':['num_str_nex']})]
|
||||
#remove_tags= [dict(name='div', attrs={'class':['news_plat']})]
|
||||
feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
|
||||
|
||||
|
||||
def append_page(self, soup, appendtag):
    """Append the paragraphs of every following page to ``appendtag``.

    Finds the ``<a class="num_str_nex">`` link in ``soup``. If the text of
    the link's ``<div>`` contains ``'strona'``, the linked page is fetched
    relative to ``self.INDEX``, its ``<p>`` tags with class ``wtx1``,
    ``wn1`` or ``wob`` are appended to ``appendtag``, and the method
    recurses on the fetched page.

    Any such link already inside ``appendtag`` is replaced with a newline.

    :param soup: the page whose next-page link is examined.
    :param appendtag: the tag that collects the paragraphs.
    """
    link_attrs = {'class':'num_str_nex'}
    next_link = soup.find('a', attrs=link_attrs)

    stale_link = appendtag.find('a', attrs=link_attrs)
    if stale_link is not None:
        stale_link.replaceWith('\n')

    if next_link is None:
        return

    # The link may have no <div>, or a <div> whose .string is None;
    # treat both as "no next page" rather than raising.
    label = getattr(next_link.div, 'string', None)
    if not label or 'strona' not in label:
        return

    next_soup = self.index_to_soup(self.INDEX + next_link['href'])
    paragraphs = next_soup.findAll(name='p', attrs={'class':['wtx1', 'wn1', 'wob']})
    for tag in paragraphs:
        appendtag.insert(len(appendtag.contents), tag)
    self.append_page(next_soup, appendtag)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
@ -15,8 +15,10 @@ class Guardian(BasicNewsRecipe):
|
||||
title = u'The Guardian and The Observer'
|
||||
if date.today().weekday() == 6:
|
||||
base_url = "http://www.guardian.co.uk/theobserver"
|
||||
cover_pic = 'Observer digital edition'
|
||||
else:
|
||||
base_url = "http://www.guardian.co.uk/theguardian"
|
||||
cover_pic = 'Guardian digital edition'
|
||||
|
||||
__author__ = 'Seabound and Sujata Raman'
|
||||
language = 'en_GB'
|
||||
@ -79,7 +81,7 @@ class Guardian(BasicNewsRecipe):
|
||||
# soup = self.index_to_soup("http://www.guardian.co.uk/theobserver")
|
||||
soup = self.index_to_soup(self.base_url)
|
||||
# find cover pic
|
||||
img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'})
|
||||
img = soup.find( 'img',attrs ={'alt':self.cover_pic})
|
||||
if img is not None:
|
||||
self.cover_url = img['src']
|
||||
# end find cover pic
|
||||
|
50
recipes/h7_tumspor.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Haber7TS(BasicNewsRecipe):
    """Recipe for the TÜMSPOR sports pages of the Turkish site Haber 7.

    Articles are taken from the Dapper feeds listed in ``feeds``. Each page
    is cut down to the divs named in ``keep_only_tags`` and then cleaned of
    the blocks named in ``remove_tags``.
    """

    # Identity and catalogue metadata.
    title = u'H7 TÜMSPOR'
    __author__ = u'thomass'
    publisher = 'thomass'
    description = ' Haber 7 TÜMSPOR sitesinden tüm branşlarda spor haberleri '
    category = 'güncel, haber, türkçe,spor,futbol'
    language = 'tr'
    publication_type = 'newspaper'

    # Download behaviour.  `delay` and `use_embedded_content` are left
    # unset here, so the base-class values apply.
    encoding = 'ISO 8859-9'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    extra_css = ' #newsheadcon h1{font-weight: bold; font-size: 18px;color:#0000FF} '

    cover_img_url = 'http://image.tumspor.com/v2/images/tasarim/images/logo.jpg'
    masthead_url = 'http://image.tumspor.com/v2/images/tasarim/images/logo.jpg'

    # Only these containers survive ...
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['intNews', 'leftmidmerge']}},
    ]

    # ... and inside them these elements are thrown away.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['blocktitle', 'banner46860body']}},
        {'name': 'div', 'attrs': {'class': [
            'Breadcrumb', 'shr', 'mobile/home.jpg', 'etiket', 'yorumYazNew',
            'shr', 'y-list', 'banner', 'lftBannerShowcase', 'comments',
            'interNews', 'lftBanner', 'midblock', 'rightblock', 'comnum',
            'commentcon',
        ]}},
        {'name': 'a', 'attrs': {'class': [
            'saveto', 'sendto', 'comlink', 'newsshare',
        ]}},
        {'name': 'iframe', 'attrs': {'name': ['frm111', 'frm107']}},
        {'name': 'ul', 'attrs': {'class': ['nocPagi', 'leftmidmerge']}},
    ]

    feeds = [
        (u'Futbol', u'http://open.dapper.net/services/h7tsfutbol'),
        (u'Basketbol', u'http://open.dapper.net/services/h7tsbasket'),
        (u'Tenis', u'http://open.dapper.net/services/h7tstenis'),
        (u'NBA', u'http://open.dapper.net/services/h7tsnba'),
        (u'Diğer Sporlar', u'http://open.dapper.net/services/h7tsdiger'),
        (u'Yazarlar & Magazin', u'http://open.dapper.net/services/h7tsyazarmagazin'),
    ]

    def preprocess_html(self, soup):
        """Replace every <a> that has a single string child with that text."""
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # Anchors wrapping markup (or nothing) are left as they are.
                continue
            anchor.replaceWith(label)
        return soup
|
||||
# def print_version(self, url):
|
||||
# return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||
|
60
recipes/haber7.recipe
Normal file
@ -0,0 +1,60 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Haber7(BasicNewsRecipe):
    """Recipe for the general news sections of the Turkish site Haber 7.

    Articles are taken from the Dapper feeds listed in ``feeds``. Each page
    is cut down to the divs named in ``keep_only_tags`` and then cleaned of
    the blocks named in ``remove_tags``.
    """

    # Identity and catalogue metadata.
    title = u'Haber 7'
    __author__ = u'thomass'
    publisher = 'thomass'
    description = ' Haber 7 sitesinden haberler '
    category = 'güncel, haber, türkçe'
    language = 'tr'
    publication_type = 'newspaper'

    # Download behaviour.  `delay` and `use_embedded_content` are left
    # unset here, so the base-class values apply.
    encoding = 'ISO 8859-9'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    extra_css = 'body{ font-size: 12px}h2{font-weight: bold; font-size: 18px;color:#0000FF} #newsheadcon h1{font-weight: bold; font-size: 18px;color:#0000FF}'

    cover_img_url = 'http://dl.dropbox.com/u/39726752/haber7.JPG'
    masthead_url = 'http://dl.dropbox.com/u/39726752/haber7.JPG'

    # Only these containers survive ...
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['intNews', 'leftmidmerge']}},
    ]

    # ... and inside them these elements are thrown away.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['blocktitle', 'banner46860body']}},
        {'name': 'div', 'attrs': {'class': [
            'Breadcrumb', 'shr', 'mobile/home.jpg', 'etiket', 'yorumYazNew',
            'shr', 'y-list', 'banner', 'lftBannerShowcase', 'comments',
            'interNews', 'lftBanner', 'midblock', 'rightblock', 'comnum',
            'commentcon',
        ]}},
        {'name': 'a', 'attrs': {'class': [
            'saveto', 'sendto', 'comlink', 'newsshare',
        ]}},
        {'name': 'iframe', 'attrs': {'name': ['frm111', 'frm107']}},
        {'name': 'ul', 'attrs': {'class': ['nocPagi', 'leftmidmerge']}},
    ]

    feeds = [
        (u'Siyaset', u'http://open.dapper.net/services/h7siyaset'),
        (u'Güncel', u'http://open.dapper.net/services/h7guncel'),
        (u'Yaşam', u'http://open.dapper.net/services/h7yasam'),
        (u'Ekonomi', u'http://open.dapper.net/services/h7ekonomi'),
        (u'3. Sayfa', u'http://open.dapper.net/services/h73sayfa'),
        (u'Dünya', u'http://open.dapper.net/services/h7dunya'),
        (u'Medya', u'http://open.dapper.net/services/h7medya'),
        (u'Yazarlar', u'http://open.dapper.net/services/h7yazarlar'),
        (u'Bilim', u'http://open.dapper.net/services/h7bilim'),
        (u'Eğitim', u'http://open.dapper.net/services/h7egitim'),
        (u'Spor', u'http://open.dapper.net/services/h7sporv3'),
    ]

    def preprocess_html(self, soup):
        """Replace every <a> that has a single string child with that text."""
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # Anchors wrapping markup (or nothing) are left as they are.
                continue
            anchor.replaceWith(label)
        return soup
|
||||
# def print_version(self, url):
|
||||
# return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||
|
123
recipes/hackernews.recipe
Normal file
@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
'''
|
||||
Hacker News
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from urlparse import urlparse
|
||||
import re
|
||||
|
||||
class HackerNews(BasicNewsRecipe):
    """Recipe for the Hacker News RSS feed.

    Every feed entry is handled through the "obfuscated article" hook:
    get_obfuscated_article() writes a local HTML file per entry, built
    from the HN item page itself (story text plus comments), from a bare
    <img> wrapper for direct image links, or from the readable extraction
    of the linked page.
    """

    title = 'Hacker News'
    __author__ = 'Tom Scholl'
    description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
    publisher = 'Y Combinator'
    category = 'news, programming, it, technology'
    masthead_url = 'http://img585.imageshack.us/img585/5011/hnle.png'
    cover_url = 'http://img585.imageshack.us/img585/5011/hnle.png'
    delay = 1
    max_articles_per_feed = 30
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'utf-8'
    language = 'en'
    requires_version = (0, 8, 16)

    feeds = [
        (u'Hacker News', 'http://news.ycombinator.com/rss'),
    ]

    # Temporary files created by get_obfuscated_article(); references are
    # kept here for as long as the recipe class lives.
    temp_files = []
    articles_are_obfuscated = True

    def get_readable_content(self, url):
        """Fetch *url* and return its readable-article extraction.

        The raw page is downloaded with the recipe browser and passed to
        extract_readable_article() together with its URL.
        """
        self.log('get_readable_content(' + url + ')')
        br = self.get_browser()
        f = br.open(url)
        try:
            html = f.read()
        finally:
            # Release the response even when the read fails.
            f.close()

        return self.extract_readable_article(html, url)

    def get_hn_content(self, url):
        """Build a standalone HTML page from a Hacker News item page.

        The result contains the story title, a link to the story, the
        subtext line, the story's own text (when present) and every
        comment, each indented according to the width of the spacer image
        in its row.  Returns the page as a string.
        """
        self.log('get_hn_content(' + url + ')')
        soup = self.index_to_soup(url)
        # Second <tr> after the first one in the document; its first cell
        # is the container everything below is looked up in.
        main = soup.find('tr').findNextSiblings('tr', limit=2)[1].td

        title_td = main.find('td', 'title')
        title = self.tag_to_string(title_td)
        link = title_td.find('a')['href']
        if link.startswith('item?'):
            # Relative link: make it absolute.
            link = 'http://news.ycombinator.com/' + link
        # Display form of the link without the scheme prefix.
        readable_link = link.rpartition('http://')[2].rpartition('https://')[2]
        subtext = self.tag_to_string(main.find('td', 'subtext'))

        # Third row after the title row, second cell.  When that cell
        # contains a <form> it is not treated as story text.
        title_content_td = title_td.findParent('tr').findNextSiblings('tr', limit=3)[2].findAll('td', limit=2)[1]
        title_content = u''
        if not title_content_td.find('form'):
            title_content_td.name = 'div'
            title_content = title_content_td.prettify()

        comments = u''
        for td in main.findAll('td', 'default'):
            comhead = td.find('span', 'comhead')
            if not comhead:
                # No comment header: nothing to render for this cell.
                continue
            com_title = u'<h4>' + self.tag_to_string(comhead).replace(' | link', '') + u'</h4>'
            comhead.parent.extract()
            br = td.find('br')
            if br:
                br.extract()
            # Literal "reply?" prefix; the '?' must be escaped or it only
            # makes the preceding 'y' optional.
            reply = td.find('a', attrs={'href': re.compile(r'^reply\?')})
            if reply:
                reply.parent.extract()
            td.name = 'div'
            # Scale the spacer image width to two thirds, in whole pixels.
            indent_width = (int(td.parent.find('td').img['width']) * 2) // 3
            td['style'] = 'padding-left: ' + str(indent_width) + 'px'
            comments = comments + com_title + td.prettify()

        body = u'<h3>' + title + u'</h3><p><a href="' + link + u'">' + readable_link + u'</a><br/><strong>' + subtext + u'</strong></p>' + title_content + u'<br/>'
        body = body + comments
        return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'

    def get_obfuscated_article(self, url):
        """Write the content for *url* to a temporary file and return its path.

        HN item pages go through get_hn_content(), URLs ending in a known
        image extension are wrapped in a minimal <img> page, and anything
        else goes through get_readable_content().
        """
        # TODO: use content-type header instead of url
        image_extensions = ('.jpg', '.png', '.svg', '.gif', '.jpeg', '.tiff', '.bmp')

        if url.startswith('http://news.ycombinator.com'):
            content = self.get_hn_content(url)
        elif url.endswith(image_extensions):
            self.log('using image_content (' + url + ')')
            content = u'<html><body><img src="' + url + u'"></body></html>'
        else:
            content = self.get_readable_content(url)

        pt = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(pt)
        try:
            pt.write(content)
        finally:
            # Close the file even if the write fails.
            pt.close()
        return pt.name

    def is_link_wanted(self, url, tag):
        """Return False for links ending in '.pdf', True for everything else."""
        if url.endswith('.pdf'):
            return False
        return True

    def prettyify_url(self, url):
        """Return the hostname component of *url*."""
        return urlparse(url).hostname

    def populate_article_metadata(self, article, soup, first):
        """Use the article URL's hostname as both text summary and summary."""
        article.text_summary = self.prettyify_url(article.url)
        article.summary = article.text_summary
|
||||
|
||||
# def parse_index(self):
|
||||
# feeds = []
|
||||
# feeds.append((u'Hacker News',[{'title': 'Testing', 'url': 'http://news.ycombinator.com/item?id=2935944'}]))
|
||||
# return feeds
|
||||
|
||||
|
||||
|
@ -11,8 +11,15 @@ class HBR(BasicNewsRecipe):
|
||||
timefmt = ' [%B %Y]'
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
recipe_disabled = ('hbr.org has started requiring the use of javascript'
|
||||
' to log into their website. This is unsupported in calibre, so'
|
||||
' this recipe has been disabled. If you would like to see '
|
||||
' HBR supported in calibre, contact hbr.org and ask them'
|
||||
' to provide a javascript free login method.')
|
||||
|
||||
LOGIN_URL = 'https://hbr.org/login?request_url=/'
|
||||
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
|
||||
|
||||
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
||||
INDEX = 'http://hbr.org/archive-toc/BR'
|
||||
|
||||
keep_only_tags = [dict(name='div', id='pageContainer')]
|
||||
@ -34,17 +41,23 @@ class HBR(BasicNewsRecipe):
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
self.logout_url = None
|
||||
|
||||
#'''
|
||||
br.open(self.LOGIN_URL)
|
||||
br.select_form(name='signin-form')
|
||||
br['signin-form:username'] = self.username
|
||||
br['signin-form:password'] = self.password
|
||||
raw = br.submit().read()
|
||||
if 'My Account' not in raw:
|
||||
if '>Sign out<' not in raw:
|
||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||
self.logout_url = None
|
||||
link = br.find_link(text='Sign out')
|
||||
if link:
|
||||
self.logout_url = link.absolute_url
|
||||
try:
|
||||
link = br.find_link(text='Sign out')
|
||||
if link:
|
||||
self.logout_url = link.absolute_url
|
||||
except:
|
||||
self.logout_url = self.LOGOUT_URL
|
||||
#'''
|
||||
return br
|
||||
|
||||
def cleanup(self):
|
||||
@ -57,6 +70,8 @@ class HBR(BasicNewsRecipe):
|
||||
|
||||
|
||||
def hbr_get_toc(self):
|
||||
#return self.index_to_soup(open('/t/hbr.html').read())
|
||||
|
||||
today = date.today()
|
||||
future = today + timedelta(days=30)
|
||||
for x in [x.strftime('%y%m') for x in (future, today)]:
|
||||
@ -66,53 +81,43 @@ class HBR(BasicNewsRecipe):
|
||||
return soup
|
||||
raise Exception('Could not find current issue')
|
||||
|
||||
def hbr_parse_section(self, container, feeds):
|
||||
current_section = None
|
||||
current_articles = []
|
||||
for x in container.findAll(name=['li', 'h3', 'h4']):
|
||||
if x.name in ['h3', 'h4'] and not x.findAll(True):
|
||||
if current_section and current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = self.tag_to_string(x)
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
if x.name == 'li':
|
||||
a = x.find('a', href=True)
|
||||
if a is not None:
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href')
|
||||
if '/ar/' not in url:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://hbr.org'+url
|
||||
url = self.map_url(url)
|
||||
p = x.find('p')
|
||||
desc = ''
|
||||
if p is not None:
|
||||
desc = self.tag_to_string(p)
|
||||
if not title or not url:
|
||||
continue
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
self.log('\t\t\t', desc)
|
||||
current_articles.append({'title':title, 'url':url,
|
||||
'description':desc, 'date':''})
|
||||
if current_section and current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
|
||||
|
||||
def hbr_parse_toc(self, soup):
|
||||
feeds = []
|
||||
features = soup.find(id='issueFeaturesContent')
|
||||
self.hbr_parse_section(features, feeds)
|
||||
departments = soup.find(id='issueDepartments')
|
||||
self.hbr_parse_section(departments, feeds)
|
||||
current_section = None
|
||||
articles = []
|
||||
for x in soup.find(id='archiveToc').findAll(['h3', 'h4']):
|
||||
if x.name == 'h3':
|
||||
if current_section is not None and articles:
|
||||
feeds.append((current_section, articles))
|
||||
current_section = self.tag_to_string(x).capitalize()
|
||||
articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
else:
|
||||
a = x.find('a', href=True)
|
||||
if a is None: continue
|
||||
title = self.tag_to_string(a)
|
||||
url = a['href']
|
||||
if '/ar/' not in url:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://hbr.org' + url
|
||||
url = self.map_url(url)
|
||||
p = x.parent.find('p')
|
||||
desc = ''
|
||||
if p is not None:
|
||||
desc = self.tag_to_string(p)
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
self.log('\t\t\t', desc)
|
||||
|
||||
articles.append({'title':title, 'url':url, 'description':desc,
|
||||
'date':''})
|
||||
return feeds
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.hbr_get_toc()
|
||||
#open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
|
||||
feeds = self.hbr_parse_toc(soup)
|
||||
return feeds
|
||||
|
||||
|
@ -5,34 +5,27 @@ class HBR(BasicNewsRecipe):
|
||||
|
||||
title = 'Harvard Business Review Blogs'
|
||||
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
||||
needs_subscription = True
|
||||
__author__ = 'Kovid Goyal, enhanced by BrianG'
|
||||
__author__ = 'Kovid Goyal'
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
#recipe_disabled = ('hbr.org has started requiring the use of javascript'
|
||||
# ' to log into their website. This is unsupported in calibre, so'
|
||||
# ' this recipe has been disabled. If you would like to see '
|
||||
# ' HBR supported in calibre, contact hbr.org and ask them'
|
||||
# ' to provide a javascript free login method.')
|
||||
needs_subscription = False
|
||||
|
||||
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
||||
LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
|
||||
|
||||
INDEX = 'http://hbr.org/current'
|
||||
|
||||
#
|
||||
# Blog Stuff
|
||||
#
|
||||
|
||||
|
||||
INCLUDE_BLOGS = True
|
||||
INCLUDE_ARTICLES = False
|
||||
|
||||
# option-specific settings.
|
||||
|
||||
if INCLUDE_BLOGS == True:
|
||||
remove_tags_after = dict(id='articleBody')
|
||||
remove_tags_before = dict(id='pageFeature')
|
||||
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
else:
|
||||
timefmt = ' [%B %Y]'
|
||||
|
||||
remove_tags_after = dict(id='articleBody')
|
||||
remove_tags_before = dict(id='pageFeature')
|
||||
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
|
||||
keep_only_tags = [ dict(name='div', id='pageContainer')
|
||||
]
|
||||
@ -41,21 +34,16 @@ class HBR(BasicNewsRecipe):
|
||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||
'articleToolbarTop','articleToolbarBottom', 'articleToolbarRD',
|
||||
'mailingListTout', 'partnerCenter', 'pageFooter']),
|
||||
dict(name='iframe')]
|
||||
'mailingListTout', 'partnerCenter', 'pageFooter', 'shareWidgetTop']),
|
||||
dict(name=['iframe', 'style'])]
|
||||
|
||||
extra_css = '''
|
||||
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
||||
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
||||
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
|
||||
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
|
||||
#articleBody{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
|
||||
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
|
||||
'''
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
self.logout_url = None
|
||||
return br
|
||||
|
||||
#'''
|
||||
br.open(self.LOGIN_URL)
|
||||
br.select_form(name='signin-form')
|
||||
br['signin-form:username'] = self.username
|
||||
@ -63,11 +51,15 @@ class HBR(BasicNewsRecipe):
|
||||
raw = br.submit().read()
|
||||
if 'My Account' not in raw:
|
||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||
self.logout_url = None
|
||||
link = br.find_link(text='Sign out')
|
||||
if link:
|
||||
self.logout_url = link.absolute_url
|
||||
try:
|
||||
link = br.find_link(text='Sign out')
|
||||
if link:
|
||||
self.logout_url = link.absolute_url
|
||||
except:
|
||||
self.logout_url = self.LOGOUT_URL
|
||||
#'''
|
||||
return br
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
def cleanup(self):
|
||||
if self.logout_url is not None:
|
||||
@ -76,99 +68,7 @@ class HBR(BasicNewsRecipe):
|
||||
def map_url(self, url):
|
||||
if url.endswith('/ar/1'):
|
||||
return url[:-1]+'pr'
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def hbr_get_toc(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
|
||||
return self.index_to_soup('http://hbr.org'+url)
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def hbr_parse_section(self, container, feeds):
|
||||
current_section = None
|
||||
current_articles = []
|
||||
for x in container.findAll(name=['li', 'h3', 'h4']):
|
||||
if x.name in ['h3', 'h4'] and not x.findAll(True):
|
||||
if current_section and current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = self.tag_to_string(x)
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
if x.name == 'li':
|
||||
a = x.find('a', href=True)
|
||||
if a is not None:
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href')
|
||||
if '/ar/' not in url:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://hbr.org'+url
|
||||
url = self.map_url(url)
|
||||
p = x.find('p')
|
||||
desc = ''
|
||||
if p is not None:
|
||||
desc = self.tag_to_string(p)
|
||||
if not title or not url:
|
||||
continue
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
self.log('\t\t\t', desc)
|
||||
current_articles.append({'title':title, 'url':url,
|
||||
'description':desc, 'date':''})
|
||||
if current_section and current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def hbr_parse_toc(self, soup):
|
||||
feeds = []
|
||||
features = soup.find(id='issueFeaturesContent')
|
||||
self.hbr_parse_section(features, feeds)
|
||||
departments = soup.find(id='issueDepartments')
|
||||
self.hbr_parse_section(departments, feeds)
|
||||
return feeds
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
def feed_to_index_append(self, feedObject, masterFeed):
|
||||
# Loop thru the feed object and build the correct type of article list
|
||||
for feed in feedObject:
|
||||
# build the correct structure from the feed object
|
||||
newArticles = []
|
||||
for article in feed.articles:
|
||||
newArt = {
|
||||
'title' : article.title,
|
||||
'url' : article.url,
|
||||
'date' : article.date,
|
||||
'description' : article.text_summary
|
||||
}
|
||||
newArticles.append(newArt)
|
||||
|
||||
# Append the earliest/latest dates of the feed to the feed title
|
||||
startDate, endDate = self.get_feed_dates(feed, '%d-%b')
|
||||
newFeedTitle = feed.title + ' (' + startDate + ' thru ' + endDate + ')'
|
||||
|
||||
# append the newly-built list object to the index object passed in
|
||||
# as masterFeed.
|
||||
masterFeed.append( (newFeedTitle,newArticles) )
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
def get_feed_dates(self, feedObject, dateMask):
|
||||
startDate = feedObject.articles[len(feedObject.articles)-1].localtime.strftime(dateMask)
|
||||
endDate = feedObject.articles[0].localtime.strftime(dateMask)
|
||||
|
||||
return startDate, endDate
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def parse_index(self):
|
||||
if self.INCLUDE_ARTICLES == True:
|
||||
soup = self.hbr_get_toc()
|
||||
feeds = self.hbr_parse_toc(soup)
|
||||
else:
|
||||
return BasicNewsRecipe.parse_index(self)
|
||||
|
||||
return feeds
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
index = 'http://hbr.org/current'
|
||||
|
29
recipes/hindustan_times.recipe
Normal file
@ -0,0 +1,29 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class HindustanTimes(BasicNewsRecipe):
    """Recipe for the Hindustan Times section feeds.

    Purely declarative: the feeds below are fetched and article pages are
    cleaned by the automatic cleanup (``auto_cleanup``).
    """

    title = u'Hindustan Times'
    __author__ = 'Krittika Goyal'
    language = 'en_IN'

    oldest_article = 1  # days
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'),
        ('Views', 'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'),
        ('Cricket', 'http://feeds.hindustantimes.com/HT-Cricket-TopStories'),
        ('Business', 'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'),
        ('Entertainment', 'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'),
        ('Lifestyle', 'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
    ]
|
||||
|
||||
|
52
recipes/hira.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
# coding=utf-8
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Hira(BasicNewsRecipe):
    """Recipe for the Arabic-language magazine Hira.

    Articles come from a single Dapper feed.  Each page is cut down to the
    ``title-detail`` div, with width/height attributes removed.
    """

    # Identity and catalogue metadata.
    title = 'Hira'
    __author__ = 'thomass'
    publisher = 'thomass'
    description = 'مجلة حراء مجلة علمية ثقافية فكرية تصدر كل شهرين، تعنى بالعلوم الطبيعية والإنسانية والاجتماعية وتحاور أسرار النفس البشرية وآفاق الكون الشاسعة بالمنظور القرآني الإيماني في تآلف وتناسب بين العلم والإيمان، والعقل والقلب، والفكر والواقع.'
    category = 'News'
    language = 'ar'
    publication_type = 'magazine'

    # Download behaviour.  `delay` is left unset, so the base-class value
    # applies.
    encoding = 'utf-8'
    oldest_article = 63
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False

    extra_css = ' .title-detail-wrap{ font-weight: bold ;text-align:right;color:#FF0000;font-size:25px}.title-detail{ font-family:sans-serif;text-align:right;} '

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
        'base-font-size': '10',
    }

    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['title-detail']}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['clear', 'bbsp']}},
    ]

    remove_attributes = ['width', 'height']

    feeds = [
        (u'حراء', 'http://open.dapper.net/services/hira'),
    ]

    def preprocess_html(self, soup):
        """Replace every <a> that has a single string child with that text."""
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # Anchors wrapping markup (or nothing) are left as they are.
                continue
            anchor.replaceWith(label)
        return soup
|
||||
|
@ -1,8 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
import string, pprint
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class HoustonChronicle(BasicNewsRecipe):
|
||||
@ -13,53 +11,28 @@ class HoustonChronicle(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
timefmt = ' [%a, %d %b, %Y]'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_attributes = ['style']
|
||||
|
||||
keep_only_tags = [
|
||||
dict(id=['story-head', 'story'])
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(id=['share-module', 'resource-box',
|
||||
'resource-box-header'])
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
|
||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
|
||||
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
|
||||
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
||||
#story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
|
||||
#story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
|
||||
#story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||
#story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||
#story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
|
||||
#Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
|
||||
.p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
|
||||
.p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
|
||||
'''
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
categories = ['news', 'sports', 'business', 'entertainment', 'life',
|
||||
'travel']
|
||||
feeds = []
|
||||
for cat in categories:
|
||||
articles = []
|
||||
soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
|
||||
for elem in soup.findAll(comptype='story', storyid=True):
|
||||
a = elem.find('a', href=True)
|
||||
if a is None: continue
|
||||
url = a['href']
|
||||
if not url.startswith('http://'):
|
||||
url = 'http://www.chron.com'+url
|
||||
articles.append({'title':self.tag_to_string(a), 'url':url,
|
||||
'description':'', 'date':''})
|
||||
pprint.pprint(articles[-1])
|
||||
if articles:
|
||||
feeds.append((string.capwords(cat), articles))
|
||||
return feeds
|
||||
oldest_article = 2.0
|
||||
|
||||
keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
|
||||
'hst-articletext' in x or 'hst-galleryitem' in x)}
|
||||
|
||||
feeds = [
|
||||
('News', "http://www.chron.com/rss/feed/News-270.php"),
|
||||
('Sports',
|
||||
'http://www.chron.com/sports/headlines/collectionRss/Sports-Headlines-Staff-Stories-10767.php'),
|
||||
('Neighborhood',
|
||||
'http://www.chron.com/rss/feed/Neighborhood-305.php'),
|
||||
('Business', 'http://www.chron.com/rss/feed/Business-287.php'),
|
||||
('Entertainment',
|
||||
'http://www.chron.com/rss/feed/Entertainment-293.php'),
|
||||
('Editorials',
|
||||
'http://www.chron.com/opinion/editorials/collectionRss/Opinion-Editorials-Headline-List-10567.php'),
|
||||
('Life', 'http://www.chron.com/rss/feed/Life-297.php'),
|
||||
('Science & Tech',
|
||||
'http://www.chron.com/rss/feed/AP-Technology-and-Science-266.php'),
|
||||
]
|
||||
|
||||
|
||||
|
BIN
recipes/icons/adventure_zone_pl.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
recipes/icons/android_com_pl.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
BIN
recipes/icons/astro_news_pl.png
Normal file
After Width: | Height: | Size: 625 B |
BIN
recipes/icons/astronomia_pl.png
Normal file
After Width: | Height: | Size: 389 B |
BIN
recipes/icons/bash_org_pl.png
Normal file
After Width: | Height: | Size: 391 B |
BIN
recipes/icons/cd_action.png
Normal file
After Width: | Height: | Size: 972 B |
BIN
recipes/icons/dobreprogamy.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/elektroda_pl.png
Normal file
After Width: | Height: | Size: 1023 B |
BIN
recipes/icons/film_web.png
Normal file
After Width: | Height: | Size: 3.4 KiB |
BIN
recipes/icons/gram_pl.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/gry_online_pl.png
Normal file
After Width: | Height: | Size: 249 B |
BIN
recipes/icons/japan_times.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/niebezpiecznik.png
Normal file
After Width: | Height: | Size: 795 B |
BIN
recipes/icons/rtnews.png
Normal file
After Width: | Height: | Size: 606 B |
BIN
recipes/icons/twitchfilms.png
Normal file
After Width: | Height: | Size: 200 B |
BIN
recipes/icons/ubuntu_pl.png
Normal file
After Width: | Height: | Size: 508 B |
BIN
recipes/icons/wnp.png
Normal file
After Width: | Height: | Size: 576 B |
@ -1,76 +1,25 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class IndiaToday(BasicNewsRecipe):
|
||||
|
||||
title = 'India Today'
|
||||
__author__ = 'Kovid Goyal'
|
||||
language = 'en_IN'
|
||||
timefmt = ' [%d %m, %Y]'
|
||||
|
||||
oldest_article = 700
|
||||
max_articles_per_feed = 10
|
||||
title = u'India Today'
|
||||
language = 'en_IN'
|
||||
__author__ = 'Krittika Goyal'
|
||||
oldest_article = 15 #days
|
||||
max_articles_per_feed = 25
|
||||
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
remove_tags_before = dict(id='content_story_title')
|
||||
remove_tags_after = dict(id='rightblockdiv')
|
||||
remove_tags = [dict(id=['rightblockdiv', 'share_links'])]
|
||||
|
||||
extra_css = '#content_story_title { font-size: 170%; font-weight: bold;}'
|
||||
conversion_options = { 'linearize_tables': True }
|
||||
|
||||
def it_get_index(self):
|
||||
soup = self.index_to_soup('http://indiatoday.intoday.in/site/archive')
|
||||
a = soup.find('a', href=lambda x: x and 'issueId=' in x)
|
||||
url = 'http://indiatoday.intoday.in/site/'+a.get('href')
|
||||
img = a.find('img')
|
||||
self.cover_url = img.get('src')
|
||||
return self.index_to_soup(url)
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.it_get_index()
|
||||
feeds, current_section, current_articles = [], None, []
|
||||
for x in soup.findAll(name=['h1', 'a']):
|
||||
if x.name == 'h1':
|
||||
if current_section and current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = self.tag_to_string(x)
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
elif x.name == 'a' and 'Story' in x.get('href', ''):
|
||||
title = self.tag_to_string(x)
|
||||
url = x.get('href')
|
||||
url = url.replace(' ', '%20')
|
||||
if not url.startswith('/'):
|
||||
url = 'http://indiatoday.intoday.in/site/' + url
|
||||
if title and url:
|
||||
url += '?complete=1'
|
||||
self.log('\tFound article:', title)
|
||||
self.log('\t\t', url)
|
||||
desc = ''
|
||||
h3 = x.parent.findNextSibling('h3')
|
||||
if h3 is not None:
|
||||
desc = 'By ' + self.tag_to_string(h3)
|
||||
h4 = h3.findNextSibling('h4')
|
||||
if h4 is not None:
|
||||
desc = self.tag_to_string(h4) + ' ' + desc
|
||||
if desc:
|
||||
self.log('\t\t', desc)
|
||||
current_articles.append({'title':title, 'description':desc,
|
||||
'url':url, 'date':''})
|
||||
|
||||
if current_section and current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
return feeds
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
a = soup.find(text='Print')
|
||||
if a is not None:
|
||||
tr = a.findParent('tr')
|
||||
if tr is not None:
|
||||
tr.extract()
|
||||
return soup
|
||||
feeds = [
|
||||
('Latest News', 'http://indiatoday.intoday.in/rss/article.jsp?sid=4'),
|
||||
('Cover Story', 'http://indiatoday.intoday.in/rss/article.jsp?sid=30'),
|
||||
('Nation', 'http://indiatoday.intoday.in/rss/article.jsp?sid=36'),
|
||||
('States', 'http://indiatoday.intoday.in/rss/article.jsp?sid=21'),
|
||||
('Economy', 'http://indiatoday.intoday.in/rss/article.jsp?sid=34'),
|
||||
('World', 'http://indiatoday.intoday.in/rss/article.jsp?sid=61'),
|
||||
('Sport', 'http://indiatoday.intoday.in/rss/article.jsp?sid=41'),
|
||||
|
||||
]
|
||||
|
||||
|
||||
|
@ -7,56 +7,33 @@ www.inquirer.net
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
class InquirerNet(BasicNewsRecipe):
|
||||
title = 'Inquirer.net'
|
||||
__author__ = 'Darko Miletic'
|
||||
__author__ = 'Krittika Goyal'
|
||||
description = 'News from Philipines'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf8'
|
||||
publisher = 'inquirer.net'
|
||||
category = 'news, politics, philipines'
|
||||
lang = 'en'
|
||||
language = 'en'
|
||||
|
||||
extra_css = ' .fontheadline{font-size: x-large} .fontsubheadline{font-size: large} .fontkick{font-size: medium}'
|
||||
use_embedded_content = False
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
remove_tags = [dict(name=['object','link','script','iframe','form'])]
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [
|
||||
(u'Breaking news', u'http://services.inquirer.net/rss/breakingnews.xml' )
|
||||
,(u'Top stories' , u'http://services.inquirer.net/rss/topstories.xml' )
|
||||
,(u'Sports' , u'http://services.inquirer.net/rss/brk_breakingnews.xml' )
|
||||
,(u'InfoTech' , u'http://services.inquirer.net/rss/infotech_tech.xml' )
|
||||
,(u'InfoTech' , u'http://services.inquirer.net/rss/infotech_tech.xml' )
|
||||
,(u'Business' , u'http://services.inquirer.net/rss/inq7money_breaking_news.xml' )
|
||||
,(u'Editorial' , u'http://services.inquirer.net/rss/opinion_editorial.xml' )
|
||||
,(u'Global Nation', u'http://services.inquirer.net/rss/globalnation_breakingnews.xml')
|
||||
(u'Inquirer', u'http://www.inquirer.net/fullfeed')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.set_handle_gzip(True)
|
||||
return br
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
rest, sep, art = url.rpartition('/view/')
|
||||
art_id, sp, rrest = art.partition('/')
|
||||
return 'http://services.inquirer.net/print/print.php?article_id=' + art_id
|
||||
|
@ -1,7 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
japantimes.co.jp
|
||||
'''
|
||||
@ -9,24 +7,61 @@ japantimes.co.jp
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class JapanTimes(BasicNewsRecipe):
|
||||
title = u'The Japan Times'
|
||||
title = 'The Japan Times'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Japan'
|
||||
language = 'en'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
description = "Daily news and features on Japan from the most widely read English-language newspaper in Japan. Coverage includes national news, business news, sports news, commentary and features on living in Japan, entertainment, the arts, education and more."
|
||||
language = 'en_JP'
|
||||
category = 'news, politics, japan'
|
||||
publisher = 'The Japan Times'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 150
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://search.japantimes.co.jp/images/header_title.gif'
|
||||
extra_css = 'body{font-family: Geneva,Arial,Helvetica,sans-serif}'
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'id':'searchresult'}) ]
|
||||
remove_tags_after = [ dict(name='div', attrs={'id':'mainbody' }) ]
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'printresult'})]
|
||||
remove_tags = [
|
||||
dict(name='div' , attrs={'id':'ads' })
|
||||
,dict(name='table', attrs={'width':470})
|
||||
dict(name=['iframe','meta','link','embed','object','base'])
|
||||
,dict(attrs={'id':'searchfooter'})
|
||||
]
|
||||
feeds = [(u'The Japan Times', u'http://feeds.feedburner.com/japantimes')]
|
||||
remove_attributes = ['border']
|
||||
|
||||
def get_article_url(self, article):
|
||||
rurl = BasicNewsRecipe.get_article_url(self, article)
|
||||
return rurl.partition('?')[0]
|
||||
|
||||
feeds = [
|
||||
(u'The Japan Times', u'http://feedproxy.google.com/japantimes')
|
||||
]
|
||||
def print_version(self, url):
|
||||
return url.replace('/cgi-bin/','/print/')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
for item in soup.findAll('photo'):
|
||||
item.name = 'div'
|
||||
for item in soup.head.findAll('paragraph'):
|
||||
item.extract()
|
||||
for item in soup.findAll('wwfilename'):
|
||||
item.extract()
|
||||
for item in soup.findAll('jtcategory'):
|
||||
item.extract()
|
||||
for item in soup.findAll('nomooter'):
|
||||
item.extract()
|
||||
for item in soup.body.findAll('paragraph'):
|
||||
item.name = 'p'
|
||||
return soup
|
||||
|
@ -1,4 +1,3 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Attis <attis@attis.one.pl>'
|
||||
__version__ = 'v. 0.1'
|
||||
@ -16,21 +15,21 @@ class KopalniaWiedzy(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
INDEX = u'http://kopalniawiedzy.pl/'
|
||||
remove_javascript = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }]
|
||||
|
||||
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}]
|
||||
remove_tags_after = dict(attrs={'class':'ad-square'})
|
||||
keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})]
|
||||
extra_css = '.topimage {margin-top: 30px}'
|
||||
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
|
||||
lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
|
||||
(re.compile(u'<br /><br />'),
|
||||
lambda match: '<br\/>')
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
|
||||
(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
|
||||
@ -39,10 +38,10 @@ class KopalniaWiedzy(BasicNewsRecipe):
|
||||
(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
|
||||
(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
|
||||
]
|
||||
|
||||
|
||||
def is_link_wanted(self, url, tag):
|
||||
return tag['class'] == 'next'
|
||||
|
||||
|
||||
def remove_beyond(self, tag, next):
|
||||
while tag is not None and getattr(tag, 'name', None) != 'body':
|
||||
after = getattr(tag, next)
|
||||
@ -51,30 +50,30 @@ class KopalniaWiedzy(BasicNewsRecipe):
|
||||
after.extract()
|
||||
after = ns
|
||||
tag = tag.parent
|
||||
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('a',attrs={'class':'next'})
|
||||
if pager:
|
||||
nexturl = self.INDEX + pager['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
texttag = soup2.find('div', attrs={'id':'articleContent'})
|
||||
|
||||
|
||||
tag = texttag.find(attrs={'class':'pages'})
|
||||
self.remove_beyond(tag, 'nextSibling')
|
||||
|
||||
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
|
||||
appendtag.insert(position,texttag)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body, 3)
|
||||
|
||||
|
||||
for item in soup.findAll('div',attrs={'class':'pages'}):
|
||||
item.extract()
|
||||
|
||||
|
||||
for item in soup.findAll('p', attrs={'class':'wykop'}):
|
||||
item.extract()
|
||||
|
||||
|
||||
return soup
|
||||
|
@ -7,8 +7,9 @@ latercera.com
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LaTercera(BasicNewsRecipe):
|
||||
news = True
|
||||
title = 'La Tercera'
|
||||
__author__ = 'Darko Miletic'
|
||||
__author__ = 'Darko Miletic and Alex Mitrani'
|
||||
description = 'El sitio de noticias online de Chile'
|
||||
publisher = 'La Tercera'
|
||||
category = 'news, politics, Chile'
|
||||
@ -18,8 +19,8 @@ class LaTercera(BasicNewsRecipe):
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
language = 'es'
|
||||
|
||||
language = 'es_CL'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
@ -28,28 +29,33 @@ class LaTercera(BasicNewsRecipe):
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':['titularArticulo']})
|
||||
,dict(name='h4', attrs={'class':['bajadaArt']})
|
||||
,dict(name='h5', attrs={'class':['autorArt']})
|
||||
,dict(name='div', attrs={'class':['articleContent']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['ul','input','base'])
|
||||
,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
|
||||
,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
|
||||
,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
|
||||
dict(name='div', attrs={'class':['boxCompartir','keywords']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class':['keywords']})
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
|
||||
feeds = [(u'La Tercera', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
|
||||
,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
|
||||
,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
|
||||
,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
|
||||
,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
|
||||
,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
|
||||
,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
|
||||
,(u'Entretenimiento', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=661')
|
||||
,(u'Motores', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=665')
|
||||
,(u'Santiago', u'http://www.latercera.com/feed/manager?type=rss&sc=TEFURVJDRVJB&citId=9&categoryId=1731')
|
||||
,(u'Tendencias', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=659')
|
||||
,(u'Estilo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=660')
|
||||
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
|
||||
,(u'Cultura', u'http://www.latercera.com/feed/manager?type=rss&sc=TEFURVJDRVJB&citId=9&categoryId=1453')
|
||||
,(u'Entretención', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=661')
|
||||
,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -18,21 +18,28 @@ class Liberation(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
|
||||
|
||||
html2lrf_options = ['--base-font-size', '10']
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1')
|
||||
,dict(name='div', attrs={'class':'articleContent'})
|
||||
#,dict(name='div', attrs={'class':'object-content text text-item'})
|
||||
,dict(name='div', attrs={'class':'article'})
|
||||
#,dict(name='div', attrs={'class':'articleContent'})
|
||||
,dict(name='div', attrs={'class':'entry'})
|
||||
]
|
||||
remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ]
|
||||
remove_tags = [
|
||||
dict(name='p', attrs={'class':'clear'})
|
||||
,dict(name='ul', attrs={'class':'floatLeft clear'})
|
||||
,dict(name='div', attrs={'class':'clear floatRight'})
|
||||
,dict(name='object')
|
||||
,dict(name='div', attrs={'class':'toolbox'})
|
||||
,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'})
|
||||
#,dict(name='div', attrs={'class':'clear block block-call-items'})
|
||||
,dict(name='div', attrs={'class':'block-content'})
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'La une', u'http://www.liberation.fr/rss/laune')
|
||||
,(u'Monde' , u'http://www.liberation.fr/rss/monde')
|
||||
|
@ -2,6 +2,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Metro Nieuws NL'
|
||||
description = u'Metro Nieuws - NL'
|
||||
# Version 1.2, updated cover image to match the changed website.
|
||||
# added info date on title
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
__author__ = u'DrMerry'
|
||||
@ -10,11 +13,11 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
simultaneous_downloads = 5
|
||||
delay = 1
|
||||
# timefmt = ' [%A, %d %B, %Y]'
|
||||
timefmt = ''
|
||||
timefmt = ' [%A, %d %b %Y]'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
cover_url = 'http://www.readmetro.com/img/en/metroholland/last/1/small.jpg'
|
||||
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
remove_tags_before = dict(name='div', attrs={'id':'date'})
|
||||
|
@ -16,6 +16,7 @@ __UseLife__ = True
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/09/07: disable "column" section as it is no longer offered free.
|
||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||
provide options to remove all images in the file
|
||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||
@ -230,8 +231,9 @@ class MPRecipe(BasicNewsRecipe):
|
||||
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
||||
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
||||
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
#(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
@ -591,4 +593,3 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
@ -6,11 +6,13 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
msdn.microsoft.com/en-us/magazine
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
|
||||
|
||||
class MSDNMagazine_en(BasicNewsRecipe):
|
||||
title = 'MSDN Magazine'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'The Microsoft Journal for Developers'
|
||||
masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
|
||||
publisher = 'Microsoft Press'
|
||||
category = 'news, IT, Microsoft, programming, windows'
|
||||
oldest_article = 31
|
||||
@ -20,24 +22,44 @@ class MSDNMagazine_en(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
language = 'en'
|
||||
|
||||
base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
|
||||
rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'
|
||||
|
||||
|
||||
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'MainContent'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','base','table'])
|
||||
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
|
||||
dict(name='div', attrs={'class':'DivRatingsOnly'})
|
||||
,dict(name='div', attrs={'class':'ShareThisButton4'})
|
||||
]
|
||||
remove_tags_after = dict(name='div', attrs={'class':'navpage'})
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
|
||||
item.name="h2"
|
||||
for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
|
||||
item.name="h1"
|
||||
for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
|
||||
item.name="h3"
|
||||
return soup
|
||||
def find_articles(self):
|
||||
idx_contents = self.browser.open(self.rss_url).read()
|
||||
idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
|
||||
|
||||
for article in idx.findAll('item'):
|
||||
desc_html = self.tag_to_string(article.find('description'))
|
||||
description = self.tag_to_string(BeautifulSoup(desc_html))
|
||||
|
||||
a = {
|
||||
'title': self.tag_to_string(article.find('title')),
|
||||
'url': self.tag_to_string(article.find('link')),
|
||||
'description': description,
|
||||
'date' : self.tag_to_string(article.find('pubdate')),
|
||||
}
|
||||
yield a
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.base_url)
|
||||
|
||||
#find issue name, eg "August 2011"
|
||||
issue_name = self.tag_to_string(soup.find('h1'))
|
||||
|
||||
# find cover pic
|
||||
img = soup.find('img',attrs ={'alt':issue_name})
|
||||
if img is not None:
|
||||
self.cover_url = img['src']
|
||||
|
||||
return [(issue_name, list(self.find_articles()))]
|
||||
|
||||
|
16
recipes/niebezpiecznik.recipe
Normal file
@ -0,0 +1,16 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Niebezpiecznik_pl(BasicNewsRecipe):
    # Feed-based recipe for Niebezpiecznik.pl (language 'pl', category
    # 'hacking, IT'). Everything here is declarative configuration.

    # --- publication metadata ---
    title = u'Niebezpiecznik.pl'
    description = 'Niebezpiecznik.pl'
    __author__ = 'fenuks'
    language = 'pl'
    category = 'hacking, IT'
    cover_url =u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'

    # --- download limits ---
    oldest_article = 8
    max_articles_per_feed = 100

    # --- page clean-up ---
    no_stylesheets = True
    # Keep only the headline and body containers of each article page ...
    keep_only_tags = [
        dict(name='div', attrs={'class':['title', 'entry']}),
    ]
    # ... then drop the sharing widget, every <h4>, and the similar-posts box.
    remove_tags = [
        dict(name='div', attrs={'class':['sociable']}),
        dict(name='h4'),
        dict(attrs={'class':'similar-posts'}),
    ]

    # --- sources: (feed title, feed URL) ---
    feeds = [
        (u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
        ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/'),
    ]
|
@ -37,24 +37,24 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe):
|
||||
#br.set_debug_responses(True)
|
||||
|
||||
if self.username is not None and self.password is not None:
|
||||
print "----------------------------open top page----------------------------------------"
|
||||
print "-------------------------open top page-------------------------------------"
|
||||
br.open('http://www.nikkei.com/')
|
||||
print "----------------------------open first login form--------------------------------"
|
||||
print "-------------------------open first login form-----------------------------"
|
||||
link = br.links(url_regex="www.nikkei.com/etc/accounts/login").next()
|
||||
br.follow_link(link)
|
||||
#response = br.response()
|
||||
#print response.get_data()
|
||||
print "----------------------------JS redirect(send autoPostForm)-----------------------"
|
||||
print "-------------------------JS redirect(send autoPostForm)--------------------"
|
||||
br.select_form(name='autoPostForm')
|
||||
br.submit()
|
||||
#response = br.response()
|
||||
print "----------------------------got login form---------------------------------------"
|
||||
print "-------------------------got login form------------------------------------"
|
||||
br.select_form(name='LA0210Form01')
|
||||
br['LA0210Form01:LA0210Email'] = self.username
|
||||
br['LA0210Form01:LA0210Password'] = self.password
|
||||
br.submit()
|
||||
#response = br.response()
|
||||
print "----------------------------JS redirect------------------------------------------"
|
||||
print "-------------------------JS redirect---------------------------------------"
|
||||
br.select_form(nr=0)
|
||||
br.submit()
|
||||
|
||||
@ -64,18 +64,23 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
def cleanup(self):
|
||||
print "----------------------------logout-----------------------------------------------"
|
||||
print "-------------------------logout--------------------------------------------"
|
||||
self.browser.open('https://regist.nikkei.com/ds/etc/accounts/logout')
|
||||
|
||||
def parse_index(self):
|
||||
print "----------------------------get index of paper-----------------------------------"
|
||||
print "-------------------------get index of paper--------------------------------"
|
||||
result = []
|
||||
soup = self.index_to_soup('http://www.nikkei.com/paper/')
|
||||
#soup = self.index_to_soup(self.test_data())
|
||||
for sect in soup.findAll('div', 'cmn-section kn-special JSID_baseSection'):
|
||||
sections = soup.findAll('div', 'cmn-section kn-special JSID_baseSection')
|
||||
if len(sections) == 0:
|
||||
sections = soup.findAll('div', 'cmn-section kn-special')
|
||||
for sect in sections:
|
||||
sect_title = sect.find('h3', 'cmnc-title').string
|
||||
sect_result = []
|
||||
for elem in sect.findAll(attrs={'class':['cmn-article_title']}):
|
||||
if elem.span.a == None or elem.span.a['href'].startswith('javascript') :
|
||||
continue
|
||||
url = 'http://www.nikkei.com' + elem.span.a['href']
|
||||
url = re.sub("/article/", "/print-article/", url) # print version.
|
||||
span = elem.span.a.span
|
||||
@ -84,6 +89,5 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe):
|
||||
sect_result.append(dict(title=title, url=url, date='',
|
||||
description='', content=''))
|
||||
result.append([sect_title, sect_result])
|
||||
#pp.pprint(result)
|
||||
return result
|
||||
|
||||
|
34
recipes/ntv_spor.recipe
Normal file
@ -0,0 +1,34 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1313512459(BasicNewsRecipe):
    # Feed-based recipe for NTVSpor (language 'tr', category 'sports, Turkey').
    # Everything here is declarative configuration; there are no methods.

    # --- publication metadata ---
    title = u'NTVSpor'
    __author__ = 'A Erdogan'
    description = 'News from Turkey'
    publisher = 'NTVSpor.net'
    category = 'sports, Turkey'
    language = 'tr'
    masthead_url = 'http://www.ntvspor.net/HTML/r/i/l.png'

    # --- download limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    # --- styling ---
    no_stylesheets = True
    # NOTE(review): 'align' is not a CSS property ('text-align' was probably
    # meant); the stylesheet text is kept verbatim here.
    extra_css ='''
    body{font-family:Arial,Helvetica,sans-serif; font-size:small; align:left; color:#000000}
    h1{font-size:large; color:#000000}
    h2{font-size:small; color:#000000}
    p{font-size:small; color:#000000}
    '''

    # Metadata handed on to the conversion pipeline.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- page clean-up ---
    # Article content runs from the headline <h1> to the newsBody <div>.
    remove_tags_before = dict(name='h1', attrs={'style':['margin-top: 6px;']})
    remove_tags_after = dict(name='div', attrs={'id':'newsBody'})
    # NOTE(review): 'il' is not an HTML element name -- possibly a typo for
    # 'li'; kept verbatim so matching behaviour does not change.
    remove_tags = [
        dict(name=['embed','il','ul','iframe','object','link','base']),
        dict(name='div', attrs={'id':'contentPhotoGallery'}),
        dict(name='div', attrs={'class':'SocialMediaWrapper'}),
        dict(name='div', attrs={'class':'grid2'}),
        dict(name='div', attrs={'class':'grid8'}),
        dict(name='div', attrs={'id':'anonsBar'}),
        dict(name='div', attrs={'id':'header'}),
    ]

    # --- sources: (feed title, feed URL) ---
    feeds = [(u'NTVSpor', u'http://www.ntvspor.net/Rss/anasayfa')]
|
||||
|
45
recipes/ntv_tr.recipe
Normal file
@ -0,0 +1,45 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NTVMSNBC(BasicNewsRecipe):
    # Feed-based recipe for NTV (ntvmsnbc.com, language 'tr'). Articles are
    # fetched through the URL built by print_version().

    title = u'NTV'
    __author__ = 'A Erdogan'
    description = 'News from Turkey'
    publisher = 'NTV'
    category = 'news, politics, Turkey'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    masthead_url = 'http://www.ntvmsnbc.com/images/MSNBC/msnbc_ban.gif'
    language = 'tr'

    # NOTE(review): 'align' is not a CSS property ('text-align' was probably
    # meant); the stylesheet text is kept verbatim here.
    extra_css ='''
    body{font-family:Arial,Helvetica,sans-serif; font-size:small; align:left; color:#000000}
    h1{font-size:large; color:#000000}
    h2{font-size:small; color:#000000}
    p{font-size:small; color:#000000}
    '''

    # Metadata handed on to the conversion pipeline.
    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    # The 'padding: 0pt 10px 10px;' matcher used to be listed twice; one
    # entry is enough.
    # NOTE(review): 'il' is not an HTML element name -- possibly a typo for
    # 'li'; kept verbatim so matching behaviour does not change.
    remove_tags = [
        dict(name=['embed','il','ul','iframe','object','link','base']),
        dict(name='div', attrs={'style':['padding: 0pt 10px 10px;']}),
        dict(name='div', attrs={'class':['textSmallGrey w320']}),
        dict(name='div', attrs={'style':['font-family:Arial; font-size:16px;font-weight:bold; font-color:#003366; margin-bottom:20px; margin-top:20px; border-bottom:solid 1px;border-color: #CCC; padding-bottom:2px;']}),
    ]

    # Each boundary used to be assigned twice in the class body. Only the last
    # assignment of a class attribute survives, so the earlier
    # remove_tags_after = dict(attrs={'id':'haberDetayYazi'}) never took
    # effect. The values below are the ones that were actually in force.
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'style':['font-family:Arial; font-size:16px;font-weight:bold; font-color:#003366; margin-bottom:20px; margin-top:20px; border-bottom:solid 1px;border-color: #CCC; padding-bottom:2px;']})

    feeds = [(u'NTV', u'http://www.ntvmsnbc.com/id/3032091/device/rss/rss.xml')]

    def print_version(self, url):
        """Return the print-view URL for the article at *url*.

        Everything after the last '/id/' in *url* is taken as the article id
        and embedded in the fixed print-view URL pattern.
        """
        articleid = url.rpartition('/id/')[2]
        return 'http://www.ntvmsnbc.com/id/' + articleid + '/print/1/displaymode/1098/'

    def preprocess_html(self, soup):
        """Pass the parsed page through adeify_images() and return the result."""
        return self.adeify_images(soup)
|
||||
|
95
recipes/pagina_12_print_ed.recipe
Normal file
@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''
|
||||
pagina12.com.ar
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||
|
||||
class Pagina12(BasicNewsRecipe):
|
||||
|
||||
title = 'Pagina/12 - Edicion Impresa'
|
||||
__author__ = 'Pablo Marfil'
|
||||
description = 'Diario argentino'
|
||||
INDEX = 'http://www.pagina12.com.ar/diario/secciones/index.html'
|
||||
language = 'es'
|
||||
encoding = 'cp1252'
|
||||
remove_tags_before = dict(id='fecha')
|
||||
remove_tags_after = dict(id='fin')
|
||||
remove_tags = [dict(id=['fecha', 'fin', 'pageControls','logo','logo_suple','fecha_suple','volver'])]
|
||||
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||
no_stylesheets = True
|
||||
|
||||
preprocess_regexps= [(re.compile(r'<!DOCTYPE[^>]+>', re.I), lambda m:'')]
|
||||
|
||||
|
||||
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html')
|
||||
for image in soup.findAll('img',alt=True):
|
||||
if image['alt'].startswith('Tapa de la fecha'):
|
||||
return image['src']
|
||||
print image
|
||||
return None
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
articles = []
|
||||
numero = 1
|
||||
raw = self.index_to_soup('http://www.pagina12.com.ar/diario/secciones/index.html', raw=True)
|
||||
raw = re.sub(r'(?i)<!DOCTYPE[^>]+>', '', raw)
|
||||
soup = self.index_to_soup(raw)
|
||||
|
||||
feeds = []
|
||||
|
||||
seen_titles = set([])
|
||||
for section in soup.findAll('div','seccionx'):
|
||||
numero+=1
|
||||
print (numero)
|
||||
section_title = self.tag_to_string(section.find('div','desplegable_titulo on_principal right'))
|
||||
self.log('Found section:', section_title)
|
||||
articles = []
|
||||
for post in section.findAll('h2'):
|
||||
h = post.find('a', href=True)
|
||||
title = self.tag_to_string(h)
|
||||
if title in seen_titles:
|
||||
continue
|
||||
seen_titles.add(title)
|
||||
a = post.find('a', href=True)
|
||||
url = a['href']
|
||||
if url.startswith('/'):
|
||||
url = 'http://pagina12.com.ar/imprimir'+url
|
||||
p = post.find('div', attrs={'h2'})
|
||||
desc = None
|
||||
self.log('\tFound article:', title, 'at', url)
|
||||
if p is not None:
|
||||
desc = self.tag_to_string(p)
|
||||
self.log('\t\t', desc)
|
||||
articles.append({'title':title, 'url':url, 'description':desc,
|
||||
'date':''})
|
||||
if articles:
|
||||
feeds.append((section_title, articles))
|
||||
return feeds
|
||||
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Replace right-aligned image tables with a centred image-plus-caption div.

    For every ``<table align="right">`` that contains an ``<img>``, the image
    is pulled out, the remaining table text becomes the caption, and the
    table is swapped for a ``<div>`` holding the image, a ``<br>`` and (when
    non-empty) the caption. Tables without an image are left alone.
    Returns the same soup object.
    """
    for table in soup.findAll('table', align='right'):
        picture = table.find('img')
        if picture is None:
            continue
        # Detach the image first so the caption is the table's leftover text.
        picture.extract()
        caption = self.tag_to_string(table).strip()
        wrapper = Tag(soup, 'div')
        wrapper['style'] = 'text-align:center'
        wrapper.insert(0, picture)
        wrapper.insert(1, Tag(soup, 'br'))
        if caption:
            wrapper.insert(2, NavigableString(caption))
        table.replaceWith(wrapper)

    return soup
|
||||
|
@ -14,54 +14,11 @@ class PeopleMag(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 50
|
||||
use_embedded_content = False
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
|
||||
h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
|
||||
.body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
|
||||
.byline {font-size: small; color: #666666; font-style:italic; }
|
||||
.lastline {font-size: small; color: #666666; font-style:italic;}
|
||||
.contact {font-size: small; color: #666666;}
|
||||
.contact p {font-size: small; color: #666666;}
|
||||
.photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
|
||||
.photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
|
||||
.article_timestamp{font-size:x-small; color:#666666;}
|
||||
a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
|
||||
'''
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class': 'panel_news_article_main'}),
|
||||
dict(name='div', attrs={'class':'article_content'}),
|
||||
dict(name='div', attrs={'class': 'headline'}),
|
||||
dict(name='div', attrs={'class': 'post'}),
|
||||
dict(name='div', attrs={'class': 'packageheadlines'}),
|
||||
dict(name='div', attrs={'class': 'snap_preview'}),
|
||||
dict(name='div', attrs={'id': 'articlebody'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'share_comments'}),
|
||||
dict(name='p', attrs={'class':'twitter_facebook'}),
|
||||
dict(name='div', attrs={'class':'share_comments_bottom'}),
|
||||
dict(name='h2', attrs={'id':'related_content'}),
|
||||
dict(name='div', attrs={'class':'next_article'}),
|
||||
dict(name='div', attrs={'class':'prev_article'}),
|
||||
dict(name='ul', attrs={'id':'sharebar'}),
|
||||
dict(name='div', attrs={'class':'sharelinkcont'}),
|
||||
dict(name='div', attrs={'class':'categories'}),
|
||||
dict(name='ul', attrs={'class':'categories'}),
|
||||
dict(name='div', attrs={'class':'related_content'}),
|
||||
dict(name='div', attrs={'id':'promo'}),
|
||||
dict(name='div', attrs={'class':'linksWrapper'}),
|
||||
dict(name='p', attrs={'class':'tag tvnews'}),
|
||||
dict(name='p', attrs={'class':'tag movienews'}),
|
||||
dict(name='p', attrs={'class':'tag musicnews'}),
|
||||
dict(name='p', attrs={'class':'tag couples'}),
|
||||
dict(name='p', attrs={'class':'tag gooddeeds'}),
|
||||
dict(name='p', attrs={'class':'tag weddings'}),
|
||||
dict(name='p', attrs={'class':'tag health'})
|
||||
]
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//div[@id="article-image"]'
|
||||
|
||||
|
||||
feeds = [
|
||||
@ -69,26 +26,4 @@ class PeopleMag(BasicNewsRecipe):
|
||||
('US Headlines', 'http://www.usmagazine.com/celebrity_news/rss')
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
    """Return the article link, preferring the single-page version.

    The article page is fetched and, when it contains the text "View All",
    '?viewAll=y' is appended to the link. This is best effort: if the lookup
    fails for any ordinary reason the plain link is returned.
    """
    ans = article.link

    try:
        self.log('Looking for full story link in', ans)
        soup = self.index_to_soup(ans)
        x = soup.find(text="View All")

        if x is not None:
            ans = ans + '?viewAll=y'
            self.log('Found full story link', ans)
    except Exception as err:
        # Keep going with the plain link, but do not swallow
        # KeyboardInterrupt/SystemExit and do not hide the reason.
        self.log('Failed to look for full story link in', ans, err)
    return ans
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Strip Q&A title containers and all line breaks from the page.

    Removes every ``<div class="container_ate_qandatitle">`` and then every
    ``<br>`` element, and returns the same soup object.
    """
    qanda_titles = soup.findAll(name='div', attrs={'class': "container_ate_qandatitle"})
    for node in qanda_titles:
        node.extract()

    line_breaks = soup.findAll(name='br')
    for node in line_breaks:
        node.extract()

    return soup
|
||||
|
@ -1,45 +1,35 @@
|
||||
#!/usr/bin/env python
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1308312288(BasicNewsRecipe):
|
||||
class BasicUserRecipe1314970845(BasicNewsRecipe):
|
||||
title = u'Philadelphia Inquirer'
|
||||
__author__ = 'sexymax15'
|
||||
language = 'en'
|
||||
description = 'Daily news from the Philadelphia Inquirer'
|
||||
oldest_article = 15
|
||||
max_articles_per_feed = 20
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
oldest_article = 3
|
||||
max_articles_per_feed = 50
|
||||
auto_cleanup = True
|
||||
language= 'en'
|
||||
__author__ = 'bing'
|
||||
requires_version = (0, 8, 16)
|
||||
|
||||
# remove_tags_before = {'class':'article_timestamp'}
|
||||
#remove_tags_after = {'class':'graylabel'}
|
||||
keep_only_tags= [dict(name=['h1','p'])]
|
||||
remove_tags = [dict(name=['hr','dl','dt','img','meta','iframe','link','script','form','input','label']),
|
||||
dict(id=['toggleConfirmEmailDiv','toggleTOS','toggleUsernameMsgDiv','toggleConfirmYear','navT1_philly','secondaryNav','navPlacement','globalPrimaryNav'
|
||||
,'ugc-footer-philly','bv_footer_include','footer','header',
|
||||
'container_rag_bottom','section_rectangle','contentrightside'])
|
||||
,{'class':['megamenu3 megamenu','container misc','container_inner misc_inner'
|
||||
,'misccontainer_left_32','headlineonly','misccontainer_middle_32'
|
||||
,'misccontainer_right_32','headline formBegin',
|
||||
'post_balloon','relatedlist','linkssubhead','b_sq','dotted-rule-above'
|
||||
,'container','headlines-digest','graylabel','container_inner'
|
||||
,'rlinks_colorbar1','rlinks_colorbar2','supercontainer','container_5col_left','container_image_left',
|
||||
'digest-headline2','digest-lead','container_5col_leftmiddle',
|
||||
'container_5col_middlemiddle','container_5col_rightmiddle'
|
||||
,'container_5col_right','divclear','supercontainer_outer force-width',
|
||||
'supercontainer','containertitle kicker-title',
|
||||
'pollquestion','pollchoice','photomore','pollbutton','container rssbox','containertitle video ',
|
||||
'containertitle_image ','container_tabtwo','selected'
|
||||
,'shadetabs','selected','tabcontentstyle','tabcontent','inner_container'
|
||||
,'arrow','container_ad','containertitlespacer','adUnit','tracking','sitemsg_911 clearfix']}]
|
||||
|
||||
extra_css = """
|
||||
h1{font-family: Georgia,serif; font-size: xx-large}
|
||||
|
||||
"""
|
||||
|
||||
|
||||
feeds = [(u'News', u'http://www.philly.com/philly_news.rss')]
|
||||
feeds = [
|
||||
(u'Front Page', u'http://www.philly.com/inquirer_front_page.rss'),
|
||||
(u'Philly.com News', u'http://www.philly.com/philly_news.rss'),
|
||||
(u'National/World (Philly.com)', u'http://www.philly.com/philly_news_nation.rss'),
|
||||
(u'Politics (Philly.com)', u'http://www.philly.com/philly_politics.rss'),
|
||||
(u'Local (Philly.com)', u'http://www.philly.com/philly_news_local.rss'),
|
||||
(u'South Jersey News', u'http://www.philly.com/inq_news_south_jersey.rss'),
|
||||
(u'Sports', u'http://www.philly.com/inquirer_sports.rss'),
|
||||
(u'Tech News', u'http://www.philly.com/philly_tech.rss'),
|
||||
(u'Daily Magazine', u'http://www.philly.com/inq_magazine_daily.rss'),
|
||||
(u'Weekend', u'http://www.philly.com/inq_entertainment_weekend.rss'),
|
||||
(u'Business', u'http://www.philly.com/inq_business.rss'),
|
||||
(u'Education', u'http://www.philly.com/inquirer_education.rss'),
|
||||
(u'Books', u'http://www.philly.com/inq_books.rss'),
|
||||
(u'Entertainment', u'http://www.philly.com/inq_entertainment.rss'),
|
||||
(u'Food', u'http://www.philly.com/inq_food.rss'),
|
||||
(u'Health and Science', u'http://www.philly.com/inquirer_health_science.rss'),
|
||||
(u'Home and Design', u'http://www.philly.com/inq_home_design.rss'),
|
||||
(u'News Columnists', u'http://www.philly.com/inq_columnists.rss'),
|
||||
(u'Editorial', u'http://www.philly.com/inq_news_editorial.rss'),
|
||||
(u'Travel', u'http://www.philly.com/inquirer_travel.rss'),
|
||||
(u'Obituaries', u'http://www.philly.com/inquirer_obituaries.rss')
|
||||
]
|
||||
|
||||
|
@ -5,7 +5,6 @@ class PolitiFactCom(BasicNewsRecipe):
|
||||
__author__ = u'Michael Heinz'
|
||||
oldest_article = 21
|
||||
max_articles_per_feed = 100
|
||||
recursion = 0
|
||||
language = 'en'
|
||||
|
||||
no_stylesheets = True
|
||||
@ -27,4 +26,7 @@ class PolitiFactCom(BasicNewsRecipe):
|
||||
(u'Statements', u'http://www.politifact.com/feeds/statements/truth-o-meter/')
|
||||
]
|
||||
|
||||
|
||||
def get_browser(self):
    """Return the base recipe's browser with gzip handling switched on."""
    browser = BasicNewsRecipe.get_browser(self)
    browser.set_handle_gzip(True)
    return browser
|
||||
|
@ -16,23 +16,25 @@ class Reuters(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
|
||||
extra_css = '''
|
||||
body{font-family:arial,helvetica,sans;}
|
||||
body{font-family:arial,helvetica,sans;}
|
||||
h1{ font-size:larger ; font-weight:bold; }
|
||||
.byline{color:#006E97;font-size:x-small; font-weight:bold;}
|
||||
.location{font-size:x-small; font-weight:bold;}
|
||||
.timestamp{font-size:x-small; }
|
||||
'''
|
||||
'''
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})]
|
||||
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}),
|
||||
dict(name='p', attrs={'class':['relatedTopics']}),
|
||||
dict(name='a', attrs={'id':['fullSizeLink']}),
|
||||
dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),]
|
||||
dict(name='a', attrs={'id':['fullSizeLink']}),
|
||||
dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),
|
||||
# Remove the Tweet, Share this, Email and Print links below article title too!
|
||||
dict(name='div', attrs={'class':['columnRight']}),
|
||||
]
|
||||
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
[
|
||||
[
|
||||
##(r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
|
||||
(r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match : ''),
|
||||
(r'<!-- end apple-rss-content-area -->.*?</body>', lambda match : '</body>'),
|
||||
@ -40,18 +42,15 @@ class Reuters(BasicNewsRecipe):
|
||||
(r'<body>.*?<div class="contentBand">', lambda match : '<body>'),
|
||||
(r'<h3>Share:</h3>.*?</body>', lambda match : '<!-- END:: Shared Module id=36615 --></body>'),
|
||||
(r'<div id="atools" class="articleTools">.*?<div class="linebreak">', lambda match : '<div class="linebreak">'),
|
||||
]
|
||||
]
|
||||
|
||||
]]
|
||||
|
||||
|
||||
feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/topNews?format=xml'),
|
||||
('US News', 'http://feeds.reuters.com/reuters/domesticNews?format=xml'),
|
||||
('World News', 'http://feeds.reuters.com/reuters/worldNews?format=xml'),
|
||||
('Politics News', 'http://feeds.reuters.com/reuters/politicsNews?format=xml'),
|
||||
('Science News', 'http://feeds.reuters.com/reuters/scienceNews?format=xml'),
|
||||
('Environment News', 'http://feeds.reuters.com/reuters/Environment?format=xml'),
|
||||
('Technology News', 'http://feeds.reuters.com/reuters/technologyNews?format=xml'),
|
||||
('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml')
|
||||
]
|
||||
|
||||
('US News', 'http://feeds.reuters.com/reuters/domesticNews?format=xml'),
|
||||
('World News', 'http://feeds.reuters.com/reuters/worldNews?format=xml'),
|
||||
('Politics News', 'http://feeds.reuters.com/reuters/politicsNews?format=xml'),
|
||||
('Science News', 'http://feeds.reuters.com/reuters/scienceNews?format=xml'),
|
||||
('Environment News', 'http://feeds.reuters.com/reuters/Environment?format=xml'),
|
||||
('Technology News', 'http://feeds.reuters.com/reuters/technologyNews?format=xml'),
|
||||
('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml')
|
||||
]
|
||||
|
64
recipes/rtnews.recipe
Normal file
@ -0,0 +1,64 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
rt.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class RT_eng(BasicNewsRecipe):
    """Recipe for the English-language news feeds of rt.com.

    Articles are fetched from their 'print/' variant, reduced to the
    ``div.all`` container, and cleaned of inline styles and hyperlinks.
    """

    title = 'RT in English'
    __author__ = 'Darko Miletic'
    description = 'RT is the first Russian 24/7 English-language news channel which brings the Russian view on global news.'
    publisher = 'Autonomous Nonprofit Organization "TV-Novosti"'
    category = 'news, politics, economy, finances, Russia, world'
    oldest_article = 2
    no_stylesheets = True
    encoding = 'utf8'
    masthead_url = 'http://rt.com/s/css/img/printlogo.gif'
    use_embedded_content = False
    remove_empty_feeds = True
    language = 'en_RU'
    publication_type = 'newsportal'
    extra_css = """
                 body{font-family: Arial,Helvetica,sans-serif}
                 h1{font-family: Georgia,"Times New Roman",Times,serif}
                 .grey{color: gray}
                 .fs12{font-size: small}
                """

    # Metadata handed to the conversion pipeline, taken from the
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(name='div', attrs={'class':'all'})]
    remove_tags = [
        dict(name=['object','link','embed','iframe','meta']),
        dict(attrs={'class':'crumbs oh'}),
    ]
    remove_attributes = ['clear']

    feeds = [
        (u'Politics', u'http://rt.com/politics/rss/'),
        (u'USA', u'http://rt.com/usa/news/rss/'),
        (u'Business', u'http://rt.com/business/news/rss/'),
        (u'Sport', u'http://rt.com/sport/rss/'),
        (u'Art&Culture', u'http://rt.com/art-and-culture/news/rss/'),
    ]

    def print_version(self, url):
        """Return the printable variant of *url* by appending 'print/'."""
        return url + 'print/'

    def preprocess_html(self, soup):
        """Drop inline styles and replace every link with its text.

        Returns the same soup object after modifying it in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            # Use the anchor's own string when it has one; otherwise fall
            # back to the text extracted by tag_to_string.
            text = item.string
            if text is None:
                text = self.tag_to_string(item)
            item.replaceWith(text)
        return soup
|
63
recipes/samanyolu_haber.recipe
Normal file
@ -0,0 +1,63 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SHaber(BasicNewsRecipe):
    """Recipe for the Samanyolu Haber RSS feeds (Turkish daily news)."""

    # --- identification ---------------------------------------------------
    title = u'Samanyolu Haber'
    __author__ = u'thomass'
    description = ' Samanyolu Haber Sitesinden günlük haberler '
    publisher = 'thomass'
    category = 'güncel, haber, türkçe'
    language = 'tr'
    publication_type = 'newspaper'

    # --- download behaviour -----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    remove_empty_feeds = True

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # --- page clean-up ----------------------------------------------------
    extra_css = ' .Haber-Baslik-Yazisi {font-weight: bold; font-size: 9px} .Haber-Ozet-Yazisi{ font-family:sans-serif;font-weight: normal;font-size: 11px } #Haber{ font-family:sans-serif;font-weight: normal;font-size: 9px }.KirmiziText{ font-weight: normal;font-size: 5px }'
    keep_only_tags = [
        dict(name='div', attrs={'class':['Haber-Baslik-Yazisi','Haber-Ozet-Yazisi']}),
        dict(name='div', attrs={'id':['ctl00_ContentPlaceHolder1_imagenew','Haber']}),
    ]

    # NOTE(review): cover_img_url is not read anywhere in this class;
    # presumably cover_url was intended -- confirm before renaming.
    cover_img_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'
    masthead_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'

    feeds = [
        (u'Son Dakika', u'http://podcast.samanyoluhaber.com/sondakika.rss'),
        (u'Gündem', u'http://podcast.samanyoluhaber.com/gundem.rss'),
        (u'Politika ', u'http://podcast.samanyoluhaber.com/politika.rss'),
        (u'Ekonomi', u'http://podcast.samanyoluhaber.com/ekonomi.rss'),
        (u'Dünya', u'http://podcast.samanyoluhaber.com/dunya.rss'),
        (u'Spor ', u'http://podcast.samanyoluhaber.com/spor.rss'),
        (u'Sağlık', u'http://podcast.samanyoluhaber.com/saglik.rss'),
        (u'Kültür', u'http://podcast.samanyoluhaber.com/kultur.rss'),
        (u'Eğitim', u'http://podcast.samanyoluhaber.com/egitim.rss'),
        (u'Ramazan', u'http://podcast.samanyoluhaber.com/ramazan.rss'),
        (u'Yazarlar ', u'http://podcast.samanyoluhaber.com/yazarlar.rss'),
    ]

    def preprocess_html(self, soup):
        """Replace each link that has a plain string with that string.

        Anchors whose ``.string`` is None are left untouched. Returns the
        same soup object.
        """
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup
|
||||
# def print_version(self, url):
|
||||
# return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||
|
55
recipes/samanyolu_teknoloji.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SHaberTekno(BasicNewsRecipe):
    """Recipe for the Samanyolu Teknoloji feeds (Turkish technology news)."""

    # --- identification ---------------------------------------------------
    title = u'Samanyolu Teknoloji'
    __author__ = u'thomass'
    description = 'Samanyolu Teknoloji Haber Sitesinden haberler '
    publisher = 'thomass'
    category = 'bilim, teknoloji, haber, türkçe'
    language = 'tr'
    publication_type = 'magazine'

    # --- download behaviour -----------------------------------------------
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    remove_empty_feeds = True

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # --- page clean-up ----------------------------------------------------
    extra_css = ' .IcerikMetin{ font-family:sans-serif;font-weight: normal;font-size: 10px } .h1IcerikBaslik {font-weight: bold; font-size: 18px}'
    keep_only_tags = [dict(name='div', attrs={'class':['IcerikBaslik','IcerikMetinDiv']})]

    # NOTE(review): cover_img_url is not read anywhere in this class;
    # presumably cover_url was intended -- confirm before renaming.
    cover_img_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg'
    masthead_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg'

    feeds = [
        (u'GENEL', u'http://podcast.samanyoluhaber.com/Teknoloji.rss'),
        (u'İNTERNET', u'http://open.dapper.net/services/shaberteknolojiinternet'),
        (u'CEP TELEFONU', u'http://open.dapper.net/services/shaberteknolojicep'),
        (u'OYUN', u'http://open.dapper.net/services/shaberteknolojioyun'),
        (u'DONANIM', u'http://open.dapper.net/services/httpopendappernetservicesshaberteknolojidonanim'),
        (u'ÜRÜN İNCELEME', u'http://open.dapper.net/services/shaberteknolojiurun'),
        (u'ALIŞVERİŞ', u'http://open.dapper.net/services/shaberteknolojialisveris'),
        (u'BİLİM & TEKNOLOJİ', u'http://open.dapper.net/services/shaberteknolojibilim'),
        (u'HABERLER', u'http://open.dapper.net/services/shaberteknolojihaber'),
    ]
|
||||
|
||||
# def print_version(self, url):
|
||||
# return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||
|
67
recipes/star_gazetesi.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Star (BasicNewsRecipe):
    """Recipe for the Star Gazetesi feeds (Turkish daily newspaper)."""

    title = u'Star Gazetesi'
    __author__ = u'thomass'
    # The original spelled this 'yeni Türkiye''nin Gazetesi'; Python treats
    # that as two adjacent literals and silently drops the apostrophe.
    description = "yeni Türkiye'nin Gazetesi"
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    publisher = 'thomass'
    category = 'güncel, haber, türkçe'
    language = 'tr'
    publication_type = 'newspaper'

    # Metadata handed to the conversion pipeline, taken from the
    # attributes defined above.
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    extra_css = ' .font8{font-weight: bold; font-size:20px}.font11{font-weight: normal; font-size:small}#hdetay{ font-family:sans-serif;font-size: 9px }'
    keep_only_tags = [
        dict(name='div', attrs={'class':['font8']}),
        dict(name='span', attrs={'class':['font11']}),
        dict(name='div', attrs={'id':['hdetay']}),
    ]

    # NOTE(review): cover_img_url is not read anywhere in this class;
    # presumably cover_url was intended -- confirm before renaming.
    cover_img_url = 'http://www.stargazete.com/starnew/img/starlogo.png'
    masthead_url = 'http://www.stargazete.com/starnew/img/starlogo.png'
    remove_empty_feeds = True

    feeds = [
        (u'MANSET', u'http://open.dapper.net/services/starmanset'),
        (u'GÜNCEL', u'http://www.stargazete.com/guncel.xml'),
        (u'POLİTİKA', u'http://www.stargazete.com/politika.xml'),
        (u' EKONOMİ', u'http://www.stargazete.com/ekonomi.xml'),
        (u'DÜNYA', u'http://www.stargazete.com/dunya.xml'),
        (u'YAZARLAR', u'http://www.stargazete.com/gazeteyazarlar.xml'),
        (u'SPOR', u'http://www.stargazete.com/spor.xml'),
        (u'SPOR YAZARLARI', u'http://www.stargazete.com/index.php?metot=rss&islem=sporyazarlar'),
        (u'SİNEMA', u'http://www.stargazete.com/sinema.xml'),
        (u'KADIN&SAĞLIK', u'http://www.stargazete.com/kadinsaglik.xml'),
        (u' STARTEK', u'http://www.stargazete.com/startek.xml'),
        (u' AÇIK GÖRÜŞ', u'http://www.stargazete.com/acikgorus.xml'),
        (u'Star PAZAR', u'http://www.stargazete.com/pazar.xml'),
        (u'Star CUMARTESİ', u'http://www.stargazete.com/cumartesi.xml'),
    ]

    def preprocess_html(self, soup):
        """Replace each link that has a plain string with that string.

        Anchors whose ``.string`` is None are left untouched. Returns the
        same soup object.
        """
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
|
||||
#def print_version(self, url):
|
||||
#return url.replace('/', 'http://www.stargazete.com/')
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2011, M. Ching modified from work 2009-2011 Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
staradvertiser.com
|
||||
'''
|
||||
@ -7,12 +7,13 @@ staradvertiser.com
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Starbulletin(BasicNewsRecipe):
|
||||
title = 'Honolulu Star Advertiser'
|
||||
title = 'Honolulu Star-Advertiser'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Latest national and local Hawaii sports news'
|
||||
publisher = 'Honolulu Star-Advertiser'
|
||||
category = 'news, Honolulu, Hawaii'
|
||||
oldest_article = 2
|
||||
needs_subscription = True
|
||||
max_articles_per_feed = 100
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
@ -20,12 +21,12 @@ class Starbulletin(BasicNewsRecipe):
|
||||
encoding = 'utf8'
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif'
|
||||
extra_css = """
|
||||
body{font-family: Verdana,Arial,Helvetica,sans-serif}
|
||||
h1,.brown,.postCredit{color: #663300}
|
||||
.storyDeck{font-size: 1.2em; font-weight: bold}
|
||||
img{display: block}
|
||||
"""
|
||||
# extra_css = """
|
||||
# body{font-family: Verdana,Arial,Helvetica,sans-serif}
|
||||
# h1,.brown,.hsa_postCredit{color: #663300}
|
||||
# .storyDeck{font-size: 1.2em; font-weight: bold}
|
||||
# img{display: block}
|
||||
# """
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -35,26 +36,38 @@ class Starbulletin(BasicNewsRecipe):
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
keep_only_tags = [
|
||||
dict(attrs={'id':'storyTitle'})
|
||||
,dict(attrs={'class':['storyDeck','postCredit']})
|
||||
,dict(name='span',attrs={'class':'brown'})
|
||||
dict(attrs={'id':'hsa_storyTitle'})
|
||||
,dict(attrs={'id':'hsa_storyTitle article-important'})
|
||||
,dict(attrs={'class':['hsa_dateStamp','hsa_postCredit','storyDeck']})
|
||||
,dict(name='span',attrs={'class':['hsa_dateStamp','hsa_postCredit']})
|
||||
,dict(name='span',attrs={'class':['hsa_dateStamp article-important','hsa_postCredit article-important']})
|
||||
,dict(name='div',attrs={'class':'storytext article-important'})
|
||||
,dict(name='div',attrs={'class':'storytext'})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script','span','meta','base','iframe'])
|
||||
dict(name=['object','link','script','meta','base','iframe'])
|
||||
# removed 'span' from preceding list to permit keeping of author and timestamp
|
||||
,dict(attrs={'class':['insideStoryImage','insideStoryAd']})
|
||||
,dict(attrs={'name':'fb_share'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Headlines' , u'http://www.staradvertiser.com/staradvertiser_headlines.rss' )
|
||||
,(u'News' , u'http://www.staradvertiser.com/news/index.rss' )
|
||||
,(u'Sports' , u'http://www.staradvertiser.com/sports/index.rss' )
|
||||
,(u'Features' , u'http://www.staradvertiser.com/features/index.rss' )
|
||||
,(u'Editorials', u'http://www.staradvertiser.com/editorials/index.rss' )
|
||||
,(u'Business' , u'http://www.staradvertiser.com/business/index.rss' )
|
||||
,(u'Travel' , u'http://www.staradvertiser.com/travel/index.rss' )
|
||||
]
|
||||
def get_browser(self):
    """Return a browser, logged in to staradvertiser.com when possible.

    The base recipe's browser is obtained first. If both a username and a
    password are set, the site's login form is filled in and submitted
    before the browser is returned.
    """
    # The base-class method is called through the class, so the instance
    # must be passed explicitly; the original omitted it.
    br = BasicNewsRecipe.get_browser(self)
    if self.username is not None and self.password is not None:
        br.open('http://www.staradvertiser.com/manage/Login/')
        br.select_form(name='loginForm')
        br['email'] = self.username
        br['password'] = self.password
        br.submit()
    return br
|
||||
|
||||
feeds = [
|
||||
(u'Breaking News', u'http://www.staradvertiser.com/news/breaking/index.rss')
|
||||
,(u'News', u'http://www.staradvertiser.com/newspremium/index.rss')
|
||||
,(u'Business', u'http://www.staradvertiser.com/businesspremium/index.rss')
|
||||
,(u'Sports', u'http://www.staradvertiser.com/sportspremium/index.rss')
|
||||
,(u'Features', u'http://www.staradvertiser.com/featurespremium/index.rss')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
@ -75,4 +88,4 @@ class Starbulletin(BasicNewsRecipe):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
@ -9,9 +10,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Sueddeutsche(BasicNewsRecipe):
|
||||
|
||||
title = u'S\xfcddeutsche'
|
||||
title = u'Süddeutsche'
|
||||
description = 'News from Germany'
|
||||
__author__ = 'Oliver Niesner and Sujata Raman'
|
||||
__author__ = 'Oliver Niesner and Armin Geller'
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
oldest_article = 7
|
||||
@ -25,13 +26,13 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
|
||||
remove_tags = [ dict(name='link'), dict(name='iframe'),
|
||||
dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_AD",
|
||||
"SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
|
||||
"SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
|
||||
|
||||
dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
|
||||
"pages closed","basebox right narrow","headslot galleried"]}),
|
||||
"pages closed","basebox right narrow","headslot galleried"]}),
|
||||
|
||||
dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
|
||||
"item","videoBigButton","articlefooter full-column",
|
||||
"item","videoBigButton","articlefooter full-column",
|
||||
"bildbanderolle full-column","footerCopy padleft5"]}),
|
||||
|
||||
dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
|
||||
@ -53,26 +54,29 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
body{font-family:Arial,Helvetica,sans-serif; }
|
||||
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
|
||||
|
||||
#feeds = [(u'Topthemen', u'http://suche.sueddeutsche.de/query/politik/-docdatetime/drilldown/%C2%A7documenttype%3AArtikel?output=rss')]
|
||||
|
||||
feeds = [(u'Wissen', u'http://suche.sueddeutsche.de/query/wissen/nav/%C2%A7ressort%3AWissen/sort/-docdatetime?output=rss'),
|
||||
(u'Politik', u'http://suche.sueddeutsche.de/query/politik/nav/%C2%A7ressort%3APolitik/sort/-docdatetime?output=rss'),
|
||||
(u'Wirtschaft', u'http://suche.sueddeutsche.de/query/wirtschaft/nav/%C2%A7ressort%3AWirtschaft/sort/-docdatetime?output=rss'),
|
||||
(u'Finanzen', u'http://suche.sueddeutsche.de/query/finanzen/nav/%C2%A7ressort%3AGeld/sort/-docdatetime?output=rss'),
|
||||
(u'Kultur', u'http://suche.sueddeutsche.de/query/kultur/nav/%C2%A7ressort%3AKultur/sort/-docdatetime?output=rss'),
|
||||
(u'Sport', u'http://suche.sueddeutsche.de/query/sport/nav/%C2%A7ressort%3ASport/sort/-docdatetime?output=rss'),
|
||||
(u'Bayern', u'http://suche.sueddeutsche.de/query/bayern/nav/%C2%A7ressort%3ABayern/sort/-docdatetime?output=rss'),
|
||||
(u'Panorama', u'http://suche.sueddeutsche.de/query/panorama/sort/-docdatetime?output=rss'),
|
||||
(u'Leben&Stil', u'http://suche.sueddeutsche.de/query/stil/nav/%C2%A7ressort%3A%22Leben%20%26%20Stil%22/sort/-docdatetime?output=rss'),
|
||||
(u'Gesundheit', u'http://suche.sueddeutsche.de/query/gesundheit/nav/%C2%A7ressort%3AGesundheit/sort/-docdatetime?output=rss'),
|
||||
(u'Auto&Reise', u'http://suche.sueddeutsche.de/query/automobil/nav/%C2%A7ressort%3A%22Auto%20%26%20Mobil%22/sort/-docdatetime?output=rss'),
|
||||
(u'Computer', u'http://suche.sueddeutsche.de/query/computer/nav/%C2%A7ressort%3AComputer/sort/-docdatetime?output=rss'),
|
||||
(u'Job&Karriere', u'http://suche.sueddeutsche.de/query/job/nav/%C2%A7ressort%3A%22Job%20%26%20Karriere%22/sort/-docdatetime?output=rss'),
|
||||
(u'Reise', u'http://suche.sueddeutsche.de/query/reise/nav/%C2%A7ressort%3AReise/sort/-docdatetime?output=rss')
|
||||
feeds = [
|
||||
(u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
|
||||
(u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
|
||||
(u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
|
||||
(u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
|
||||
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
|
||||
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
|
||||
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
|
||||
(u'München&Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
|
||||
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
|
||||
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
|
||||
(u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
|
||||
(u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
|
||||
(u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
|
||||
(u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
|
||||
(u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
|
||||
(u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
|
||||
(u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only
|
||||
(u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only
|
||||
(u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
|
||||
(u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only
|
||||
]
|
||||
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
main, sep, id = url.rpartition('/')
|
||||
return main + '/2.220/' + id
|
||||
|
@ -40,11 +40,11 @@ class SVD_se(BasicNewsRecipe):
|
||||
,(u'Kultur' , u'http://www.svd.se/kulturnoje/nyheter/?service=rss')
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'articlecontent'})]
|
||||
remove_tags_after = dict(name='div',attrs={'class':'articlebody normal'})
|
||||
keep_only_tags = [dict(name='div', attrs={'id':['article-content', 'articlecontent']})]
|
||||
remove_tags_after = dict(name='div',attrs={'class':'articlebody'})
|
||||
remove_tags = [
|
||||
dict(name=['object','link','base'])
|
||||
,dict(name='div',attrs={'class':['articlead','factcolumn']})
|
||||
,dict(name='div',attrs={'class':['articlead','factcolumn', 'article-ad']})
|
||||
,dict(name='ul', attrs={'class':'toolbar articletop clearfix'})
|
||||
,dict(name='p', attrs={'class':'more'})
|
||||
]
|
||||
|
27
recipes/the_clinic_online.recipe
Normal file
@ -0,0 +1,27 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1313555075(BasicNewsRecipe):
|
||||
news = True
|
||||
title = u'The Clinic'
|
||||
__author__ = 'Alex Mitrani'
|
||||
description = u'Online version of Chilean satirical weekly'
|
||||
publisher = u'The Clinic'
|
||||
category = 'news, politics, Chile, rss'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
summary_length = 1000
|
||||
language = 'es_CL'
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.theclinic.cl/wp-content/themes/tc12m/css/ui/mainLogoTC-top.png'
|
||||
remove_tags_before = dict(name='article', attrs={'class':'scope bordered'})
|
||||
remove_tags_after = dict(name='div', attrs={'id':'commentsSection'})
|
||||
remove_tags = [dict(name='span', attrs={'class':'relTags'})
|
||||
,dict(name='div', attrs={'class':'articleActivity hdcol'})
|
||||
,dict(name='div', attrs={'id':'commentsSection'})
|
||||
]
|
||||
|
||||
feeds = [(u'The Clinic Online', u'http://www.theclinic.cl/feed/')]
|
@ -15,12 +15,12 @@ class Time(BasicNewsRecipe):
|
||||
# ' publish complete articles on the web.')
|
||||
title = u'Time'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Weekly magazine'
|
||||
description = ('Weekly US magazine.')
|
||||
encoding = 'utf-8'
|
||||
no_stylesheets = True
|
||||
language = 'en'
|
||||
remove_javascript = True
|
||||
|
||||
#needs_subscription = 'optional'
|
||||
|
||||
keep_only_tags = [
|
||||
{
|
||||
@ -41,6 +41,21 @@ class Time(BasicNewsRecipe):
|
||||
preprocess_regexps = [(re.compile(
|
||||
r'<meta .+/>'), lambda m:'')]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if False and self.username and self.password:
|
||||
# This site uses javascript in its login process
|
||||
res = br.open('http://www.time.com/time/magazine')
|
||||
br.select_form(nr=1)
|
||||
br['username'] = self.username
|
||||
br['password'] = self.password
|
||||
res = br.submit()
|
||||
raw = res.read()
|
||||
if '>Log Out<' not in raw:
|
||||
raise ValueError('Failed to login to time.com, check'
|
||||
' your username and password')
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
raw = self.index_to_soup('http://www.time.com/time/magazine', raw=True)
|
||||
root = html.fromstring(raw)
|
||||
|
@ -1,12 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
twitchfilm.net/site/
|
||||
twitchfilm.net/news/
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
class Twitchfilm(BasicNewsRecipe):
|
||||
title = 'Twitch Films'
|
||||
@ -15,29 +12,46 @@ class Twitchfilm(BasicNewsRecipe):
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
publisher = 'Twitch'
|
||||
masthead_url = 'http://twitchfilm.com/img/logo.png'
|
||||
category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
|
||||
language = 'en'
|
||||
|
||||
lang = 'en-US'
|
||||
language = 'en'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':'feedflare'})]
|
||||
keep_only_tags=[dict(attrs={'class':'asset-header'})]
|
||||
remove_tags_after=dict(attrs={'class':'asset-body'})
|
||||
remove_tags = [ dict(name='div', attrs={'class':['social','categories']})
|
||||
, dict(attrs={'id':'main-asset'})
|
||||
, dict(name=['meta','link','iframe','embed','object'])
|
||||
]
|
||||
|
||||
feeds = [(u'News', u'http://feedproxy.google.com/TwitchEverything')]
|
||||
feeds = [(u'News', u'http://feeds.twitchfilm.net/TwitchEverything')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
|
||||
soup.head.insert(0,mtag)
|
||||
soup.html['lang'] = self.lang
|
||||
return self.adeify_images(soup)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('a'):
|
||||
limg = item.find('img')
|
||||
if item.string is not None:
|
||||
str = item.string
|
||||
item.replaceWith(str)
|
||||
else:
|
||||
if limg:
|
||||
item.name = 'div'
|
||||
item.attrs = []
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
||||
|
16
recipes/ubuntu_pl.recipe
Normal file
@ -0,0 +1,16 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Ubuntu_pl(BasicNewsRecipe):
|
||||
title = u'UBUNTU.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'UBUNTU.pl - polish ubuntu community site'
|
||||
cover_url = 'http://ubuntu.pl/img/logo.jpg'
|
||||
category = 'linux, IT'
|
||||
language = 'pl'
|
||||
no_stylesheets = True
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
extra_css = '#main {text-align:left;}'
|
||||
keep_only_tags= [dict(name='td', attrs={'class':'teaser-node-mc'}), dict(name='h3', attrs={'class':'entry-title'}), dict(name='div', attrs={'class':'entry-content'})]
|
||||
remove_tags_after= [dict(name='div' , attrs={'class':'content'})]
|
||||
feeds = [('Czytelnia Ubuntu', 'http://feeds.feedburner.com/ubuntu-czytelnia'), (u'WikiGames', u'http://feeds.feedburner.com/WikiGames')]
|
@ -13,6 +13,7 @@ class USAToday(BasicNewsRecipe):
|
||||
title = 'USA Today'
|
||||
__author__ = 'Kovid Goyal'
|
||||
oldest_article = 1
|
||||
publication_type = 'newspaper'
|
||||
timefmt = ''
|
||||
max_articles_per_feed = 20
|
||||
language = 'en'
|
||||
|
21
recipes/wnp.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
||||
title = u'WNP'
|
||||
cover_url= 'http://k.wnp.pl/images/wnpLogo.gif'
|
||||
__author__ = 'fenuks'
|
||||
description = u'Wirtualny Nowy Przemysł'
|
||||
category = 'economy'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
keep_only_tags = dict(name='div', attrs={'id':'contentText'})
|
||||
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
||||
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
||||
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
||||
(u'Serwis Hutnictwo', u'http://www.wnp.pl/rss/serwis_rss_3.xml'),
|
||||
(u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
|
||||
(u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
|
||||
(u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
|
@ -94,9 +94,11 @@ class WallStreetJournal(BasicNewsRecipe):
|
||||
if date is not None:
|
||||
self.timefmt = ' [%s]'%self.tag_to_string(date)
|
||||
|
||||
cov = soup.find('a', attrs={'class':'icon pdf'}, href=True)
|
||||
cov = soup.find('div', attrs={'class':'itpSectionHeaderPdf'})
|
||||
if cov is not None:
|
||||
self.cover_url = cov['href']
|
||||
a = cov.find('a', href=True)
|
||||
if a is not None:
|
||||
self.cover_url = a['href']
|
||||
|
||||
feeds = []
|
||||
div = soup.find('div', attrs={'class':'itpHeader'})
|
||||
|
@ -53,6 +53,12 @@ class WallStreetJournal(BasicNewsRecipe):
|
||||
|
||||
return soup
|
||||
|
||||
def abs_wsj_url(self, href):
|
||||
if not href.startswith('http'):
|
||||
href = 'http://online.wsj.com' + href
|
||||
return href
|
||||
|
||||
|
||||
def wsj_get_index(self):
|
||||
return self.index_to_soup('http://online.wsj.com/itp')
|
||||
|
||||
@ -83,14 +89,14 @@ class WallStreetJournal(BasicNewsRecipe):
|
||||
pageone = a['href'].endswith('pageone')
|
||||
if pageone:
|
||||
title = 'Front Section'
|
||||
url = 'http://online.wsj.com' + a['href']
|
||||
url = self.abs_wsj_url(a['href'])
|
||||
feeds = self.wsj_add_feed(feeds,title,url)
|
||||
title = 'What''s News'
|
||||
url = url.replace('pageone','whatsnews')
|
||||
feeds = self.wsj_add_feed(feeds,title,url)
|
||||
else:
|
||||
title = self.tag_to_string(a)
|
||||
url = 'http://online.wsj.com' + a['href']
|
||||
url = self.abs_wsj_url(a['href'])
|
||||
feeds = self.wsj_add_feed(feeds,title,url)
|
||||
return feeds
|
||||
|
||||
@ -146,7 +152,7 @@ class WallStreetJournal(BasicNewsRecipe):
|
||||
title = self.tag_to_string(a).strip() + ' [%s]'%meta
|
||||
else:
|
||||
title = self.tag_to_string(a).strip()
|
||||
url = 'http://online.wsj.com'+a['href']
|
||||
url = self.abs_wsj_url(a['href'])
|
||||
desc = ''
|
||||
for p in container.findAll('p'):
|
||||
desc = self.tag_to_string(p)
|
||||
|
52
recipes/yagmur_dergisi.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Yagmur(BasicNewsRecipe):
|
||||
title = u'Yagmur Dergisi'
|
||||
__author__ = u'thomass'
|
||||
description = 'Üç Aylık Dil, Kültür ve Edebiyat Dergisi'
|
||||
oldest_article = 90
|
||||
max_articles_per_feed =100
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
#use_embedded_content = False
|
||||
|
||||
#publisher = ' '
|
||||
category = 'dergi, ilim, kültür, edebiyat,Türkçe'
|
||||
language = 'tr'
|
||||
publication_type = 'magazine'
|
||||
encoding = 'ISO 8859-9'
|
||||
publisher = 'thomass'
|
||||
|
||||
|
||||
|
||||
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
conversion_options = {
|
||||
'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': True
|
||||
}
|
||||
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
#keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
|
||||
|
||||
#remove_attributes = ['aria-describedby']
|
||||
#remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
|
||||
cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
|
||||
masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
|
||||
#remove_tags_before = dict(id='content-right')
|
||||
|
||||
|
||||
#remove_empty_feeds= True
|
||||
#remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
( u'Yagmur', u'http://open.dapper.net/services/yagmur'),
|
||||
]
|
||||
|
||||
#def preprocess_html(self, soup):
|
||||
# return self.adeify_images(soup)
|
||||
def print_version(self, url): #there is a probem caused by table format
|
||||
return url.replace('http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=', 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id=')
|
||||
|
52
recipes/yeni_umit_dergisi.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class YeniUmit(BasicNewsRecipe):
|
||||
title = u'Yeni Umit Dergisi'
|
||||
__author__ = u'thomass'
|
||||
description = 'Aylık Dini İlimler ve Kültür Dergisi'
|
||||
oldest_article = 45
|
||||
max_articles_per_feed =100
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
#use_embedded_content = False
|
||||
|
||||
#publisher = ' '
|
||||
category = 'dergi, ilim, kültür, edebiyat,Türkçe'
|
||||
language = 'tr'
|
||||
publication_type = 'magazine'
|
||||
encoding = 'ISO 8859-9'
|
||||
publisher = 'thomass'
|
||||
|
||||
|
||||
|
||||
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
conversion_options = {
|
||||
'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': True
|
||||
}
|
||||
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
#keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
|
||||
|
||||
#remove_attributes = ['aria-describedby']
|
||||
#remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
|
||||
cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
|
||||
masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
|
||||
#remove_tags_before = dict(id='content-right')
|
||||
|
||||
|
||||
#remove_empty_feeds= True
|
||||
#remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
( u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'),
|
||||
]
|
||||
|
||||
#def preprocess_html(self, soup):
|
||||
# return self.adeify_images(soup)
|
||||
def print_version(self, url): #there is a probem caused by table format
|
||||
return url.replace('http://www.yeniumit.com.tr/konular', 'http://www.yeniumit.com.tr/yazdir')
|
||||
|
64
recipes/yenisafak_gazetesi.recipe
Normal file
@ -0,0 +1,64 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Bugun (BasicNewsRecipe):
|
||||
|
||||
title = u'Yenişafak Gazetesi'
|
||||
__author__ = u'thomass'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed =100
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'ISO 8859-9' #'UTF-8'
|
||||
publisher = 'thomass'
|
||||
category = 'news, haberler,TR,gazete'
|
||||
language = 'tr'
|
||||
publication_type = 'newspaper '
|
||||
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
conversion_options = {
|
||||
'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': True
|
||||
}
|
||||
cover_img_url = 'http://yenisafak.com.tr/resim/logo.gif'
|
||||
masthead_url = 'http://yenisafak.com.tr/resim/logo.gif'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':[ 'ctghaberdetay2010']}) ]
|
||||
extra_css = ' h1{font-size:20px;font-weight: bold}h2{font-size: small;font-weight: bold}div{font-size: small} '#h1{ font-size:10%;font-weight: bold} '#ctl00_ortayer_haberBaslik{ 'font-size:10%;font-weight: bold'}
|
||||
|
||||
#keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
|
||||
remove_tags = [ dict(name='div', attrs={'id':['yasaluyari2010','divhaberdetayilisik2010']}),dict(name='font', attrs={'class':['haberdetaytarih']})]#,'news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})]
|
||||
|
||||
|
||||
#remove_attributes = ['width','height']
|
||||
remove_empty_feeds= True
|
||||
|
||||
feeds = [
|
||||
( u'SonDakika', u'http://yenisafak.com.tr/rss/?xml=anasayfa'),
|
||||
( u'Gündem', u'http://yenisafak.com.tr/rss/?xml=gundem'),
|
||||
( u'Politika', u'http://yenisafak.com.tr/rss/?xml=politika'),
|
||||
( u'Ekonomi', u'http://yenisafak.com.tr/rss/?xml=ekonomi'),
|
||||
( u'Dünya', u'http://yenisafak.com.tr/rss/?xml=dunya'),
|
||||
( u'Aktüel', u'http://yenisafak.com.tr/rss/?xml=aktuel'),
|
||||
( u'Eğitim', u'http://yenisafak.com.tr/rss/?xml=egitim'),
|
||||
( u'Spor', u'http://yenisafak.com.tr/rss/?xml=spor'),
|
||||
( u'Yazarlar', u'http://yenisafak.com.tr/rss/?xml=yazarlar'),
|
||||
( u'Televizyon', u'http://yenisafak.com.tr/rss/?xml=televizyon'),
|
||||
( u'Sağlık', u'http://yenisafak.com.tr/rss/?xml=saglik'),
|
||||
( u'Yurt Haberler', u'http://yenisafak.com.tr/rss/?xml=yurthaberler'),
|
||||
( u'Bilişim', u'http://yenisafak.com.tr/rss/?xml=bilisim'),
|
||||
( u'Diziler', u'http://yenisafak.com.tr/rss/?xml=diziler'),
|
||||
( u'Kültür-Sanat', u'http://yenisafak.com.tr/rss/?xml=kultursanat'),
|
||||
( u'Röportaj', u'http://yenisafak.com.tr/rss/?xml=roportaj'),
|
||||
( u'Sinema', u'http://yenisafak.com.tr/rss/?xml=sinema'),
|
||||
( u'Yorum', u'http://yenisafak.com.tr/rss/?xml=yorum'),
|
||||
( u' Yeni Şafak Pazar', u'http://yenisafak.com.tr/rss/?xml=pazar'),
|
||||
( u'Yeni Şafak Kitap', u'http://yenisafak.com.tr/rss/?xml=kitap'),
|
||||
( u'Yeni Şafak English', u'http://yenisafak.com.tr/rss/?xml=english'),
|
||||
|
||||
|
||||
|
||||
]
|
@ -61,11 +61,26 @@ authors_completer_append_separator = False
|
||||
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
|
||||
# The author name suffixes are words that are ignored when they occur at the
|
||||
# end of an author name. The case of the suffix is ignored and trailing
|
||||
# periods are automatically handled.
|
||||
# periods are automatically handled. The same is true for prefixes.
|
||||
# The author name copy words are a set of words which if they occur in an
|
||||
# author name cause the automatically generated author sort string to be
|
||||
# identical to the author name. This means that the sort for a string like Acme
|
||||
# Inc. will be Acme Inc. instead of Inc., Acme
|
||||
author_sort_copy_method = 'comma'
|
||||
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
|
||||
'MD', 'M.D', 'I', 'II', 'III', 'IV',
|
||||
'Junior', 'Senior')
|
||||
author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
|
||||
author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
|
||||
'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
|
||||
|
||||
#: Splitting multiple author names
|
||||
# By default, calibre splits a string containing multiple author names on
|
||||
# ampersands and the words "and" and "with". You can customize the splitting
|
||||
# by changing the regular expression below. Strings are split on whatever the
|
||||
# specified regular expression matches.
|
||||
# Default: r'(?i),?\s+(and|with)\s+'
|
||||
authors_split_regex = r'(?i),?\s+(and|with)\s+'
|
||||
|
||||
#: Use author sort in Tag Browser
|
||||
# Set which author field to display in the tags pane (the list of authors,
|
||||
@ -181,7 +196,7 @@ save_template_title_series_sorting = 'library_order'
|
||||
# To disable use the expression: '^$'
|
||||
# This expression is designed for articles that are followed by spaces. If you
|
||||
# also need to match articles that are followed by other characters, for example L'
|
||||
# in French, use: r"^(A\s+|The\s+|An\s+|L')" instead.
|
||||
# in French, use: "^(A\s+|The\s+|An\s+|L')" instead.
|
||||
# Default: '^(A|The|An)\s+'
|
||||
title_sort_articles=r'^(A|The|An)\s+'
|
||||
|
||||
|
BIN
resources/images/languages.png
Normal file
After Width: | Height: | Size: 18 KiB |
@ -98,7 +98,7 @@
|
||||
<xsl:apply-templates/>
|
||||
</emph>
|
||||
</xsl:when>
|
||||
<xsl:when test = "@underlined">
|
||||
<xsl:when test = "@underlined and @underlined != 'false'">
|
||||
<emph rend = "paragraph-emph-underlined">
|
||||
<xsl:apply-templates/>
|
||||
</emph>
|
||||
@ -220,7 +220,7 @@
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name="parse-styles-attrs">
|
||||
<!--<xsl:text>position:relative;</xsl:text>-->
|
||||
<!--<xsl:text>position:relative;</xsl:text>
|
||||
<xsl:if test="@space-before">
|
||||
<xsl:text>padding-top:</xsl:text>
|
||||
<xsl:value-of select="@space-before"/>
|
||||
@ -230,7 +230,7 @@
|
||||
<xsl:text>padding-bottom:</xsl:text>
|
||||
<xsl:value-of select="@space-after"/>
|
||||
<xsl:text>pt;</xsl:text>
|
||||
</xsl:if>
|
||||
</xsl:if>-->
|
||||
<xsl:if test="@left-indent">
|
||||
<xsl:text>padding-left:</xsl:text>
|
||||
<xsl:value-of select="@left-indent"/>
|
||||
@ -256,15 +256,15 @@
|
||||
<xsl:value-of select="'italic'"/>
|
||||
<xsl:text>;</xsl:text>
|
||||
</xsl:if>
|
||||
<xsl:if test="@underline and @underline != 'false'">
|
||||
<xsl:if test="@underlined and @underlined != 'false'">
|
||||
<xsl:text>text-decoration:underline</xsl:text>
|
||||
<xsl:text>;</xsl:text>
|
||||
</xsl:if>
|
||||
<xsl:if test="@line-spacing">
|
||||
<!--<xsl:if test="@line-spacing">
|
||||
<xsl:text>line-height:</xsl:text>
|
||||
<xsl:value-of select="@line-spacing"/>
|
||||
<xsl:text>pt;</xsl:text>
|
||||
</xsl:if>
|
||||
</xsl:if>-->
|
||||
<xsl:if test="(@align = 'just')">
|
||||
<xsl:text>text-align: justify;</xsl:text>
|
||||
</xsl:if>
|
||||
@ -314,7 +314,6 @@
|
||||
</xsl:attribute>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
@ -446,8 +445,15 @@
|
||||
|
||||
<xsl:template match = "rtf:field[@type='hyperlink']">
|
||||
<xsl:element name ="a">
|
||||
<xsl:attribute name = "href">
|
||||
<xsl:value-of select = "@link"/>
|
||||
<xsl:attribute name = "href"><xsl:if test="not(contains(@link, '/'))">#</xsl:if><xsl:value-of select = "@link"/></xsl:attribute>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match = "rtf:field[@type='bookmark-start']">
|
||||
<xsl:element name ="a">
|
||||
<xsl:attribute name = "id">
|
||||
<xsl:value-of select = "@number"/>
|
||||
</xsl:attribute>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
|
@ -63,10 +63,10 @@ class Check(Command):
|
||||
for f in x[-1]:
|
||||
y = self.j(x[0], f)
|
||||
mtime = os.stat(y).st_mtime
|
||||
if f.endswith('.py') and f not in ('ptempfile.py', 'feedparser.py',
|
||||
'pyparsing.py', 'markdown.py') and \
|
||||
'genshi' not in y and cache.get(y, 0) != mtime and \
|
||||
'prs500/driver.py' not in y:
|
||||
if (f.endswith('.py') and f not in ('ptempfile.py', 'feedparser.py',
|
||||
'pyparsing.py', 'markdown.py') and
|
||||
'genshi' not in y and cache.get(y, 0) != mtime and
|
||||
'prs500/driver.py' not in y):
|
||||
yield y, mtime
|
||||
|
||||
for x in os.walk(self.j(self.d(self.SRC), 'recipes')):
|
||||
|
@ -17,8 +17,8 @@ class GUI(Command):
|
||||
|
||||
@classmethod
|
||||
def find_forms(cls):
|
||||
from calibre.gui2 import find_forms
|
||||
return find_forms(cls.SRC)
|
||||
# We do not use the calibre function find_forms as
|
||||
# importing calibre.gui2 may not work
|
||||
forms = []
|
||||
for root, _, files in os.walk(cls.PATH):
|
||||
for name in files:
|
||||
@ -29,8 +29,9 @@ class GUI(Command):
|
||||
|
||||
@classmethod
|
||||
def form_to_compiled_form(cls, form):
|
||||
from calibre.gui2 import form_to_compiled_form
|
||||
return form_to_compiled_form(form)
|
||||
# We do not use the calibre function form_to_compiled_form as
|
||||
# importing calibre.gui2 may not work
|
||||
return form.rpartition('.')[0]+'_ui.py'
|
||||
|
||||
def run(self, opts):
|
||||
self.build_forms()
|
||||
|
@ -55,7 +55,7 @@ class Develop(Command):
|
||||
short_description = 'Setup a development environment for calibre'
|
||||
MODE = 0755
|
||||
|
||||
sub_commands = ['build', 'resources', 'gui']
|
||||
sub_commands = ['build', 'resources', 'iso639', 'gui',]
|
||||
|
||||
def add_postinstall_options(self, parser):
|
||||
parser.add_option('--make-errors-fatal', action='store_true', default=False,
|
||||
|