sync with Kovid's branch
592
Changelog.yaml
@ -5,7 +5,7 @@
|
||||
# Also, each release can have new and improved recipes.
|
||||
|
||||
# - version: ?.?.?
|
||||
# date: 2012-??-??
|
||||
# date: 2013-??-??
|
||||
#
|
||||
# new features:
|
||||
# - title:
|
||||
@ -19,6 +19,596 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.9.19
|
||||
date: 2013-02-15
|
||||
|
||||
new features:
|
||||
- title: "New tool: \"Polish books\" that allows you to perform various automated cleanup actions on EPUB and AZW3 files without doing a full conversion."
|
||||
type: major
|
||||
description: "Polishing books is all about putting the shine of perfection on your ebook files. You can use it to subset embedded fonts, update the metadata in the book files from the metadata in the calibre library, manipulate the book jacket, etc. More features will be added in the future. To use this tool, go to Preferences->Toolbar and add the Polish books tool to the main toolbar. Then simply select the books you want to be polished and click the Polish books button. Polishing, unlike conversion, does not change the internal structure/markup of your book, it performs only the minimal set of actions needed to achieve its goals. Note that polish books is a completely new codebase, so there may well be bugs. Polishing a book backs up the original as ORIGINAL_EPUB or ORIGINAL_AZW3, unless you have turned off this feature in Preferences->Tweaks, in which case you should back up your files manually. You can also use this tool from the command line with ebook-polish.exe."
|
||||
|
||||
- title: "Driver for the Trekstor Pyrus Mini."
|
||||
tickets: [1124120]
|
||||
|
||||
- title: "E-book viewer: Add an option to change the minimum font size."
|
||||
tickets: [1122333]
|
||||
|
||||
- title: "PDF Output: Add support for converting documents with math typesetting, as described here: http://manual.calibre-ebook.com/typesetting_math.html"
|
||||
|
||||
- title: "Column coloring/icons: Add more conditions when using date based columns with reference to 'today'."
|
||||
|
||||
bug fixes:
|
||||
- title: "Transforming to titlecase - handle typographic hyphens in all caps phrases"
|
||||
|
||||
- title: "Don't ignore file open events that occur before the GUI is initialized on OS X"
|
||||
tickets: [1122713]
|
||||
|
||||
- title: "News download: Handle feeds that have entries with empty ids"
|
||||
|
||||
- title: "Fix a regression that broke using the template editor"
|
||||
|
||||
- title: "Do not block startup while scanning the computer for available network interfaces. Speeds up startup time on some windows computers with lots of spurious network interfaces."
|
||||
|
||||
improved recipes:
|
||||
- New Yorker
|
||||
- Kommersant
|
||||
- Le Monde (Subscription version)
|
||||
- NZ Herald
|
||||
|
||||
new recipes:
|
||||
- title: Navegalo
|
||||
author: Douglas Delgado
|
||||
|
||||
- title: El Guardian and More Intelligent Life
|
||||
author: Darko Miletic
|
||||
|
||||
- version: 0.9.18
|
||||
date: 2013-02-08
|
||||
|
||||
new features:
|
||||
- title: "New metadata source: Edelweiss, a catalog of books that is updated directly by publishers. To enable it, go to Preferences->Metadata download and enable the Edelweiss plugin."
|
||||
tickets: [1091073]
|
||||
|
||||
- title: "Add an option to add extra spacing between rows in the book list. (Preferences->Look & Feel)"
|
||||
tickets: [1117907]
|
||||
|
||||
- title: "Column coloring/icons: Add a 'days ago' condition, useable with columns that store dates to set colors/icons based on the number of days before today"
|
||||
|
||||
- title: "E-book viewer: Add shortcuts Ctrl+= and Ctrl+- to increase/decrease text size."
|
||||
tickets: [ 1117524 ]
|
||||
|
||||
- title: "When showing possible duplicates after adding books, also show the file formats."
|
||||
|
||||
- title: "Driver for Trekstor Ventos Tablet"
|
||||
|
||||
bug fixes:
|
||||
- title: "Conversion: When transliterating unicode characters, handle « and » correctly."
|
||||
tickets: [1117270]
|
||||
|
||||
- title: "Fix adding books from multiple directories with multiple books per directory treating opf files as an ebook"
|
||||
|
||||
- title: "Fix download metadata window not resizable on smaller screens"
|
||||
tickets: [1116849]
|
||||
|
||||
- title: "Tweak Book: When rebuilding azw3 files handle <a> tags that have name but not id attribute, these are apparently produced by kindlegen."
|
||||
tickets: [ 1112934 ]
|
||||
|
||||
- title: "Fix regression in advanced column color rules."
|
||||
tickets: [1118678]
|
||||
|
||||
improved recipes:
|
||||
- El Mundo today
|
||||
- fluter.de
|
||||
- Birmingham Post
|
||||
- Japan Times
|
||||
- The Toronto Star
|
||||
- Le Monde (subscription version)
|
||||
- Globe and Mail
|
||||
|
||||
new recipes:
|
||||
- title: VICE Magazine Deutschland
|
||||
author: Alex
|
||||
|
||||
- title: Libertad Digital
|
||||
author: Darko Miletic
|
||||
|
||||
- version: 0.9.17
|
||||
date: 2013-02-01
|
||||
|
||||
new features:
|
||||
- title: "Allow adding user specified icons to the main book list for books whose metadata matches specific criteria. Go to Preferences->Look & Feel->Column icons to setup these icons. They work in the same way as the column coloring rules."
|
||||
type: major
|
||||
|
||||
- title: "Allow choosing which page of a PDF to use as the cover."
|
||||
description: "To access this functionality add the PDF to calibre then click the edit metadata button. In the top right area of the edit metadata dialog there is a button to get the cover from the ebook file, this will now allow you to choose which page (from the first ten pages) of the pdf to use as the cover."
|
||||
tickets: [1110019]
|
||||
|
||||
- title: "Add option to turn off reflections in the cover browser (Preferences->Look & Feel->Cover Browser)"
|
||||
|
||||
- title: "PDF Output: Add an option to add page numbers to the bottom of every page in the generated PDF file (look in the PDF Output section of the conversion dialog)"
|
||||
|
||||
- title: "Add the full item name to the tool tip of a leaf item displayed in the tag browser."
|
||||
tickets: [1106231]
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix out-of-bounds data causing errors in the Tag Browser"
|
||||
tickets: [1108017]
|
||||
|
||||
- title: "Conversion: Handle input documents that use multiple prefixes referring to the XHTML namespace correctly."
|
||||
tickets: [1107220]
|
||||
|
||||
- title: "PDF Output: Fix regression that caused some svg images to be rendered as black rectangles."
|
||||
tickets: [1105294]
|
||||
|
||||
- title: "Metadata download: Only normalize title case if the result has no language set or its language is English"
|
||||
|
||||
improved recipes:
|
||||
- Baltimore Sun
|
||||
- Harvard Business Review
|
||||
- Victoria Times
|
||||
- South China Morning Post
|
||||
- Volkskrant
|
||||
- Seattle Times
|
||||
|
||||
new recipes:
|
||||
- title: Dob NeviNosti
|
||||
author: Darko Miletic
|
||||
|
||||
- title: La Nacion (CR)
|
||||
author: Douglas Delgado
|
||||
|
||||
- version: 0.9.16
|
||||
date: 2013-01-25
|
||||
|
||||
new features:
|
||||
- title: "News download: Add support for logging in to sites that require javascript for their logins."
|
||||
tickets: [1101809]
|
||||
|
||||
- title: "News download: Do not convert all downloaded images to JPG format. This fixes the problem of PNG images with transparent backgrounds being rendered with black backgrounds"
|
||||
|
||||
- title: "CHM Input: Support hierarchical table of contents. Do not generate an inline table of contents when a metadata table of contents is present. Also correctly decode the text in the table of contents"
|
||||
|
||||
- title: "Get Books: Add the beam-ebooks.de store"
|
||||
|
||||
- title: "Make custom yes/no columns using icons put text values under the icons."
|
||||
|
||||
- title: "Driver for LG E400 and SayCoolA710"
|
||||
tickets: [1103741,1104528]
|
||||
|
||||
- title: "Speed up device connection when there are lots of books on the device by not generating cover thumbnails unless they are actually needed."
|
||||
|
||||
- title: "Have the metadata download dialog remember its last used size."
|
||||
tickets: [1101150]
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix deleting a custom column that was used in a column coloring rule makes the column coloring preferences panel unusable"
|
||||
tickets: [1103504]
|
||||
|
||||
- title: "Store caches outside the config directory for non-portable calibre installs"
|
||||
|
||||
- title: "PDF Output: Don't crash if the user has a font on his system that is missing the OS/2 table"
|
||||
tickets: [1102403]
|
||||
|
||||
- title: "Conversion: Do not error out because of an error in user supplied search replace rules."
|
||||
tickets: [1102647]
|
||||
|
||||
- title: "Conversion: Replace all non-ascii characters in CSS class names, as they cause problems with some broken EPUB renderers."
|
||||
tickets: [1102587]
|
||||
|
||||
- title: "Do not choke when reading metadata from MOBI files with incorrectly encoded metadata fields"
|
||||
|
||||
- title: "Conversion: Preserve ToC entries that point nowhere instead of causing them to point to a non-existent file"
|
||||
|
||||
- title: "E-book viewer: Allow entries in the Table of Contents that do not point anywhere, instead of just ignoring them."
|
||||
|
||||
- title: "Content server: Fix the 'Previous' link in the mobile version of the content server webpage skipping an entry"
|
||||
tickets: [1101124]
|
||||
|
||||
improved recipes:
|
||||
- TSN
|
||||
- St. Louis Post Dispatch
|
||||
- Metro UK
|
||||
- Michelle Malkin
|
||||
- Barrons
|
||||
|
||||
new recipes:
|
||||
- title: Contemporary Argentine Writers
|
||||
author: Darko Miletic
|
||||
|
||||
- version: 0.9.15
|
||||
date: 2013-01-18
|
||||
|
||||
new features:
|
||||
- title: "Linux MTP driver: Detect devices that have MTP interfaces even if their USB ids are not known"
|
||||
|
||||
- title: "Content server: Allow picking a random book by clicking the 'Random book' link on the start page. You can also refresh the random book page to get a new random book"
|
||||
|
||||
- title: "E-book viewer: Add an option to hide the toolbars in the viewer window (Preferences->Miscellaneous->Show controls in the viewer preferences). You can unhide them by right clicking in the viewer window."
|
||||
|
||||
- title: "Kobo driver: Speedup initial connect by avoiding unnecessary update of series metadata in some situations."
|
||||
tickets: [1099190]
|
||||
|
||||
- title: "Get Books: Allow the store plugins to be dynamically loaded so that future website changes of a store don't require a calibre update to fix Get Books."
|
||||
|
||||
- title: "Wireless driver: Always replace file when resending a previously sent book to the device, even if the title/author have changed."
|
||||
|
||||
- title: "Add PocketBook Pro 912 driver."
|
||||
tickets: [1099571]
|
||||
|
||||
- title: "When creating/exporting epub and mobi files, add the calibre book identifier as a special field in the book's metadata. This allows third party tools to identify the book record in calibre to which the file belongs."
|
||||
|
||||
- title: "Wireless driver: Add support for using the book uuid as the filename"
|
||||
|
||||
- title: "Remove the experimental tag from the subset fonts feature, since there has been only one reported problem (now fixed) with it in the two months since it was released"
|
||||
|
||||
bug fixes:
|
||||
- title: "Get Books: Update the amazon, waterstones and libri.de plugins to account for website changes"
|
||||
|
||||
- title: "MOBI Input: Do not choke on MOBI files with incorrectly encoded titles."
|
||||
tickets: [1100601]
|
||||
|
||||
- title: "Font subsetting: Fix a bug in the parsing of the GSUB table that could cause some ligatures to not be included in the subset font"
|
||||
|
||||
- title: "E-book-viewer: Fix TOC links without anchors not scrolling to the top of the current flow"
|
||||
|
||||
- title: "LIT Input: Handle lit files that set an incorrect XML mimetype for their text."
|
||||
tickets: [1099621]
|
||||
|
||||
- title: "Catalogs: Fix 'X' being dropped from ISBNs on export"
|
||||
tickets: [1098325]
|
||||
|
||||
- title: "Fix an error when editing date in the main book list and all visible dates are blank."
|
||||
tickets: [1098675]
|
||||
|
||||
- title: "Fix calibre-smtp using incorrect escaping for non-ascii attachment filenames"
|
||||
tickets: [1098478]
|
||||
|
||||
- title: "Conversion: When subsetting fonts, handle multiple @font-face rules referring to the same physical font"
|
||||
|
||||
- title: "Content server: Update metadata when serving azw3 files"
|
||||
|
||||
- title: "CHM Input: Handle chm files that contain files with url unsafe filenames."
|
||||
tickets: [1100610]
|
||||
|
||||
- title: "Content server: Fix custom icons for top level categories incorrect."
|
||||
tickets: [1095016]
|
||||
|
||||
- title: "Kobo driver: When resending a file to the device, update the filesize in the Kobo db to prevent the device from deleting the file."
|
||||
tickets: [1100607]
|
||||
|
||||
improved recipes:
|
||||
- The Chronicle of Higher Education
|
||||
- Smithsonian Magazine
|
||||
- Philosophy Now
|
||||
- The Economist
|
||||
- Business Week Magazine
|
||||
|
||||
new recipes:
|
||||
- title: Asco de Vida
|
||||
author: Krittika Goyal
|
||||
|
||||
- title: Schattenblick
|
||||
author: ThB
|
||||
|
||||
- version: 0.9.14
|
||||
date: 2013-01-11
|
||||
|
||||
new features:
|
||||
- title: "When adding multiple books and duplicates are found, allow the user to select which of the duplicate books will be added anyway."
|
||||
tickets: [1095256]
|
||||
|
||||
- title: "Device drivers for Kobo Arc on linux, Polaroid Android tablet"
|
||||
tickets: [1098049]
|
||||
|
||||
- title: "When sorting by series, use the language of the book to decide what leading articles to remove, just as is done for sorting by title"
|
||||
|
||||
bug fixes:
|
||||
- title: "PDF Output: Do not error out when the input document contains links with anchors not present in the document."
|
||||
tickets: [1096428]
|
||||
|
||||
- title: "Add support for upgraded db on newest Kobo firmware"
|
||||
tickets: [1095617]
|
||||
|
||||
- title: "PDF Output: Fix typo that broke use of custom paper sizes."
|
||||
tickets: [1097563]
|
||||
|
||||
- title: "PDF Output: Handle empty anchors present at the end of a page"
|
||||
|
||||
- title: "PDF Output: Fix side margins of last page in a flow being incorrect when large side margins are used."
|
||||
tickets: [1096290]
|
||||
|
||||
- title: "Edit metadata dialog: Allow setting the series number for custom series type columns to zero"
|
||||
|
||||
- title: "When bulk editing custom series-type columns and not providing a series number use 1 as the default, instead of None"
|
||||
|
||||
- title: "Catalogs: Fix issue with catalog generation using Hungarian UI and author_sort beginning with multiple letter groups."
|
||||
tickets: [1091581]
|
||||
|
||||
- title: "PDF Output: Don't error out on files that have invalid font-family declarations."
|
||||
tickets: [1096279]
|
||||
|
||||
- title: "Do not load QRawFont at global level, to allow calibre installation on systems with missing dependencies"
|
||||
tickets: [1096170]
|
||||
|
||||
- title: "PDF Output: Fix cover not present in generated PDF files"
|
||||
tickets: [1096098]
|
||||
|
||||
improved recipes:
|
||||
- Sueddeutsche Zeitung mobil
|
||||
- Boerse Online
|
||||
- TidBits
|
||||
- New York Review of Books
|
||||
- Fleshbot
|
||||
- Il Messaggero
|
||||
- Libero
|
||||
|
||||
new recipes:
|
||||
- title: Spectator Magazine, Oxford Mail and Outside Magazine
|
||||
author: Krittika Goyal
|
||||
|
||||
- title: Libartes
|
||||
author: Darko Miletic
|
||||
|
||||
- title: El Diplo
|
||||
author: Tomas De Domenico
|
||||
|
||||
- version: 0.9.13
|
||||
date: 2013-01-04
|
||||
|
||||
new features:
|
||||
- title: "Complete rewrite of the PDF Output engine, to support links and fix various bugs"
|
||||
type: major
|
||||
description: "calibre now has a new PDF output engine that supports links in the text. It also fixes various bugs, detailed below. In order to implement support for links and fix these bugs, the engine had to be completely rewritten, so there may be some regressions."
|
||||
|
||||
- title: "Show disabled device plugins in Preferences->Ignored Devices"
|
||||
|
||||
- title: "Get Books: Fix Smashwords, Google books and B&N stores. Add Nook UK store"
|
||||
|
||||
- title: "Allow series numbers lower than -100 for custom series columns."
|
||||
tickets: [1094475]
|
||||
|
||||
- title: "Add mass storage driver for Rockchip based android smart phones"
|
||||
tickets: [1087809]
|
||||
|
||||
- title: "Add a clear ratings button to the edit metadata dialog"
|
||||
|
||||
bug fixes:
|
||||
- title: "PDF Output: Fix custom page sizes not working on OS X"
|
||||
|
||||
- title: "PDF Output: Fix embedding of many fonts not supported (note that embedding of OpenType fonts with Postscript outlines is still not supported on windows, though it is supported on other operating systems)"
|
||||
|
||||
- title: "PDF Output: Fix crashes converting some books to PDF on OS X"
|
||||
tickets: [1087688]
|
||||
|
||||
- title: "HTML Input: Handle entities inside href attributes when following the links in an HTML file."
|
||||
tickets: [1094203]
|
||||
|
||||
- title: "Content server: Fix custom icons not used for sub categories"
|
||||
tickets: [1095016]
|
||||
|
||||
- title: "Force use of non-unicode constants in compiled templates. Fixes a problem with regular expression character classes and probably other things."
|
||||
|
||||
- title: "Kobo driver: Do not error out if there are invalid dates in the device database"
|
||||
tickets: [1094597]
|
||||
|
||||
- title: "Content server: Fix for non-unicode hostnames when using mDNS"
|
||||
tickets: [1094063]
|
||||
|
||||
improved recipes:
|
||||
- Today's Zaman
|
||||
- The Economist
|
||||
- Foreign Affairs
|
||||
- New York Times
|
||||
- Alternet
|
||||
- Harper's Magazine
|
||||
- La Stampa
|
||||
|
||||
- version: 0.9.12
|
||||
date: 2012-12-28
|
||||
|
||||
new features:
|
||||
- title: "Drivers for Kibano e-reader and Slick ER-700-2"
|
||||
tickets: [1093570, 1093732]
|
||||
|
||||
- title: "Add support for downloading metadata from Amazon Brazil."
|
||||
tickets: [1092594]
|
||||
|
||||
- title: "Copy to library: Allow specifying the destination library by path."
|
||||
tickets: [1093231]
|
||||
|
||||
- title: "When adding empty books, allow setting of the series for the new books. Also select the newly added book records after adding."
|
||||
|
||||
- title: "PDF Output: Add a checkbox to override the page size defined by the output profile. This allows you to specify a custom page size even if the output profile is not set to default."
|
||||
|
||||
- title: "Add usb ids for newer kindle fire to the linux mtp driver"
|
||||
|
||||
bug fixes:
|
||||
- title: "Linux: Temporarily redirect stdout to get rid of the annoying and pointless message about mtpz during libmtp initialization"
|
||||
|
||||
- title: "Fix multiple 'All column' coloring rules not being applied"
|
||||
tickets: [1093574]
|
||||
|
||||
- title: "Use custom icons in the content server as well."
|
||||
tickets: [1092098]
|
||||
|
||||
improved recipes:
|
||||
- La Voce
|
||||
- Harpers Magazine (printed edition)
|
||||
- Pajamas Media
|
||||
- NSFW corp
|
||||
- The Hindu
|
||||
- Nikkei News
|
||||
|
||||
new recipes:
|
||||
- title: Various Ukrainian news sources
|
||||
author: rpalyvoda
|
||||
|
||||
- version: 0.9.11
|
||||
date: 2012-12-21
|
||||
|
||||
new features:
|
||||
- title: "Merry Christmas and Happy Holidays to all ☺"
|
||||
|
||||
- title: "When connecting to MTP devices such as the Kindle Fire HD or the Nook HD, speed up the process by ignoring some folders."
|
||||
description: "calibre will now ignore folders for music, video, pictures, etc. when scanning the device. This can substantially speed up the connection process if you have thousands of non-ebook files on the device. The list of folders to be ignored can be customized by right clicking on the device icon in calibre and selecting 'Configure this device'."
|
||||
|
||||
- title: "Allow changing the icons for categories in the Tag Browser. Right click on a category and choose 'Change category icon'."
|
||||
tickets: [1092098]
|
||||
|
||||
- title: "Allow setting the color of all columns with a single rule in Preferences->Look & Feel->Column Coloring"
|
||||
|
||||
- title: "MOBI: When reading metadata from mobi files, put the contents of the ASIN field into an identifier named mobi-asin. Note that this value is not used when downloading metadata as it is not possible to know which (country specific) amazon website the ASIN comes from."
|
||||
tickets: [1090394]
|
||||
|
||||
bug fixes:
|
||||
- title: "Windows build: Fix a regression in 0.9.9 that caused calibre to not start on some windows systems that were missing the VC.90 dlls (some older XP systems)"
|
||||
|
||||
- title: "Kobo driver: Workaround for invalid shelves created by bugs in the Kobo server"
|
||||
tickets: [1091932]
|
||||
|
||||
- title: "Metadata download: Fix cover downloading from non-US amazon sites broken by a website change."
|
||||
tickets: [1090765]
|
||||
|
||||
improved recipes:
|
||||
- Le Devoir
|
||||
- Nin online
|
||||
- countryfile
|
||||
- Birmingham Post
|
||||
- The Independent
|
||||
- Various Polish news sources
|
||||
|
||||
new recipes:
|
||||
- title: MobileBulgaria
|
||||
author: Martin Tsanchev
|
||||
|
||||
- title: Various Polish news sources
|
||||
author: fenuks
|
||||
|
||||
- version: 0.9.10
|
||||
date: 2012-12-14
|
||||
|
||||
new features:
|
||||
- title: "Drivers for Nextbook Premium 8 se, HTC Desire X and Emerson EM 543"
|
||||
tickets: [1088149, 1088112, 1087978]
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix rich text delegate not working with Qt compiled in debug mode."
|
||||
tickets: [1089011]
|
||||
|
||||
- title: "When deleting all books in the library, blank the book details panel"
|
||||
|
||||
- title: "Conversion: Fix malformed values in the bgcolor attribute causing conversion to abort"
|
||||
|
||||
- title: "Conversion: Fix heuristics applying incorrect style in some circumstances"
|
||||
tickets: [1066507]
|
||||
|
||||
- title: "Possible fix for 64bit calibre not starting up on some Windows systems"
|
||||
tickets: [1087816]
|
||||
|
||||
improved recipes:
|
||||
- Sivil Dusunce
|
||||
- Anchorage Daily News
|
||||
- Le Monde
|
||||
- Harpers
|
||||
|
||||
new recipes:
|
||||
- title: Titanic
|
||||
author: Krittika Goyal
|
||||
|
||||
- version: 0.9.9
|
||||
date: 2012-12-07
|
||||
|
||||
new features:
|
||||
- title: "64 bit build for windows"
|
||||
type: major
|
||||
description: "calibre now has a 64 bit version for windows, available at: http://calibre-ebook.com/download_windows64 The 64bit build is not limited to using only 3GB of RAM when converting large/complex documents. It may also be slightly faster for some tasks. You can have both the 32 bit and the 64 bit build installed at the same time, they will use the same libraries, plugins and settings."
|
||||
|
||||
- title: "Content server: Make the identifiers in each books metadata clickable."
|
||||
tickets: [1085726]
|
||||
|
||||
bug fixes:
|
||||
- title: "EPUB Input: Fix an infinite loop while trying to recover a damaged EPUB file."
|
||||
tickets: [1086917]
|
||||
|
||||
- title: "KF8 Input: Fix handling of links in files that link to the obsolete <a name> tags instead of tags with an id attribute."
|
||||
tickets: [1086705]
|
||||
|
||||
- title: "Conversion: Fix a bug in removal of invalid entries from the spine, where not all invalid entries were removed, causing conversion to fail."
|
||||
tickets: [1086054]
|
||||
|
||||
- title: "KF8 Input: Ignore invalid flow references in the KF8 document instead of erroring out on them."
|
||||
tickets: [1085306]
|
||||
|
||||
- title: "Fix command line output on linux systems with incorrect LANG/LC_TYPE env vars."
|
||||
tickets: [1085103]
|
||||
|
||||
- title: "KF8 Input: Fix page breaks specified using the data-AmznPageBreak attribute being ignored by calibre."
|
||||
|
||||
- title: "PDF Output: Fix custom size field not accepting fractional numbers as sizes"
|
||||
|
||||
- title: "Get Books: Update libri.de and publio for website changes"
|
||||
|
||||
- title: "Wireless driver: Increase timeout interval, and when allocating a random port try 9090 first"
|
||||
|
||||
improved recipes:
|
||||
- New York Times
|
||||
- Weblogs SL
|
||||
- Zaman Gazetesi
|
||||
- Aksiyon Dergisi
|
||||
- Engadget
|
||||
- Metro UK
|
||||
- Heise Online
|
||||
|
||||
- version: 0.9.8
|
||||
date: 2012-11-30
|
||||
|
||||
new features:
|
||||
- title: "Add an option to show the cover size in the book details panel on the right. Option is in Preferences->Look & Feel->Book Details"
|
||||
|
||||
- title: "Kobo driver: Add support for firmware 2.2. Also add an option to send series information to the device."
|
||||
description: "The newest Kobo firmware can display series information. Unfortunately, the Kobo does not read this information from the ebook file itself. It has to be sent separately after the Kobo has finished processing the new files. So you might have to connect - send books - disconnect and then re-connect for the series info to show up. Fixes #1084388 (Add support for series on Kobo devices)"
|
||||
|
||||
- title: "Catalogs: Allow using custom columns as the source for Genres when generating catalogs"
|
||||
|
||||
- title: "When the user asks calibre to convert a book, show a small animation to highlight that the convert job has been queued to run in the background"
|
||||
|
||||
- title: "Add support for the notification center in OS X 10.8"
|
||||
|
||||
- title: "calibredb: Add an option to specify the cover to use when adding books with calibredb add."
|
||||
tickets: [1083932]
|
||||
|
||||
- title: "EPUB Input: Add support for EPUB files with broken central directory records *and* data descriptors"
|
||||
|
||||
- title: "Comic metadata: Support reading metadata from cbr files. Also read the comments and published date info from the metadata."
|
||||
tickets: [1082340]
|
||||
|
||||
- title: "Speed up processing of RAR and CBR files by avoiding an extra file copy"
|
||||
|
||||
- title: "Add driver for Nexus 10 on linux."
|
||||
tickets: [1082563]
|
||||
|
||||
bug fixes:
|
||||
- title: "KF8 Input: Handle invalid KF8 files with links pointing to non-existent locations and incorrect values in the div table."
|
||||
tickets: [1082669]
|
||||
|
||||
- title: "Viewer: Fix handling of empty self closing tags."
|
||||
tickets: [1083278]
|
||||
|
||||
- title: "Fix use of {formats} in save to disk templates. Fix some formatter functions causing plugboards to not validate."
|
||||
|
||||
- title: "Fix calibre quitting when minimized to system tray and an update available message is shown and then closed."
|
||||
tickets: [1082630]
|
||||
|
||||
- title: "Viewer: Fix vertical margin at the top of the first page of a chapter incorrect in a certain rare circumstance (first child of body being an empty paragraph)."
|
||||
tickets: [1082640]
|
||||
|
||||
- title: "E-book viewer: Fix bug that caused the default language for hyphenation to be ignored for books that do not specify a language"
|
||||
|
||||
improved recipes:
|
||||
- Pro Physik
|
||||
- Aachener Nachrichten
|
||||
- Science News
|
||||
|
||||
- version: 0.9.7
|
||||
date: 2012-11-23
|
||||
|
||||
|
10
README
@ -1,7 +1,7 @@
|
||||
calibre is an e-book library manager. It can view, convert and catalog e-books \
|
||||
in most of the major e-book formats. It can also talk to e-book reader \
|
||||
devices. It can go out to the internet and fetch metadata for your books. \
|
||||
It can download newspapers and convert them into e-books for convenient \
|
||||
calibre is an e-book library manager. It can view, convert and catalog e-books
|
||||
in most of the major e-book formats. It can also talk to e-book reader
|
||||
devices. It can go out to the internet and fetch metadata for your books.
|
||||
It can download newspapers and convert them into e-books for convenient
|
||||
reading. It is cross platform, running on Linux, Windows and OS X.
|
||||
|
||||
For screenshots: https://calibre-ebook.com/demo
|
||||
@ -15,5 +15,5 @@ bzr branch lp:calibre
|
||||
To update your copy of the source code:
|
||||
bzr merge
|
||||
|
||||
Tarballs of the source code for each release are now available \
|
||||
Tarballs of the source code for each release are now available
|
||||
at http://code.google.com/p/calibre-ebook
|
||||
|
366
imgsrc/polish.svg
Normal file
@ -0,0 +1,366 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://web.resource.org/cc/"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="48"
|
||||
height="48"
|
||||
id="svg2"
|
||||
sodipodi:version="0.32"
|
||||
inkscape:version="0.45"
|
||||
version="1.0"
|
||||
sodipodi:docname="edit-clear.svg"
|
||||
inkscape:output_extension="org.inkscape.output.svg.inkscape"
|
||||
sodipodi:docbase="/home/dobey/Projects/gnome-icon-theme/scalable/actions">
|
||||
<defs
|
||||
id="defs4">
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient6019">
|
||||
<stop
|
||||
style="stop-color:#000000;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop6021" />
|
||||
<stop
|
||||
style="stop-color:#000000;stop-opacity:0;"
|
||||
offset="1"
|
||||
id="stop6023" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5999">
|
||||
<stop
|
||||
style="stop-color:#c4a000;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop6001" />
|
||||
<stop
|
||||
style="stop-color:#c4a000;stop-opacity:0;"
|
||||
offset="1"
|
||||
id="stop6003" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5987">
|
||||
<stop
|
||||
style="stop-color:#d7c20f;stop-opacity:1"
|
||||
offset="0"
|
||||
id="stop5989" />
|
||||
<stop
|
||||
style="stop-color:#b6970d;stop-opacity:1"
|
||||
offset="1"
|
||||
id="stop5991" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
id="linearGradient5981"
|
||||
inkscape:collect="always">
|
||||
<stop
|
||||
id="stop5983"
|
||||
offset="0"
|
||||
style="stop-color:#ffffff;stop-opacity:1;" />
|
||||
<stop
|
||||
id="stop5985"
|
||||
offset="1"
|
||||
style="stop-color:#ffffff;stop-opacity:0.69411765" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5996">
|
||||
<stop
|
||||
style="stop-color:#8f5902;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop5998" />
|
||||
<stop
|
||||
style="stop-color:#73521e;stop-opacity:1"
|
||||
offset="1"
|
||||
id="stop6000" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5984">
|
||||
<stop
|
||||
style="stop-color:#ffffff;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop5986" />
|
||||
<stop
|
||||
style="stop-color:#ffffff;stop-opacity:0.13438736"
|
||||
offset="1"
|
||||
id="stop5988" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5974">
|
||||
<stop
|
||||
style="stop-color:#ad7fa8;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop5976" />
|
||||
<stop
|
||||
style="stop-color:#dac6d8;stop-opacity:1"
|
||||
offset="1"
|
||||
id="stop5978" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5966">
|
||||
<stop
|
||||
style="stop-color:#fdef72;stop-opacity:1"
|
||||
offset="0"
|
||||
id="stop5968" />
|
||||
<stop
|
||||
style="stop-color:#e2cb0b;stop-opacity:1"
|
||||
offset="1"
|
||||
id="stop5970" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5958">
|
||||
<stop
|
||||
style="stop-color:#c17d11;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop5960" />
|
||||
<stop
|
||||
style="stop-color:#e9b96e;stop-opacity:1"
|
||||
offset="1"
|
||||
id="stop5962" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5958"
|
||||
id="linearGradient5964"
|
||||
x1="28"
|
||||
y1="16"
|
||||
x2="26"
|
||||
y2="8"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="translate(-2,0)" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5966"
|
||||
id="linearGradient5972"
|
||||
x1="20.933708"
|
||||
y1="25.060659"
|
||||
x2="30.208115"
|
||||
y2="30.742676"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="translate(-2,0)" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5974"
|
||||
id="linearGradient5980"
|
||||
x1="27.651777"
|
||||
y1="23.145937"
|
||||
x2="21.59099"
|
||||
y2="20.618719"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="matrix(0.9768193,0,0,1,-1.3746633,0)" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5984"
|
||||
id="linearGradient5994"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
x1="28"
|
||||
y1="8"
|
||||
x2="33.447109"
|
||||
y2="16.685888" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5996"
|
||||
id="linearGradient6002"
|
||||
x1="30.324829"
|
||||
y1="9.2407961"
|
||||
x2="34"
|
||||
y2="18"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="translate(-2,0)" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5981"
|
||||
id="linearGradient5973"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
x1="22.319767"
|
||||
y1="41.955986"
|
||||
x2="18.985712"
|
||||
y2="37.029255"
|
||||
gradientTransform="translate(-2,0)" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5987"
|
||||
id="linearGradient5993"
|
||||
x1="17.032078"
|
||||
y1="27.446827"
|
||||
x2="29.494455"
|
||||
y2="37.845814"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="translate(-2,0)" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5999"
|
||||
id="linearGradient6005"
|
||||
x1="27.354809"
|
||||
y1="36.218422"
|
||||
x2="23.489431"
|
||||
y2="34.728424"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="translate(-2,0)" />
|
||||
<radialGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient6019"
|
||||
id="radialGradient6025"
|
||||
cx="38"
|
||||
cy="69"
|
||||
fx="28.603323"
|
||||
fy="69"
|
||||
r="20"
|
||||
gradientTransform="matrix(1,0,0,0.45,0,37.95)"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
</defs>
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#f6aaaa"
|
||||
borderopacity="1"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="1"
|
||||
inkscape:cx="37.14966"
|
||||
inkscape:cy="21.336383"
|
||||
inkscape:document-units="px"
|
||||
inkscape:current-layer="layer1"
|
||||
width="48px"
|
||||
height="48px"
|
||||
inkscape:showpageshadow="false"
|
||||
showgrid="false"
|
||||
gridspacingx="0.5px"
|
||||
gridspacingy="0.5px"
|
||||
gridempspacing="2"
|
||||
inkscape:grid-points="true"
|
||||
inkscape:window-width="862"
|
||||
inkscape:window-height="875"
|
||||
inkscape:window-x="12"
|
||||
inkscape:window-y="50"
|
||||
inkscape:object-paths="true"
|
||||
inkscape:object-nodes="true"
|
||||
objecttolerance="6"
|
||||
gridtolerance="6"
|
||||
guidetolerance="6"
|
||||
showborder="false">
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid5333"
|
||||
spacingx="0.5px"
|
||||
spacingy="0.5px"
|
||||
empspacing="2" />
|
||||
</sodipodi:namedview>
|
||||
<metadata
|
||||
id="metadata7">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:creator>
|
||||
<cc:Agent>
|
||||
<dc:title>Ulisse Perusin</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:creator>
|
||||
<cc:license
|
||||
rdf:resource="http://creativecommons.org/licenses/GPL/2.0/" />
|
||||
<dc:source>uli.peru@gmail.com</dc:source>
|
||||
<dc:title>edit-clear</dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Livello 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1">
|
||||
<path
|
||||
sodipodi:type="arc"
|
||||
style="opacity:0.25;fill:url(#radialGradient6025);fill-opacity:1;stroke:none;stroke-width:0.99999994;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:20;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="path6017"
|
||||
sodipodi:cx="38"
|
||||
sodipodi:cy="69"
|
||||
sodipodi:rx="20"
|
||||
sodipodi:ry="9"
|
||||
d="M 58,69 A 20,9 0 1 1 18,69 A 20,9 0 1 1 58,69 z"
|
||||
transform="matrix(1,0,0,0.6666668,-13.999999,-5.0000087)" />
|
||||
<path
|
||||
style="fill:url(#linearGradient5964);fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient6002);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 34.59375,2.46875 C 26.390533,2.5744003 25.19695,18.716276 22.84375,21.625 L 26.84375,23.0625 C 29.475623,18.689953 42.599746,4.1545034 35.40625,2.5 C 35.12676,2.4690309 34.85837,2.4653419 34.59375,2.46875 z M 33.5625,4.53125 C 33.756063,4.5125114 33.930486,4.5369694 34.09375,4.625 C 34.746806,4.9771226 34.817405,6.1198771 34.25,7.15625 C 33.682595,8.1926229 32.684304,8.7583725 32.03125,8.40625 C 31.378197,8.0541272 31.307595,6.9113729 31.875,5.875 C 32.300554,5.0977202 32.981812,4.5874659 33.5625,4.53125 z"
|
||||
id="path5371" />
|
||||
<path
|
||||
sodipodi:type="inkscape:offset"
|
||||
inkscape:radius="-1.0049498"
|
||||
inkscape:original="M 36.59375 2.46875 C 28.390533 2.5744003 27.19695 18.716276 24.84375 21.625 L 28.84375 23.0625 C 31.475623 18.689953 44.599746 4.1545034 37.40625 2.5 C 37.12676 2.4690309 36.85837 2.4653419 36.59375 2.46875 z "
|
||||
style="opacity:0.26666667;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient5994);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
id="path5992"
|
||||
d="M 36.59375,3.46875 C 34.872132,3.4909229 33.585825,4.3246243 32.40625,5.75 C 31.226675,7.1753757 30.257916,9.1916354 29.46875,11.34375 C 28.679584,13.495865 28.04471,15.77802 27.46875,17.71875 C 27.068859,19.066206 26.698893,20.125198 26.25,21.0625 L 28.4375,21.84375 C 30.056094,19.348126 33.476298,15.252572 35.96875,11.21875 C 37.294589,9.0729934 38.25245,7.0407089 38.46875,5.65625 C 38.5769,4.9640206 38.513818,4.4833206 38.34375,4.1875 C 38.179059,3.9010309 37.880274,3.6629145 37.21875,3.5 C 37.019255,3.4812162 36.817917,3.4658629 36.59375,3.46875 z"
|
||||
transform="translate(-2,0)" />
|
||||
<path
|
||||
style="fill:url(#linearGradient5972);fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient5993);stroke-width:0.99999994;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:20;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="M 20.5,16.5 C 21.209506,18.503007 19.958612,20.237831 19.5,21.5 C 14.308433,23.045674 10.713199,31.203726 5.3674175,35.453585 C 6.0352055,36.150983 6.819644,36.897763 7.5,37.5 L 11.5625,33.96875 L 8.494944,38.493399 C 10.704181,40.284382 13,41.5 14.5,42 L 17.25,38.34375 L 15.5,42.5 C 16.951994,43.088882 20.485286,43.982025 22.5,44 L 24.50389,40.597503 L 23.990721,44.0625 C 24.820284,44.220859 26.428886,44.436716 27.5,44.46875 C 30.862186,38.96875 31.5,30 29.5,26 C 29,24 31,21.5 32.5,20.5 C 30,18.5 24.294411,16.196274 20.5,16.5 z"
|
||||
id="path5367"
|
||||
sodipodi:nodetypes="cccccccccccccccc" />
|
||||
<path
|
||||
style="opacity:0.26666667;fill:#c4a000;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 9,38.5 C 13.816495,33.489105 13.465023,31.296074 19.116117,26.972272 C 16.133675,31.800703 15.650278,34.31233 12,40.5 L 9,38.5 z"
|
||||
id="path5975"
|
||||
sodipodi:nodetypes="cccc" />
|
||||
<path
|
||||
style="opacity:0.41568627;fill:#c4a000;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 16.149808,42.202452 L 20.495835,32.362305 C 22.160348,29.378578 23.355507,26.392253 25.024808,24.014952 C 23.422854,29.432989 20.134118,36.136745 17.493558,42.639952 L 16.149808,42.202452 z"
|
||||
id="path5979"
|
||||
sodipodi:nodetypes="ccccc" />
|
||||
<path
|
||||
style="opacity:0.47843137;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient5973);stroke-width:0.99999994px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 21.625,17.5 C 21.721738,19.415146 20.702057,21.029645 20.40625,21.84375 C 20.302147,22.128459 20.07092,22.348124 19.78125,22.4375 C 14.327852,24.672144 12.151447,31.011538 6.7866116,35.523667 C 6.988254,35.723521 7.2786424,35.940971 7.4811412,36.131898 L 16.5,28.5 L 9.923385,38.310313 C 11.193418,39.337926 12.645586,40.194857 14.150041,40.799478 L 21.144394,31.5 L 16.869501,41.911612 C 18.46507,42.437269 19.967804,42.738908 21.81451,43 L 26.43324,35.3125 L 25.0625,43.219317 L 26.9375,43.445312 C 28.370713,40.909818 29.069882,37.778782 29.46875,34.65625 C 29.892695,31.337404 29.463786,28.115072 28.625,26.4375 C 28.597837,26.377291 28.576895,26.314465 28.5625,26.25 C 28.215642,24.862569 28.731642,23.504373 29.4375,22.375 C 29.864393,21.691971 30.367872,21.084221 30.902459,20.573223 C 29.730977,19.790532 28.315762,19.113157 26.53125,18.46875 C 24.769173,17.832444 23.033252,17.518725 21.625,17.5 z"
|
||||
id="path6014"
|
||||
sodipodi:nodetypes="csccccccccccccsssscsc" />
|
||||
<path
|
||||
style="opacity:0.24705882;fill:url(#linearGradient6005);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 18.96875,43 C 21.146316,37.248129 25.364666,32.931057 26.985663,27.064588 C 27.037206,30.726661 27.235383,37.268314 25.09375,43.78125 C 24.773984,43.783025 24.919823,43.670441 24.62387,43.662697 L 25.424662,37.93818 L 22.143176,43.492564 C 19.952368,43.33624 20.848565,43.525163 18.96875,43 z"
|
||||
id="path5977"
|
||||
sodipodi:nodetypes="ccccccc" />
|
||||
<path
|
||||
style="opacity:0.48235294;fill:#c4a000;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 21.003067,22.610447 C 19.751072,23.226826 18.940858,24.137725 18.019961,24.974835 C 19.246448,24.266192 20.398947,23.525841 22.019534,22.986097 L 21.003067,22.610447 z"
|
||||
id="path5995"
|
||||
sodipodi:nodetypes="cccc" />
|
||||
<path
|
||||
style="opacity:0.48235294;fill:#c4a000;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 23.008698,23.061049 L 21.992233,25.049787 L 24.972946,23.461537 L 23.008698,23.061049 z"
|
||||
id="path5997"
|
||||
sodipodi:nodetypes="cccc" />
|
||||
<path
|
||||
style="opacity:0.48235294;fill:#c4a000;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 22.939805,17.961399 L 22.044612,19.668421 L 23.610339,20.170505 L 22.939805,17.961399 z"
|
||||
id="path6007"
|
||||
sodipodi:nodetypes="cccc" />
|
||||
<path
|
||||
style="opacity:0.48235294;fill:#c4a000;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 29.923254,19.88537 L 27.463006,21.720817 L 29.028733,22.222901 L 29.923254,19.88537 z"
|
||||
id="path6009"
|
||||
sodipodi:nodetypes="cccc" />
|
||||
<path
|
||||
style="opacity:0.48235294;fill:#c4a000;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 26.012925,17.938566 L 24.499014,20.46318 L 26.064741,20.965264 L 26.012925,17.938566 z"
|
||||
id="path6011"
|
||||
sodipodi:nodetypes="cccc" />
|
||||
<path
|
||||
style="opacity:0.2;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1"
|
||||
d="M 18.967726,22.024699 C 18.400946,19.059215 28.300561,24.177602 30.004548,25.019068 C 29.998066,26.098136 30.004548,27.019068 29.027729,27.019068 C 26.460191,25.626088 22.492474,23.413925 18.967726,22.024699 z"
|
||||
id="path6013"
|
||||
sodipodi:nodetypes="cccc" />
|
||||
<path
|
||||
style="fill:url(#linearGradient5980);fill-opacity:1;fill-rule:evenodd;stroke:#5c3566;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1"
|
||||
d="M 18.650133,21.5 C 18.161723,20.5 18.650133,19.5 19.626953,19.5 C 23.618393,20.475417 26.951828,21.706232 30.371965,23.5 C 30.860375,24.5 30.371965,25.5 29.395146,25.5 C 25.861203,23.63558 22.528435,22.425187 18.650133,21.5 z"
|
||||
id="path5373"
|
||||
sodipodi:nodetypes="ccccc" />
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 16 KiB |
@ -39,27 +39,27 @@ All the |app| python code is in the ``calibre`` package. This package contains t
|
||||
|
||||
* devices - All the device drivers. Just look through some of the built-in drivers to get an idea for how they work.
|
||||
|
||||
* For details, see: devices.interface which defines the interface supported by device drivers and devices.usbms which
|
||||
* For details, see: devices.interface which defines the interface supported by device drivers and ``devices.usbms`` which
|
||||
defines a generic driver that connects to a USBMS device. All USBMS based drivers in |app| inherit from it.
|
||||
|
||||
* ebooks - All the ebook conversion/metadata code. A good starting point is ``calibre.ebooks.conversion.cli`` which is the
|
||||
module powering the :command:`ebook-convert` command. The conversion process is controlled via conversion.plumber.
|
||||
The format independent code is all in ebooks.oeb and the format dependent code is in ebooks.format_name.
|
||||
module powering the :command:`ebook-convert` command. The conversion process is controlled via ``conversion.plumber``.
|
||||
The format independent code is all in ``ebooks.oeb`` and the format dependent code is in ``ebooks.format_name``.
|
||||
|
||||
* Metadata reading, writing, and downloading is all in ebooks.metadata
|
||||
* Metadata reading, writing, and downloading is all in ``ebooks.metadata``
|
||||
* Conversion happens in a pipeline, for the structure of the pipeline,
|
||||
see :ref:`conversion-introduction`. The pipeline consists of an input
|
||||
plugin, various transforms and an output plugin. The code constructs
|
||||
and drives the pipeline is in plumber.py. The pipeline works on a
|
||||
plugin, various transforms and an output plugin. The code that constructs
|
||||
and drives the pipeline is in :file:`plumber.py`. The pipeline works on a
|
||||
representation of an ebook that is like an unzipped epub, with
|
||||
manifest, spine, toc, guide, html content, etc. The
|
||||
class that manages this representation is OEBBook in oeb/base.py. The
|
||||
class that manages this representation is OEBBook in ``ebooks.oeb.base``. The
|
||||
various transformations that are applied to the book during
|
||||
conversions live in `oeb/transforms/*.py`. And the input and output
|
||||
plugins live in `conversion/plugins/*.py`.
|
||||
conversions live in :file:`oeb/transforms/*.py`. And the input and output
|
||||
plugins live in :file:`conversion/plugins/*.py`.
|
||||
|
||||
* library - The database back-end and the content server. See library.database2 for the interface to the |app| library. library.server is the |app| Content Server.
|
||||
* gui2 - The Graphical User Interface. GUI initialization happens in gui2.main and gui2.ui. The ebook-viewer is in gui2.viewer.
|
||||
* library - The database back-end and the content server. See ``library.database2`` for the interface to the |app| library. ``library.server`` is the |app| Content Server.
|
||||
* gui2 - The Graphical User Interface. GUI initialization happens in ``gui2.main`` and ``gui2.ui``. The ebook-viewer is in ``gui2.viewer``.
|
||||
|
||||
If you need help understanding the code, post in the `development forum <http://www.mobileread.com/forums/forumdisplay.php?f=240>`_
|
||||
and you will most likely get help from one of |app|'s many developers.
|
||||
@ -74,10 +74,6 @@ After installing Bazaar, you can get the |app| source code with the command::
|
||||
|
||||
On Windows you will need the complete path name, that will be something like :file:`C:\\Program Files\\Bazaar\\bzr.exe`.
|
||||
|
||||
To update a branch to the latest code, use the command::
|
||||
|
||||
bzr merge
|
||||
|
||||
|app| is a very large project with a very long source control history, so the
|
||||
above can take a while (10mins to an hour depending on your internet speed).
|
||||
|
||||
@ -88,6 +84,11 @@ using::
|
||||
|
||||
bzr branch --stacked lp:calibre
|
||||
|
||||
|
||||
To update a branch to the latest code, use the command::
|
||||
|
||||
bzr merge
|
||||
|
||||
Submitting your changes to be included
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
119
manual/faq.rst
@ -158,12 +158,23 @@ My device is not being detected by |app|?
|
||||
|
||||
Follow these steps to find the problem:
|
||||
|
||||
* Make sure that you are connecting only a single device to your computer at a time. Do not have another |app| supported device like an iPhone/iPad etc. at the same time.
|
||||
* If you are connecting an Apple iDevice (iPad, iPod Touch, iPhone), use the 'Connect to iTunes' method in the 'Getting started' instructions in `Calibre + Apple iDevices: Start here <http://www.mobileread.com/forums/showthread.php?t=118559>`_.
|
||||
* Make sure you are running the latest version of |app|. The latest version can always be downloaded from `the calibre website <http://calibre-ebook.com/download>`_.
|
||||
* Ensure your operating system is seeing the device. That is, the device should show up in Windows Explorer (in Windows) or Finder (in OS X).
|
||||
* In |app|, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled, the plugin icon next to it should be green when it is enabled.
|
||||
* If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker <http://bugs.calibre-ebook.com>`_.
|
||||
* Make sure that you are connecting only a single device to your computer
|
||||
at a time. Do not have another |app| supported device like an iPhone/iPad
|
||||
etc. at the same time.
|
||||
* If you are connecting an Apple iDevice (iPad, iPod Touch, iPhone), use
|
||||
the 'Connect to iTunes' method in the 'Getting started' instructions in
|
||||
`Calibre + Apple iDevices: Start here <http://www.mobileread.com/forums/showthread.php?t=118559>`_.
|
||||
* Make sure you are running the latest version of |app|. The latest version
|
||||
can always be downloaded from `the calibre website <http://calibre-ebook.com/download>`_.
|
||||
You can tell what version of |app| you are currently running by looking
|
||||
at the bottom line of the main |app| window.
|
||||
* Ensure your operating system is seeing the device. That is, the device
|
||||
should show up in Windows Explorer (in Windows) or Finder (in OS X).
|
||||
* In |app|, go to Preferences->Ignored Devices and check that your device
|
||||
is not being ignored.
|
||||
* If all the above steps fail, go to Preferences->Miscellaneous and click
|
||||
debug device detection with your device attached and post the output as a
|
||||
ticket on `the calibre bug tracker <http://bugs.calibre-ebook.com>`_.
|
||||
|
||||
My device is non-standard or unusual. What can I do to connect to it?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -239,42 +250,71 @@ If you don't want to uninstall it altogether, there are a couple of tricks you c
|
||||
simplest is to simply re-name the executable file that launches the library program. More detail
|
||||
`in the forums <http://www.mobileread.com/forums/showthread.php?t=65809>`_.
|
||||
|
||||
How do I use |app| with my iPad/iPhone/iTouch?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
How do I use |app| with my iPad/iPhone/iPod touch?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Over the air
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
The easiest way to browse your |app| collection on your Apple device (iPad/iPhone/iPod) is by using the calibre content server, which makes your collection available over the net. First perform the following steps in |app|
|
||||
The easiest way to browse your |app| collection on your Apple device
|
||||
(iPad/iPhone/iPod) is by using the |app| content server, which makes your
|
||||
collection available over the net. First perform the following steps in |app|
|
||||
|
||||
* Set the Preferred Output Format in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
|
||||
* Set the output profile to iPad (this will work for iPhone/iPods as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
|
||||
* Convert the books you want to read on your iPhone to EPUB format by selecting them and clicking the Convert button.
|
||||
* Turn on the Content Server in |app|'s preferences and leave |app| running.
|
||||
* Set the Preferred Output Format in |app| to EPUB (The output format can be
|
||||
set under :guilabel:`Preferences->Interface->Behavior`)
|
||||
* Set the output profile to iPad (this will work for iPhone/iPods as well),
|
||||
under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
|
||||
* Convert the books you want to read on your iDevice to EPUB format by
|
||||
selecting them and clicking the Convert button.
|
||||
* Turn on the Content Server by clicking the :guilabel:`Connect/Share` button
|
||||
and leave |app| running. You can also tell |app| to automatically start the
|
||||
content server via :guilabel:`Preferences->Sharing over the net`.
|
||||
|
||||
Now on your iPad/iPhone you have two choices, use either iBooks (version 1.2 and later) or Stanza (version 3.0 and later). Both are available free from the app store.
|
||||
There are many apps for your iDevice that can connect to |app|. Here we
|
||||
describe using two of them, iBooks and Stanza.
|
||||
|
||||
Using Stanza
|
||||
***************
|
||||
|
||||
Now you should be able to access your books on your iPhone by opening Stanza. Go to "Get Books" and then click the "Shared" tab. Under Shared you will see an entry "Books in calibre". If you don't, make sure your iPad/iPhone is connected using the WiFi network in your house, not 3G. If the |app| catalog is still not detected in Stanza, you can add it manually in Stanza. To do this, click the "Shared" tab, then click the "Edit" button and then click "Add book source" to add a new book source. In the Add Book Source screen enter whatever name you like and in the URL field, enter the following::
|
||||
You should be able to access your books on your iPhone by opening Stanza. Go to
|
||||
"Get Books" and then click the "Shared" tab. Under Shared you will see an entry
|
||||
"Books in calibre". If you don't, make sure your iPad/iPhone is connected using
|
||||
the WiFi network in your house, not 3G. If the |app| catalog is still not
|
||||
detected in Stanza, you can add it manually in Stanza. To do this, click the
|
||||
"Shared" tab, then click the "Edit" button and then click "Add book source" to
|
||||
add a new book source. In the Add Book Source screen enter whatever name you
|
||||
like and in the URL field, enter the following::
|
||||
|
||||
http://192.168.1.2:8080/
|
||||
|
||||
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address you computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address. Now click "Save" and you are done.
|
||||
Replace ``192.168.1.2`` with the local IP address of the computer running
|
||||
|app|. If you have changed the port the |app| content server is running on, you
|
||||
will have to change ``8080`` as well to the new port. The local IP address is
|
||||
the IP address your computer is assigned on your home network. A quick Google
|
||||
search will tell you how to find out your local IP address. Now click "Save"
|
||||
and you are done.
|
||||
|
||||
If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout.
|
||||
If you get timeout errors while browsing the calibre catalog in Stanza, try
|
||||
increasing the connection timeout value in the stanza settings. Go to
|
||||
Info->Settings and increase the value of Download Timeout.
|
||||
|
||||
Using iBooks
|
||||
**************
|
||||
|
||||
Start the Safari browser and type in the IP address and port of the computer running the calibre server, like this::
|
||||
Start the Safari browser and type in the IP address and port of the computer
|
||||
running the calibre server, like this::
|
||||
|
||||
http://192.168.1.2:8080/
|
||||
|
||||
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address you computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
|
||||
Replace ``192.168.1.2`` with the local IP address of the computer running
|
||||
|app|. If you have changed the port the |app| content server is running on, you
|
||||
will have to change ``8080`` as well to the new port. The local IP address is
|
||||
the IP address your computer is assigned on your home network. A quick Google
|
||||
search will tell you how to find out your local IP address.
|
||||
|
||||
You will see a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
|
||||
You will see a list of books in Safari, just click on the epub link for
|
||||
whichever book you want to read, Safari will then prompt you to open it with
|
||||
iBooks.
|
||||
|
||||
|
||||
With the USB cable + iTunes
|
||||
@ -436,10 +476,10 @@ that allows you to create collections on your Kindle from the |app| metadata. It
|
||||
|
||||
.. note:: Amazon have removed the ability to manipulate collections completely in their newer models, like the Kindle Touch and Kindle Fire, making even the above plugin useless. If you really want the ability to manage collections on your Kindle via a USB connection, we encourage you to complain to Amazon about it, or get a reader where this is supported, like the SONY or Kobo Readers.
|
||||
|
||||
I am getting an error when I try to use |app| with my Kobo Touch?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
I am getting an error when I try to use |app| with my Kobo Touch/Glo/etc.?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The Kobo Touch has very buggy firmware. Connecting to it has been known to fail at random. Certain combinations of motherboard, USB ports/cables/hubs can exacerbate this tendency to fail. If you are getting an error when connecting to your touch with |app| try the following, each of which has solved the problem for *some* |app| users.
|
||||
The Kobo has very buggy firmware. Connecting to it has been known to fail at random. Certain combinations of motherboard, USB ports/cables/hubs can exacerbate this tendency to fail. If you are getting an error when connecting to your Kobo with |app|, try the following, each of which has solved the problem for *some* |app| users.
|
||||
|
||||
* Connect the Kobo directly to your computer, not via USB Hub
|
||||
* Try a different USB cable and a different USB port on your computer
|
||||
@ -539,9 +579,9 @@ Yes, you can. Follow the instructions in the answer above for adding custom colu
|
||||
|
||||
How do I move my |app| library from one computer to another?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Simply copy the |app| library folder from the old to the new computer. You can find out what the library folder is by clicking the calibre icon in the toolbar. The very first item is the path to the library folder. Now on the new computer, start |app| for the first time. It will run the Welcome Wizard asking you for the location of the |app| library. Point it to the previously copied folder. If the computer you are transferring to already has a calibre installation, then the Welcome wizard wont run. In that case, click the calibre icon in the tooolbar and point it to the newly copied directory. You will now have two calibre libraries on your computer and you can switch between them by clicking the calibre icon on the toolbar.
|
||||
Simply copy the |app| library folder from the old to the new computer. You can find out what the library folder is by clicking the calibre icon in the toolbar. The very first item is the path to the library folder. Now on the new computer, start |app| for the first time. It will run the Welcome Wizard asking you for the location of the |app| library. Point it to the previously copied folder. If the computer you are transferring to already has a calibre installation, then the Welcome wizard won't run. In that case, right-click the |app| icon in the toolbar and point it to the newly copied directory. You will now have two calibre libraries on your computer and you can switch between them by clicking the |app| icon on the toolbar. Transferring your library in this manner preserves all your metadata, tags, custom columns, etc.
|
||||
|
||||
Note that if you are transferring between different types of computers (for example Windows to OS X) then after doing the above you should also right-click the calibre icon on the tool bar, select Library Maintenance and run the Check Library action. It will warn you about any problems in your library, which you should fix by hand.
|
||||
Note that if you are transferring between different types of computers (for example Windows to OS X) then after doing the above you should also right-click the |app| icon on the tool bar, select Library Maintenance and run the Check Library action. It will warn you about any problems in your library, which you should fix by hand.
|
||||
|
||||
.. note:: A |app| library is just a folder which contains all the book files and their metadata. All the metadata is stored in a single file called metadata.db, in the top level folder. If this file gets corrupted, you may see an empty list of books in |app|. In this case you can ask |app| to restore your books by doing a right-click on the |app| icon in the toolbar and selecting Library Maintenance->Restore Library.
|
||||
|
||||
@ -652,7 +692,7 @@ Post any output you see in a help message on the `Forum <http://www.mobileread.c
|
||||
|app| freezes/crashes occasionally?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are three possible things I know of, that can cause this:
|
||||
There are six things I know of that can cause this:
|
||||
|
||||
* You recently connected an external monitor or TV to your computer. In
|
||||
this case, whenever |app| opens a new window like the edit metadata
|
||||
@ -660,7 +700,7 @@ There are three possible things I know of, that can cause this:
|
||||
you don't notice it and so you think |app| has frozen. Disconnect your
|
||||
second monitor and restart calibre.
|
||||
|
||||
* You are using a Wacom branded mouse. There is an incompatibility between
|
||||
* You are using a Wacom branded USB mouse. There is an incompatibility between
|
||||
Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom
|
||||
mouse.
|
||||
|
||||
@ -668,6 +708,29 @@ There are three possible things I know of, that can cause this:
|
||||
the blacklist of programs inside RoboForm to fix this. Or uninstall
|
||||
RoboForm.
|
||||
|
||||
* The Logitech SetPoint Settings application causes random crashes in
|
||||
|app| when it is open. Close it before starting |app|.
|
||||
|
||||
* Constant Guard Protection by Xfinity causes crashes in |app|. You have to
|
||||
manually allow |app| in it or uninstall Constant Guard Protection.
|
||||
|
||||
* On some 64 bit versions of Windows there are security software/settings
|
||||
that prevent 64-bit |app| from working properly. If you are using the 64-bit
|
||||
version of |app| try switching to the 32-bit version.
|
||||
|
||||
If none of the above apply to you, then there is some other program on your
|
||||
computer that is interfering with |app|. First reboot your computer in safe
|
||||
mode, to have as few running programs as possible, and see if the crashes still
|
||||
happen. If they do not, then you know it is some program causing the problem.
|
||||
The most likely such culprit is a program that modifies other programs'
|
||||
behavior, such as an antivirus, a device driver, something like RoboForm (an
|
||||
automatic form filling app) or an assistive technology like Voice Control or a
|
||||
Screen Reader.
|
||||
|
||||
The only way to find the culprit is to eliminate the programs one by one and
|
||||
see which one is causing the issue. Basically, stop a program, run calibre,
|
||||
check for crashes. If they still happen, stop another program and repeat.
|
||||
|
||||
|app| is not starting on OS X?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@ -767,7 +830,7 @@ Why doesn't |app| have an automatic update?
|
||||
For many reasons:
|
||||
|
||||
* *There is no need to update every week*. If you are happy with how |app| works turn off the update notification and be on your merry way. Check back to see if you want to update once a year or so.
|
||||
* Pre downloading the updates for all users in the background would mean require about 80TB of bandwidth *every week*. That costs thousands of dollars a month. And |app| is currently growing at 300,000 new users every month.
|
||||
* Pre downloading the updates for all users in the background would require about 80TB of bandwidth *every week*. That costs thousands of dollars a month. And |app| is currently growing at 300,000 new users every month.
|
||||
* If I implement a dialog that downloads the update and launches it, instead of going to the website as it does now, that would save the most ardent |app| updater, *at most five clicks a week*. There are far higher priority things to do in |app| development.
|
||||
* If you really, really hate downloading |app| every week but still want to be up to date with the latest version, I encourage you to run from source, which makes updating trivial. Instructions are :ref:`available here <develop>`.
|
||||
|
||||
|
@ -537,6 +537,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
|
||||
- Merge selected records, keeping originals
|
||||
* - :kbd:`O`
|
||||
- Open containing folder
|
||||
* - :kbd:`P`
|
||||
- Polish books
|
||||
* - :kbd:`S`
|
||||
- Save to Disk
|
||||
* - :kbd:`V`
|
||||
|
@ -2,41 +2,70 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
|
||||
title = u'Aachener Nachrichten'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 1
|
||||
__author__ = 'schuster' #AGE update 2012-11-28
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.an-online.de/einwaage/images/an_logo.png'
|
||||
masthead_url = 'http://www.an-online.de/einwaage/images/an_logo.png'
|
||||
extra_css = '''
|
||||
.fliesstext_detail:{margin-bottom:10%;}
|
||||
.headline_1:{margin-bottom:25%;}
|
||||
b{font-family:Arial,Helvetica,sans-serif; font-weight:200;font-size:large;}
|
||||
a{font-family:Arial,Helvetica,sans-serif; font-weight:400;font-size:large;}
|
||||
ll{font-family:Arial,Helvetica,sans-serif; font-weight:100;font-size:large;}
|
||||
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
|
||||
dd{font-family:Arial,Helvetica,sans-serif;font-size:large;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
language = 'de'
|
||||
|
||||
# cover_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
||||
masthead_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='span', attrs={'class':['fliesstext_detail', 'headline_1', 'autor_detail']}),
|
||||
dict(id=['header-logo'])
|
||||
]
|
||||
dict(name='article', attrs={'class':['single']})
|
||||
]
|
||||
|
||||
feeds = [(u'Euregio', u'http://www.an-online.de/an/rss/Euregio.xml'),
|
||||
(u'Aachen', u'http://www.an-online.de/an/rss/Aachen.xml'),
|
||||
(u'Nordkreis', u'http://www.an-online.de/an/rss/Nordkreis.xml'),
|
||||
(u'Düren', u'http://www.an-online.de/an/rss/Dueren.xml'),
|
||||
(u'Eiffel', u'http://www.an-online.de/an/rss/Eifel.xml'),
|
||||
(u'Eschweiler', u'http://www.an-online.de/an/rss/Eschweiler.xml'),
|
||||
(u'Geilenkirchen', u'http://www.an-online.de/an/rss/Geilenkirchen.xml'),
|
||||
(u'Heinsberg', u'http://www.an-online.de/an/rss/Heinsberg.xml'),
|
||||
(u'Jülich', u'http://www.an-online.de/an/rss/Juelich.xml'),
|
||||
(u'Stolberg', u'http://www.an-online.de/an/rss/Stolberg.xml'),
|
||||
(u'Ratgebenr', u'http://www.an-online.de/an/rss/Ratgeber.xml')]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':["clearfix navi-wrapper"]}),
|
||||
dict(name='div', attrs={'id':["article_actions"]}),
|
||||
dict(name='style', attrs={'type':["text/css"]}),
|
||||
dict(name='aside'),
|
||||
dict(name='a', attrs={'class':["btn btn-action"]})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Lokales - Euregio', u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'),
|
||||
(u'Lokales - Aachen', u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'),
|
||||
(u'Lokales - Nordkreis', u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'),
|
||||
(u'Lokales - Düren', u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'),
|
||||
(u'Lokales - Eiffel', u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'),
|
||||
(u'Lokales - Eschweiler', u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'),
|
||||
(u'Lokales - Geilenkirchen', u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'),
|
||||
(u'Lokales - Heinsberg', u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'),
|
||||
(u'Lokales - Jülich', u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'),
|
||||
(u'Lokales - Stolberg', u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'),
|
||||
(u'News - Politik', u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'),
|
||||
(u'News - Aus aller Welt', u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'),
|
||||
(u'News - Wirtschaft', u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'),
|
||||
(u'News - Kultur', u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'),
|
||||
(u'News - Kino', u'http://www.aachener-nachrichten.de/cmlink/kino-rss-1.365019'),
|
||||
(u'News - Digital', u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'),
|
||||
(u'News - Wissenschaft', u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'),
|
||||
(u'News - Hochschule', u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'),
|
||||
(u'News - Auto', u'http://www.aachener-nachrichten.de/cmlink/auto-rss-1.365023'),
|
||||
(u'News - Kurioses', u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'),
|
||||
(u'News - Musik', u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'),
|
||||
(u'News - Tagesthema', u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'),
|
||||
(u'News - Newsticker', u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'),
|
||||
(u'Sport - Aktuell', u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'),
|
||||
(u'Sport - Fußball', u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'),
|
||||
(u'Sport - Bundesliga', u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'),
|
||||
(u'Sport - Alemannia Aachen', u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'),
|
||||
(u'Sport - Volleyball', u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'),
|
||||
(u'Sport - Chio', u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'),
|
||||
(u'Dossier - Kinderuni', u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'),
|
||||
(u'Dossier - Karlspreis', u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'),
|
||||
(u'Dossier - Ritterorden', u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'),
|
||||
(u'Dossier - ZAB-Aachen', u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'),
|
||||
(u'Dossier - Karneval', u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'),
|
||||
(u'Ratgeber - Geld', u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'),
|
||||
(u'Ratgeber - Recht', u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'),
|
||||
(u'Ratgeber - Gesundheit', u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'),
|
||||
(u'Ratgeber - Familie', u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'),
|
||||
(u'Ratgeber - Livestyle', u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'),
|
||||
(u'Ratgeber - Reisen', u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'),
|
||||
(u'Ratgeber - Bauen und Wohnen', u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'),
|
||||
(u'Ratgeber - Bildung und Beruf', u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'),
|
||||
]
|
||||
|
@ -9,18 +9,19 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
oldest_article = 20
|
||||
max_articles_per_feed = 100
|
||||
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
|
||||
index='http://www.adventure-zone.info/fusion/'
|
||||
use_embedded_content=False
|
||||
use_embedded_content = False
|
||||
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'\<table .*?\>'), lambda match: ''),
|
||||
(re.compile(r'\<tbody\>'), lambda match: '')]
|
||||
(re.compile(r'</?table.*?>'), lambda match: ''),
|
||||
(re.compile(r'</?tbody.*?>'), lambda match: '')]
|
||||
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
||||
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
|
||||
remove_tags_after= dict(id='comments')
|
||||
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
|
||||
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
|
||||
|
||||
def parse_feeds (self):
|
||||
'''def parse_feeds (self):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
|
||||
tag=soup.find(name='channel')
|
||||
@ -33,24 +34,33 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
for feed in feeds:
|
||||
for article in feed.articles[:]:
|
||||
article.title=titles[feed.articles.index(article)]
|
||||
return feeds
|
||||
return feeds'''
|
||||
|
||||
|
||||
def get_cover_url(self):
|
||||
'''def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
|
||||
cover=soup.find(id='box_OstatninumerAZ')
|
||||
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
|
||||
return getattr(self, 'cover_url', self.cover_url)
|
||||
|
||||
return getattr(self, 'cover_url', self.cover_url)'''
|
||||
def populate_article_metadata(self, article, soup, first):
    """Derive ``article.title`` from the downloaded article page.

    The page ``<title>`` is matched against ``"<headline> - Adventure Zone"``;
    on a match the headline part becomes the article title. Otherwise the
    text of the first ``<strong>`` element inside ``<body>`` is used. When
    neither source yields any text the existing title is left untouched.

    :param article: object whose ``title`` attribute is updated in place.
    :param soup: parsed page; ``soup.title`` and ``soup.body`` may be None.
    :param first: not used by this method.
    """
    # <title> can be absent, or have no single string child; passing None
    # to re.search() would raise TypeError, so guard before matching.
    title_tag = soup.title
    page_title = title_tag.string if title_tag is not None else None
    match = re.search(r'(.+) - Adventure Zone', page_title) if page_title else None
    if match:
        article.title = match.group(1)
        return

    body = soup.body
    strong = body.find('strong') if body is not None else None
    # Only overwrite the title when the element actually carries text,
    # otherwise the title would be replaced by None.
    if strong is not None and strong.string:
        article.title = strong.string
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
|
||||
skip_tag = skip_tag.findAll(name='a')
|
||||
for r in skip_tag:
|
||||
if r.strong:
|
||||
word=r.strong.string.lower()
|
||||
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
|
||||
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
|
||||
title = soup.title.string.lower()
|
||||
if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)):
|
||||
for r in skip_tag:
|
||||
if r.strong and r.strong.string:
|
||||
word=r.strong.string.lower()
|
||||
if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
|
||||
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
footer=soup.find(attrs={'class':'news-footer middle-border'})
|
||||
|
@ -20,6 +20,7 @@ class Aksiyon (BasicNewsRecipe):
|
||||
auto_cleanup = True
|
||||
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||
ignore_duplicate_articles = { 'title', 'url' }
|
||||
remove_empty_feeds= True
|
||||
feeds = [
|
||||
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
|
||||
|
@ -10,14 +10,12 @@ class Alternet(BasicNewsRecipe):
|
||||
category = 'News, Magazine'
|
||||
description = 'News magazine and online community'
|
||||
feeds = [
|
||||
(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
|
||||
(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
|
||||
(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
|
||||
(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
|
||||
(u'Front Page', u'http://feeds.feedblitz.com/alternet')
|
||||
]
|
||||
|
||||
remove_attributes = ['width', 'align','cellspacing']
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
use_embedded_content = True
|
||||
no_stylesheets = True
|
||||
language = 'en'
|
||||
encoding = 'UTF-8'
|
||||
|
@ -42,7 +42,7 @@ class Ambito_Financiero(BasicNewsRecipe):
|
||||
remove_attributes = ['align']
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.INDEX)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.LOGIN)
|
||||
|
@ -5,14 +5,16 @@ class AdvancedUserRecipe1278347258(BasicNewsRecipe):
|
||||
__author__ = 'rty'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
|
||||
feeds = [(u'Alaska News', u'http://www.adn.com/news/alaska/index.xml'),
|
||||
(u'Business', u'http://www.adn.com/money/index.xml'),
|
||||
(u'Sports', u'http://www.adn.com/sports/index.xml'),
|
||||
(u'Politics', u'http://www.adn.com/politics/index.xml'),
|
||||
(u'Lifestyles', u'http://www.adn.com/life/index.xml'),
|
||||
(u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
|
||||
]
|
||||
(u'Business', u'http://www.adn.com/money/index.xml'),
|
||||
(u'Sports', u'http://www.adn.com/sports/index.xml'),
|
||||
(u'Politics', u'http://www.adn.com/politics/index.xml'),
|
||||
(u'Lifestyles', u'http://www.adn.com/life/index.xml'),
|
||||
(u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
|
||||
]
|
||||
description = ''''Alaska's Newspaper'''
|
||||
publisher = 'http://www.adn.com'
|
||||
category = 'news, Alaska, Anchorage'
|
||||
@ -28,13 +30,13 @@ class AdvancedUserRecipe1278347258(BasicNewsRecipe):
|
||||
conversion_options = {'linearize_tables':True}
|
||||
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'left_col story_mainbar'}),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'story_tools'}),
|
||||
dict(name='p', attrs={'class':'ad_label'}),
|
||||
]
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class':'advertisement'}),
|
||||
]
|
||||
#keep_only_tags = [
|
||||
#dict(name='div', attrs={'class':'left_col story_mainbar'}),
|
||||
#]
|
||||
#remove_tags = [
|
||||
#dict(name='div', attrs={'class':'story_tools'}),
|
||||
#dict(name='p', attrs={'class':'ad_label'}),
|
||||
#]
|
||||
#remove_tags_after = [
|
||||
#dict(name='div', attrs={'class':'advertisement'}),
|
||||
#]
|
||||
|
@ -3,11 +3,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Android_com_pl(BasicNewsRecipe):
|
||||
title = u'Android.com.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Android.com.pl - biggest polish Android site'
|
||||
description = u'Android.com.pl - to największe w Polsce centrum Android OS. Znajdziesz tu: nowości, forum, pomoc, recenzje, gry, aplikacje.'
|
||||
category = 'Android, mobile'
|
||||
language = 'pl'
|
||||
use_embedded_content=True
|
||||
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
||||
cover_url =u'http://android.com.pl/wp-content/themes/android/images/logo.png'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
feeds = [(u'Android', u'http://android.com.pl/component/content/frontpage/frontpage.feed?type=rss')]
|
||||
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
||||
|
@ -37,7 +37,7 @@ class AppleDaily(BasicNewsRecipe):
|
||||
|
||||
|
||||
#def get_browser(self):
|
||||
#br = BasicNewsRecipe.get_browser()
|
||||
#br = BasicNewsRecipe.get_browser(self)
|
||||
#if self.username is not None and self.password is not None:
|
||||
# br.open('http://www.nytimes.com/auth/login')
|
||||
# br.select_form(name='login')
|
||||
|
@ -22,7 +22,7 @@ class Archeowiesci(BasicNewsRecipe):
|
||||
return feeds
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://archeowiesci.pl/wp-login.php')
|
||||
br.select_form(name='loginform')
|
||||
|
20
recipes/asco_de_vida.recipe
Normal file
@ -0,0 +1,20 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class HindustanTimes(BasicNewsRecipe):
    """Recipe for the Spanish-language site "Asco de vida".

    NOTE(review): the class name does not match the title; it looks like it
    was carried over from another recipe. It is kept unchanged here.
    """

    __author__ = 'Krittika Goyal'
    title = u'Asco de vida'
    language = 'es'

    # Single FeedBurner feed; limits are 1 day of age and 25 articles.
    feeds = [('News', 'http://feeds2.feedburner.com/AscoDeVida')]
    oldest_article = 1
    max_articles_per_feed = 25

    # Page handling: only the <div class="box story"> element is kept.
    use_embedded_content = False
    no_stylesheets = True
    keep_only_tags = dict(name='div', attrs={'class': 'box story'})
|
||||
|
19
recipes/astroflesz.recipe
Normal file
@ -0,0 +1,19 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Astroflesz(BasicNewsRecipe):
    """Recipe for astroflesz.pl (category: astronomy, language: Polish)."""

    # --- identity -------------------------------------------------------
    __author__ = 'fenuks'
    title = u'Astroflesz'
    description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
    category = 'astronomy'
    language = 'pl'
    cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'

    # --- feed selection -------------------------------------------------
    feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
    oldest_article = 7
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}

    # --- page clean-up --------------------------------------------------
    use_embedded_content = False
    no_stylesheets = True
    keep_only_tags = [dict(id="k2Container")]
    remove_tags_after = dict(name='div', attrs={'class': 'itemLinks'})
    remove_tags = [
        dict(name='div', attrs={'class': ['itemLinks', 'itemToolbar', 'itemRatingBlock']}),
    ]
|
@ -31,7 +31,7 @@ class Azstarnet(BasicNewsRecipe):
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://azstarnet.com/')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({ 'm':'login'
|
||||
|
17
recipes/badania_net.recipe
Normal file
@ -0,0 +1,17 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class BadaniaNet(BasicNewsRecipe):
    """Recipe for badania.net (category: science, language: Polish)."""

    title = u'badania.net'
    __author__ = 'fenuks'
    description = u'chcesz wiedzieć więcej?'
    category = 'science'
    language = 'pl'
    cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False

    # Keep the article container, then drop category/comment/tag widgets
    # and everything that follows the tag list.
    keep_only_tags = [dict(id='omc-full-article')]
    remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
    remove_tags_after = dict(attrs={'class':'omc-single-tags'})

    # One feed per site category. Every entry must point at the category's
    # ``/feed/`` endpoint; the 'Zdrowie' entry previously pointed at the
    # category page itself, unlike all the other entries.
    feeds = [
        (u'Psychologia', u'http://badania.net/category/psychologia/feed/'),
        (u'Technologie', u'http://badania.net/category/technologie/feed/'),
        (u'Biologia', u'http://badania.net/category/biologia/feed/'),
        (u'Chemia', u'http://badania.net/category/chemia/feed/'),
        (u'Zdrowie', u'http://badania.net/category/zdrowie/feed/'),
        (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/'),
    ]
|
@ -19,6 +19,7 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
#auto_cleanup = True
|
||||
recursions = 1
|
||||
|
||||
ignore_duplicate_articles = {'title'}
|
||||
@ -78,6 +79,7 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
#(u'High School', u'http://www.baltimoresun.com/sports/high-school/rss2.0.xml'),
|
||||
#(u'Outdoors', u'http://www.baltimoresun.com/sports/outdoors/rss2.0.xml'),
|
||||
|
||||
|
||||
## Entertainment ##
|
||||
(u'Celebrity News', u'http://www.baltimoresun.com/entertainment/celebrities/rss2.0.xml'),
|
||||
(u'Arts & Theater', u'http://www.baltimoresun.com/entertainment/arts/rss2.0.xml'),
|
||||
@ -142,12 +144,12 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
(u'Read Street', u'http://www.baltimoresun.com/features/books/read-street/rss2.0.xml'),
|
||||
(u'Z on TV', u'http://www.baltimoresun.com/entertainment/tv/z-on-tv-blog/rss2.0.xml'),
|
||||
|
||||
## Life Blogs ##
|
||||
(u'BMore Green', u'http://weblogs.baltimoresun.com/features/green/index.xml'),
|
||||
(u'Baltimore Insider',u'http://www.baltimoresun.com/features/baltimore-insider-blog/rss2.0.xml'),
|
||||
(u'Homefront', u'http://www.baltimoresun.com/features/parenting/homefront/rss2.0.xml'),
|
||||
(u'Picture of Health', u'http://www.baltimoresun.com/health/blog/rss2.0.xml'),
|
||||
(u'Unleashed', u'http://weblogs.baltimoresun.com/features/mutts/blog/index.xml'),
|
||||
### Life Blogs ##
|
||||
#(u'BMore Green', u'http://weblogs.baltimoresun.com/features/green/index.xml'),
|
||||
#(u'Baltimore Insider',u'http://www.baltimoresun.com/features/baltimore-insider-blog/rss2.0.xml'),
|
||||
#(u'Homefront', u'http://www.baltimoresun.com/features/parenting/homefront/rss2.0.xml'),
|
||||
#(u'Picture of Health', u'http://www.baltimoresun.com/health/blog/rss2.0.xml'),
|
||||
#(u'Unleashed', u'http://weblogs.baltimoresun.com/features/mutts/blog/index.xml'),
|
||||
|
||||
## b the site blogs ##
|
||||
(u'Game Cache', u'http://www.baltimoresun.com/entertainment/bthesite/game-cache/rss2.0.xml'),
|
||||
@ -167,6 +169,7 @@ class BaltimoreSun(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
|
||||
|
||||
def get_article_url(self, article):
|
||||
ans = None
|
||||
try:
|
||||
|
@ -28,6 +28,8 @@ class Barrons(BasicNewsRecipe):
|
||||
|
||||
## Don't grab articles more than 7 days old
|
||||
oldest_article = 7
|
||||
use_javascript_to_login = True
|
||||
requires_version = (0, 9, 16)
|
||||
|
||||
extra_css = '''
|
||||
.datestamp{font-family:Verdana,Geneva,Kalimati,sans-serif; font-size:x-small;}
|
||||
@ -40,7 +42,7 @@ class Barrons(BasicNewsRecipe):
|
||||
.insettipUnit{font-size: x-small;}
|
||||
'''
|
||||
remove_tags = [
|
||||
dict(name ='div', attrs={'class':['tabContainer artTabbedNav','rssToolBox hidden','articleToolbox']}),
|
||||
dict(name ='div', attrs={'class':['sTools sTools-t', 'tabContainer artTabbedNav','rssToolBox hidden','articleToolbox']}),
|
||||
dict(name = 'a', attrs ={'class':'insetClose'})
|
||||
]
|
||||
|
||||
@ -60,21 +62,17 @@ class Barrons(BasicNewsRecipe):
|
||||
]
|
||||
]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://commerce.barrons.com/auth/login')
|
||||
br.select_form(name='login_form')
|
||||
br['user'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
def javascript_login(self, br, username, password):
    """Submit the login form on the Barron's sign-in page.

    Opens the login URL with ``br``, selects the first form on the page,
    fills in the ``username`` and ``password`` fields and submits it.

    :param br: browser object providing ``visit``, ``select_form`` and
        ``submit``.
    :param username: value for the form's ``username`` field.
    :param password: value for the form's ``password`` field.
    """
    br.visit('http://commerce.barrons.com/auth/login')
    login_form = br.select_form(nr=0)
    # Same assignment order as before: username first, then password.
    for field, value in (('username', username), ('password', password)):
        login_form[field] = value
    br.submit(timeout=120)
|
||||
|
||||
## Use the print version of a page when available.
|
||||
|
||||
def print_version(self, url):
|
||||
main, sep, rest = url.rpartition('?')
|
||||
return main + '#printmode'
|
||||
return main + '#text.print'
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
|
||||
|
@ -35,8 +35,8 @@ class Bash_org_pl(BasicNewsRecipe):
|
||||
soup=self.index_to_soup(u'http://bash.org.pl/random/')
|
||||
#date=soup.find('div', attrs={'class':'right'}).string
|
||||
url=soup.find('a', attrs={'class':'qid click'})
|
||||
title=url.string
|
||||
url='http://bash.org.pl' +url['href']
|
||||
title=''
|
||||
url='http://bash.org.pl/random/'
|
||||
articles.append({'title' : title,
|
||||
'url' : url,
|
||||
'date' : '',
|
||||
@ -44,6 +44,8 @@ class Bash_org_pl(BasicNewsRecipe):
|
||||
})
|
||||
return articles
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Title the article after the text of the page's ``qid click`` element.

    If the element is missing or has no text, the title is left unchanged
    instead of raising ``AttributeError`` or being overwritten with None.

    :param article: object whose ``title`` attribute is updated in place.
    :param soup: parsed page supporting ``find(attrs=...)``.
    :param first: not used by this method.
    """
    quote_link = soup.find(attrs={'class':'qid click'})
    if quote_link is not None and quote_link.string:
        article.title = quote_link.string
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
|
@ -25,7 +25,7 @@ class BigOven(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.bigoven.com/account/login?ReturnUrl=/')
|
||||
br.select_form(nr=1)
|
||||
|
@ -1,9 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
import mechanize
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Birmingham post'
|
||||
description = 'Author D.Asbury. News for Birmingham UK'
|
||||
#timefmt = ''
|
||||
# last update 8/9/12
|
||||
__author__ = 'Dave Asbury'
|
||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
|
||||
oldest_article = 2
|
||||
@ -12,27 +14,36 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
#auto_cleanup = True
|
||||
auto_cleanup = True
|
||||
language = 'en_GB'
|
||||
|
||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
|
||||
|
||||
masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
|
||||
masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
|
||||
def get_cover_url(self):
    """Return the URL of the cover image to use for this download.

    The front page at birminghampost.net is searched for an element whose
    ``height`` attribute matches ``3`` and whose ``alt`` text matches
    ``Post``.  Its ``src`` is returned when it can be opened without a
    redirect; in every other case (no such element, no ``src``, or the
    request fails) the stock cover image is returned instead.
    """
    fallback_cover = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'

    soup = self.index_to_soup('http://www.birminghampost.net')
    cov = soup.find(attrs={'height' : re.compile('3'), 'alt' : re.compile('Post')})

    # The front-page layout may change; a missing image must not abort the
    # whole download, so fall back to the stock cover.
    src = cov.get('src') if cov is not None else None
    if not src:
        self.log.debug('No cover image found on the Birmingham Post front page')
        return fallback_cover

    cov2 = str(src)
    self.log.debug('Candidate cover image: %s' % cov2)

    # Probe the image with redirects disabled so that a redirected (moved or
    # placeholder) image counts as a failure.
    br = mechanize.Browser()
    br.set_handle_redirect(False)
    try:
        br.open_novisit(cov2)
    except Exception:
        return fallback_cover
    return cov2
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'id' : 'article-header'}),
|
||||
#dict(name='h1',attrs={'id' : 'article-header'}),
|
||||
dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
|
||||
dict(name='div',attrs={'class' : 'article-image full'}),
|
||||
dict(attrs={'clas' : 'art-o art-align-center otm-1 '}),
|
||||
dict(name='div',attrs={'class' : 'article main'}),
|
||||
#dict(name='p')
|
||||
#dict(attrs={'id' : 'three-col'})
|
||||
]
|
||||
remove_tags = [
|
||||
# dict(name='div',attrs={'class' : 'span-33 last header-links'})
|
||||
|
||||
]
|
||||
feeds = [
|
||||
#(u'News',u'http://www.birminghampost.net/news/rss.xml'),
|
||||
(u'West Mids. News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
|
||||
@ -41,9 +52,3 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
(u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
|
||||
|
||||
]
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;text-align:center;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
@ -1,33 +1,36 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
title = u'Börse-online'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 1
|
||||
title = u'Börse-online'
|
||||
__author__ = 'schuster, Armin Geller'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.dpv.de/images/1995/source.gif'
|
||||
masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg'
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
remove_tags_bevor = [dict(name='h3')]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
|
||||
remove_tags = [dict(attrs={'class':['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}),
|
||||
dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']),
|
||||
dict(name=['h2', 'Gesamtranking', 'h3',''])]
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
encoding = 'iso-8859-1'
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
|
||||
|
||||
cover_url = 'http://www.wirtschaftsmedien-shop.de/s/media/coverimages/7576_2013107.jpg'
|
||||
masthead_url = 'http://upload.wikimedia.org/wikipedia/de/5/56/B%C3%B6rse_Online_Logo.svg'
|
||||
|
||||
remove_tags_after = [dict(name='div', attrs={'class':['artikelfuss', 'rahmen600']})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['breadcrumb', 'rightCol', 'clearall']}),
|
||||
dict(name='div', attrs={'class':['footer', 'artikelfuss']}),
|
||||
]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':['contentWrapper']})
|
||||
]
|
||||
|
||||
feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]
|
||||
|
||||
def print_version(self, url):
    """Return the print-mode address for an article URL.

    Every occurrence of the ``.html#nv=rss`` marker in *url* is swapped for
    ``.html?mode=print``; a URL without the marker is returned unchanged.
    """
    feed_marker = '.html#nv=rss'
    print_marker = '.html?mode=print'
    # Splitting on the marker and re-joining with its replacement is the
    # same substitution as a plain str.replace().
    return print_marker.join(url.split(feed_marker))
|
||||
|
||||
|
||||
|
||||
feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]
|
||||
|
||||
|
@ -40,7 +40,7 @@ class Brecha(BasicNewsRecipe):
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://www.brecha.com.uy/index.php/acceder-miembros')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({ 'task':'login'
|
||||
|
@ -11,16 +11,15 @@ class BusinessWeekMagazine(BasicNewsRecipe):
|
||||
category = 'news'
|
||||
encoding = 'UTF-8'
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'article_body_container'}),
|
||||
]
|
||||
remove_tags = [dict(name='ui'),dict(name='li')]
|
||||
dict(name='div', attrs={'id':'article_body_container'}),
|
||||
]
|
||||
remove_tags = [dict(name='ui'),dict(name='li'),dict(name='div', attrs={'id':['share-email']})]
|
||||
no_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'
|
||||
|
||||
def parse_index(self):
|
||||
|
||||
#Go to the issue
|
||||
soup = self.index_to_soup('http://www.businessweek.com/magazine/news/articles/business_news.htm')
|
||||
|
||||
@ -39,7 +38,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
|
||||
title=self.tag_to_string(div.a).strip()
|
||||
url=div.a['href']
|
||||
soup0 = self.index_to_soup(url)
|
||||
urlprint=soup0.find('li', attrs={'class':'print'}).a['href']
|
||||
urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
|
||||
articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''})
|
||||
|
||||
|
||||
@ -47,7 +46,6 @@ class BusinessWeekMagazine(BasicNewsRecipe):
|
||||
if section_title not in feeds:
|
||||
feeds[section_title] = []
|
||||
feeds[section_title] += articles
|
||||
|
||||
div1 = soup.find ('div', attrs={'class':'column center'})
|
||||
section_title = ''
|
||||
for div in div1.findAll('h5'):
|
||||
@ -57,7 +55,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
|
||||
title=self.tag_to_string(div.a).strip()
|
||||
url=div.a['href']
|
||||
soup0 = self.index_to_soup(url)
|
||||
urlprint=soup0.find('li', attrs={'class':'print'}).a['href']
|
||||
urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
|
||||
articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''})
|
||||
|
||||
if articles:
|
||||
|
@ -17,7 +17,7 @@ class AdvancedUserRecipe1286242553(BasicNewsRecipe):
|
||||
cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d'
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('https://cacm.acm.org/login')
|
||||
br.select_form(nr=1)
|
||||
|
@ -34,7 +34,7 @@ class Caijing(BasicNewsRecipe):
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://service.caijing.com.cn/usermanage/login')
|
||||
br.select_form(name='mainLoginForm')
|
||||
|
@ -132,14 +132,14 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
def get_cover_url(self):
|
||||
from datetime import timedelta, date
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
daysback=1
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
while daysback<7:
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
|
@ -12,10 +12,10 @@ class Chronicle(BasicNewsRecipe):
|
||||
category = 'news'
|
||||
encoding = 'UTF-8'
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'article'}),
|
||||
dict(name='div', attrs={'class':['article','blog-mod']}),
|
||||
]
|
||||
remove_tags = [dict(name='div',attrs={'class':['related module1','maintitle']}),
|
||||
dict(name='div', attrs={'id':['section-nav','icon-row', 'enlarge-popup']}),
|
||||
remove_tags = [dict(name='div',attrs={'class':['related module1','maintitle','entry-utility','object-meta']}),
|
||||
dict(name='div', attrs={'id':['section-nav','icon-row', 'enlarge-popup','confirm-popup']}),
|
||||
dict(name='a', attrs={'class':'show-enlarge enlarge'})]
|
||||
no_javascript = True
|
||||
no_stylesheets = True
|
||||
@ -23,7 +23,7 @@ class Chronicle(BasicNewsRecipe):
|
||||
|
||||
needs_subscription = True
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://chronicle.com/myaccount/login')
|
||||
br.select_form(nr=1)
|
||||
|
@ -73,7 +73,7 @@ class CNN(BasicNewsRecipe):
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
|
0
recipes/conowego_pl.recipe
Executable file → Normal file
35
recipes/contemporary_argentine_writers.recipe
Normal file
@ -0,0 +1,35 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
contemporaryargentinewriters.wordpress.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class contemporaryargentinewriters(BasicNewsRecipe):
    """Recipe for the blog at contemporaryargentinewriters.wordpress.com."""

    # Identification
    title = 'Contemporary Argentine Writers'
    __author__ = 'Darko Miletic'
    description = 'Short stories by Argentine writers (and others) translated into English'
    publisher = 'Dario Bard'
    category = 'fiction, literature, Argentina, english'
    language = 'en_AR'
    publication_type = 'blog'

    # Feed handling
    oldest_article = 25
    max_articles_per_feed = 200
    remove_empty_feeds = True
    use_embedded_content = True
    encoding = 'utf8'

    # Styling; the CSS text is kept verbatim
    no_stylesheets = True
    extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""

    # Reuses the attribute values declared above
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [
        (u'Posts', u'http://contemporaryargentinewriters.wordpress.com/feed/'),
    ]
|
@ -62,7 +62,7 @@ class ilCorriere(BasicNewsRecipe):
|
||||
day = "%.2d" % st.tm_mday
|
||||
#http://images.corriere.it/primapagina/storico/2010_05_17/images/prima_pagina_grande.png
|
||||
cover='http://images.corriere.it/primapagina/storico/'+ year + '_' + month +'_' + day +'/images/prima_pagina_grande.png'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
|
@ -7,25 +7,30 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
#cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
|
||||
__author__ = 'Dave Asbury'
|
||||
description = 'The official website of Countryfile Magazine'
|
||||
# last updated 7/10/12
|
||||
# last updated 8/12/12
|
||||
language = 'en_GB'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 25
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
#articles_are_obfuscated = True
|
||||
ignore_duplicate_articles = {'title'}
|
||||
#article_already_exists = False
|
||||
#feed_hash = ''
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.countryfile.com/')
|
||||
soup = self.index_to_soup('http://www.countryfile.com/magazine')
|
||||
cov = soup.find(attrs={'class' : re.compile('imagecache imagecache-250px_wide')})#'width' : '160',
|
||||
print '&&&&&&&& ',cov,' ***'
|
||||
cov=str(cov)
|
||||
#cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
||||
cov2 = re.findall('/(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
||||
|
||||
cov2 = str(cov2)
|
||||
cov2= "http://www.countryfile.com"+cov2[2:len(cov2)-8]
|
||||
|
||||
cov = soup.find(attrs={'width' : '160', 'class' : re.compile('imagecache imagecache-160px_wide')})
|
||||
print '******** ',cov,' ***'
|
||||
cov2 = str(cov)
|
||||
cov2=cov2[10:101]
|
||||
print '******** ',cov2,' ***'
|
||||
#cov2='http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/1b_0.jpg'
|
||||
# try to get cover - if can't get known cover
|
||||
# try to get cover - if can't get known cover
|
||||
br = browser()
|
||||
|
||||
br.set_handle_redirect(False)
|
||||
@ -45,5 +50,3 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
(u'Countryside', u'http://www.countryfile.com/rss/countryside'),
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
20
recipes/czas_gentlemanow.recipe
Normal file
@ -0,0 +1,20 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CzasGentlemanow(BasicNewsRecipe):
    """Recipe for the category feeds of czasgentlemanow.pl."""

    # Identification
    title = u'Czas Gentlemanów'
    __author__ = 'fenuks'
    description = u'Historia mężczyzn z dala od wielkiej polityki'
    category = 'blog'
    language = 'pl'
    cover_url = 'http://czasgentlemanow.pl/wp-content/uploads/2012/10/logo-Czas-Gentlemanow1.jpg'

    # Feed handling
    oldest_article = 7
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    use_embedded_content = False

    # Page clean-up
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'class':'content'})]
    remove_tags = [dict(attrs={'class':'meta_comments'})]
    remove_tags_after = dict(name='div', attrs={'class':'fblikebutton_button'})

    # One feed per site category
    feeds = [
        (u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'),
        (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'),
        (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'),
        (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'),
        (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'),
        (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/'),
    ]
|
@ -40,7 +40,7 @@ class BHDani(BasicNewsRecipe):
|
||||
remove_attributes = ['height','width','align']
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.INDEX)
|
||||
br.select_form(name='form')
|
||||
|
@ -42,7 +42,7 @@ class DerSpiegel(BasicNewsRecipe):
|
||||
else:
|
||||
return True
|
||||
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.PREFIX + '/meinspiegel/login.html')
|
||||
br.select_form(predicate=has_login_name)
|
||||
|
@ -33,6 +33,21 @@ class DiscoverMagazine(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'listingBar'})]
|
||||
|
||||
# Login stuff
|
||||
needs_subscription = True
|
||||
use_javascript_to_login = True
|
||||
requires_version = (0, 9, 20)
|
||||
|
||||
def javascript_login(self, br, username, password):
    """Sign in at discovermagazine.com using the browser object *br*.

    The home page is opened, the login form is filled with *username* and
    *password*, the sign-in button is submitted and the browser is then
    left running for a further period.
    """
    request_timeout = 120
    br.visit('http://discovermagazine.com', timeout=request_timeout)
    login_form = br.select_form('div.login.section div.form')
    for field, value in (('username', username), ('password', password)):
        login_form[field] = value
    br.submit('input[id="signInButton"]', timeout=request_timeout)
    br.run_for_a_time(20)
|
||||
# End login stuff
|
||||
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('span',attrs={'class':'next'})
|
||||
if pager:
|
||||
|
46
recipes/dobanevinosti.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
dobanevinosti.blogspot.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DobaNevinosti(BasicNewsRecipe):
    """Recipe for the blog at dobanevinosti.blogspot.com."""

    # Identification
    title = 'Doba Nevinosti'
    __author__ = 'Darko Miletic'
    description = 'Filmski blog'
    language = 'sr'
    publication_type = 'blog'

    # Feed handling
    oldest_article = 15
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = True
    auto_cleanup = True

    # Styling; the CSS text is kept verbatim
    no_stylesheets = True
    extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif}
img{margin-bottom: 0.8em; display:block;}
"""

    # Reuses the description and language declared above
    conversion_options = {
        'comment': description,
        'tags': 'film, blog, srbija, tv',
        'publisher': 'Dimitrije Vojinov',
        'language': language,
    }

    remove_attributes = ['lang', 'border']
    # Every U+0110 in the downloaded markup is replaced with U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [
        (u'Tekstovi', u'http://dobanevinosti.blogspot.com/feeds/posts/default'),
    ]

    def preprocess_html(self, soup):
        """Drop inline ``style`` attributes and give every image an ``alt``.

        Images that already carry an ``alt`` attribute are left untouched;
        the others get the placeholder text ``image``.  Returns *soup*.
        """
        for styled in soup.findAll(style=True):
            del styled['style']
        images_without_alt = [img for img in soup.findAll('img') if not img.has_key('alt')]
        for img in images_without_alt:
            img['alt'] = 'image'
        return soup
|
||||
|
@ -7,18 +7,64 @@ class Dzieje(BasicNewsRecipe):
|
||||
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
|
||||
category = 'history'
|
||||
language = 'pl'
|
||||
index='http://dzieje.pl'
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
index = 'http://dzieje.pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript=True
|
||||
no_stylesheets= True
|
||||
keep_only_tags = [dict(name='h1', attrs={'class':'title'}), dict(id='content-area')]
|
||||
remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory')]
|
||||
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
|
||||
#feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
|
||||
|
||||
def append_page(self, soup, appendtag):
    """Pull the follow-up pages of a paginated article into *appendtag*.

    Starting from the ``pager-next`` list item found in *appendtag*, each
    next page is fetched, its content block is stripped of the image/field
    groups listed below and appended to the end of *appendtag*.  Once no
    further ``pager-next`` item is found, pager lists and the tag field are
    removed from *appendtag*.  Nothing is done when the article has no
    ``pager-next`` item.  *soup* is not used.
    """
    next_item = appendtag.find('li', attrs={'class':'pager-next'})
    if not next_item:
        return

    unwanted_groups = ['fieldgroup group-groupkul', 'fieldgroup group-zdjeciekult', 'fieldgroup group-zdjecieciekaw', 'fieldgroup group-zdjecieksiazka', 'fieldgroup group-zdjeciedu', 'field field-type-filefield field-field-zdjecieglownawyd']

    while next_item:
        href = next_item.a['href']
        # Pager links may be site-relative.
        page_url = href if href.startswith('http') else 'http://dzieje.pl' + href
        next_soup = self.index_to_soup(page_url)
        page_content = next_soup.find(id='content-area').find(attrs={'class':'content'})
        for node in page_content.findAll(attrs={'class': unwanted_groups}):
            node.extract()
        appendtag.insert(len(appendtag.contents), page_content)
        next_item = next_soup.find('li', attrs={'class':'pager-next'})

    # NOTE(review): this clean-up is assumed to run only for paginated
    # articles (i.e. to sit inside the original ``if tag:``) - confirm.
    for node in appendtag.findAll(attrs={'class':['item-list', 'field field-type-computed field-field-tagi', ]}):
        node.extract()
|
||||
|
||||
def find_articles(self, url):
    """Collect the articles listed on the section page at *url*.

    Each direct ``div`` child of the listing container contributes one
    entry built from the link inside its ``views-field-title`` element.

    Returns a list of dicts with the keys ``title``, ``url``, ``date`` and
    ``description``; ``date`` and ``description`` are always empty strings.
    """
    listing = self.index_to_soup(url).find(id='content-area').div.div
    found = []
    for row in listing.findAll('div', recursive=False):
        link = row.find(attrs={'class':'views-field-title'}).span.a
        found.append({
            'title': link.string,
            # hrefs are joined onto the site root held in self.index
            'url': self.index + link['href'],
            'date': '',
            'description': '',
        })
    return found
|
||||
|
||||
def parse_index(self):
    """Build the feed list from the site's section pages.

    Returns a list of ``(section title, articles)`` tuples in a fixed
    order, where ``articles`` is whatever ``find_articles`` returns for the
    section's address.
    """
    sections = (
        (u"Wiadomości", 'http://dzieje.pl/wiadomosci'),
        (u"Kultura i sztuka", 'http://dzieje.pl/kulturaisztuka'),
        (u"Film", 'http://dzieje.pl/kino'),
        (u"Rozmaitości historyczne", 'http://dzieje.pl/rozmaitości'),
        (u"Książka", 'http://dzieje.pl/ksiazka'),
        (u"Wystawa", 'http://dzieje.pl/wystawa'),
        (u"Edukacja", 'http://dzieje.pl/edukacja'),
        (u"Dzieje się", 'http://dzieje.pl/wydarzenia'),
    )
    return [(name, self.find_articles(address)) for name, address in sections]
|
||||
|
||||
def preprocess_html(self, soup):
    """Prefix non-absolute links with the site root and append follow-up pages.

    Every anchor whose ``href`` contains neither ``http://`` nor
    ``https://`` gets ``self.index`` prepended.  ``append_page`` is then
    called on the document body, and *soup* is returned.
    """
    for anchor in soup('a'):
        if not anchor.has_key('href'):
            continue
        target = anchor['href']
        if 'http://' in target or 'https://' in target:
            continue
        anchor['href'] = self.index + target
    self.append_page(soup, soup.body)
    return soup
|
@ -116,7 +116,7 @@ class DziennikPolski24(BasicNewsRecipe):
|
||||
loop=True
|
||||
|
||||
def get_browser(self):
|
||||
br=BasicNewsRecipe.get_browser()
|
||||
br=BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.dziennikpolski24.pl/pl/moje-konto/950606-loguj.html')
|
||||
br.select_form(nr = 1)
|
||||
|
@ -41,10 +41,11 @@ class Economist(BasicNewsRecipe):
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
|
||||
'share_inline_header', 'related-items']}),
|
||||
'share_inline_header', 'related-items',
|
||||
'main-content-container']}),
|
||||
{'class': lambda x: x and 'share-links-header' in x},
|
||||
]
|
||||
keep_only_tags = [dict(id='ec-article-body')]
|
||||
keep_only_tags = [dict(name='article')]
|
||||
no_stylesheets = True
|
||||
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||
lambda x:'</html>')]
|
||||
@ -56,7 +57,7 @@ class Economist(BasicNewsRecipe):
|
||||
needs_subscription = False
|
||||
'''
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username and self.password:
|
||||
br.open('http://www.economist.com/user/login')
|
||||
br.select_form(nr=1)
|
||||
@ -70,18 +71,6 @@ class Economist(BasicNewsRecipe):
|
||||
return br
|
||||
'''
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.economist.com/printedition/covers')
|
||||
div = soup.find('div', attrs={'class':lambda x: x and
|
||||
'print-cover-links' in x})
|
||||
a = div.find('a', href=True)
|
||||
url = a.get('href')
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.economist.com' + url
|
||||
soup = self.index_to_soup(url)
|
||||
div = soup.find('div', attrs={'class':'cover-content'})
|
||||
img = div.find('img', src=True)
|
||||
return img.get('src')
|
||||
|
||||
def parse_index(self):
|
||||
return self.economist_parse_index()
|
||||
@ -92,7 +81,7 @@ class Economist(BasicNewsRecipe):
|
||||
if div is not None:
|
||||
img = div.find('img', src=True)
|
||||
if img is not None:
|
||||
self.cover_url = img['src']
|
||||
self.cover_url = re.sub('thumbnail','full',img['src'])
|
||||
feeds = OrderedDict()
|
||||
for section in soup.findAll(attrs={'class':lambda x: x and 'section' in
|
||||
x}):
|
||||
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||
from collections import OrderedDict
|
||||
|
||||
import time, re
|
||||
import re
|
||||
|
||||
class Economist(BasicNewsRecipe):
|
||||
|
||||
@ -37,16 +37,15 @@ class Economist(BasicNewsRecipe):
|
||||
padding: 7px 0px 9px;
|
||||
}
|
||||
'''
|
||||
|
||||
oldest_article = 7.0
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
|
||||
'share_inline_header', 'related-items']}),
|
||||
'share_inline_header', 'related-items',
|
||||
'main-content-container']}),
|
||||
{'class': lambda x: x and 'share-links-header' in x},
|
||||
]
|
||||
keep_only_tags = [dict(id='ec-article-body')]
|
||||
needs_subscription = False
|
||||
keep_only_tags = [dict(name='article')]
|
||||
no_stylesheets = True
|
||||
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||
lambda x:'</html>')]
|
||||
@ -55,28 +54,26 @@ class Economist(BasicNewsRecipe):
|
||||
# downloaded with connection reset by peer (104) errors.
|
||||
delay = 1
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.economist.com/printedition/covers')
|
||||
div = soup.find('div', attrs={'class':lambda x: x and
|
||||
'print-cover-links' in x})
|
||||
a = div.find('a', href=True)
|
||||
url = a.get('href')
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.economist.com' + url
|
||||
soup = self.index_to_soup(url)
|
||||
div = soup.find('div', attrs={'class':'cover-content'})
|
||||
img = div.find('img', src=True)
|
||||
return img.get('src')
|
||||
needs_subscription = False
|
||||
'''
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username and self.password:
|
||||
br.open('http://www.economist.com/user/login')
|
||||
br.select_form(nr=1)
|
||||
br['name'] = self.username
|
||||
br['pass'] = self.password
|
||||
res = br.submit()
|
||||
raw = res.read()
|
||||
if '>Log out<' not in raw:
|
||||
raise ValueError('Failed to login to economist.com. '
|
||||
'Check your username and password.')
|
||||
return br
|
||||
'''
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
try:
|
||||
return self.economist_parse_index()
|
||||
except:
|
||||
raise
|
||||
self.log.warn(
|
||||
'Initial attempt to parse index failed, retrying in 30 seconds')
|
||||
time.sleep(30)
|
||||
return self.economist_parse_index()
|
||||
return self.economist_parse_index()
|
||||
|
||||
def economist_parse_index(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
@ -84,7 +81,7 @@ class Economist(BasicNewsRecipe):
|
||||
if div is not None:
|
||||
img = div.find('img', src=True)
|
||||
if img is not None:
|
||||
self.cover_url = img['src']
|
||||
self.cover_url = re.sub('thumbnail','full',img['src'])
|
||||
feeds = OrderedDict()
|
||||
for section in soup.findAll(attrs={'class':lambda x: x and 'section' in
|
||||
x}):
|
||||
@ -151,154 +148,3 @@ class Economist(BasicNewsRecipe):
|
||||
div.insert(2, img)
|
||||
table.replaceWith(div)
|
||||
return soup
|
||||
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.threadpool import ThreadPool, makeRequests
|
||||
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
|
||||
import time, string, re
|
||||
from datetime import datetime
|
||||
from lxml import html
|
||||
|
||||
class Economist(BasicNewsRecipe):
|
||||
|
||||
title = 'The Economist (RSS)'
|
||||
language = 'en'
|
||||
|
||||
__author__ = "Kovid Goyal"
|
||||
description = ('Global news and current affairs from a European'
|
||||
' perspective. Best downloaded on Friday mornings (GMT).'
|
||||
' Much slower than the print edition based version.')
|
||||
extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
|
||||
oldest_article = 7.0
|
||||
cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
|
||||
#cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
|
||||
'share_inline_header', 'related-items']}),
|
||||
{'class': lambda x: x and 'share-links-header' in x},
|
||||
]
|
||||
keep_only_tags = [dict(id='ec-article-body')]
|
||||
no_stylesheets = True
|
||||
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||
lambda x:'</html>')]
|
||||
|
||||
def parse_index(self):
|
||||
from calibre.web.feeds.feedparser import parse
|
||||
if self.test:
|
||||
self.oldest_article = 14.0
|
||||
raw = self.index_to_soup(
|
||||
'http://feeds.feedburner.com/economist/full_print_edition',
|
||||
raw=True)
|
||||
entries = parse(raw).entries
|
||||
pool = ThreadPool(10)
|
||||
self.feed_dict = {}
|
||||
requests = []
|
||||
for i, item in enumerate(entries):
|
||||
title = item.get('title', _('Untitled article'))
|
||||
published = item.date_parsed
|
||||
if not published:
|
||||
published = time.gmtime()
|
||||
utctime = datetime(*published[:6])
|
||||
delta = datetime.utcnow() - utctime
|
||||
if delta.days*24*3600 + delta.seconds > 24*3600*self.oldest_article:
|
||||
self.log.debug('Skipping article %s as it is too old.'%title)
|
||||
continue
|
||||
link = item.get('link', None)
|
||||
description = item.get('description', '')
|
||||
author = item.get('author', '')
|
||||
|
||||
requests.append([i, link, title, description, author, published])
|
||||
if self.test:
|
||||
requests = requests[:4]
|
||||
requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found,
|
||||
self.eco_article_failed)
|
||||
for r in requests: pool.putRequest(r)
|
||||
pool.wait()
|
||||
|
||||
return self.eco_sort_sections([(t, a) for t, a in
|
||||
self.feed_dict.items()])
|
||||
|
||||
def eco_sort_sections(self, feeds):
|
||||
if not feeds:
|
||||
raise ValueError('No new articles found')
|
||||
order = {
|
||||
'The World This Week': 1,
|
||||
'Leaders': 2,
|
||||
'Letters': 3,
|
||||
'Briefing': 4,
|
||||
'Business': 5,
|
||||
'Finance And Economics': 6,
|
||||
'Science & Technology': 7,
|
||||
'Books & Arts': 8,
|
||||
'International': 9,
|
||||
'United States': 10,
|
||||
'Asia': 11,
|
||||
'Europe': 12,
|
||||
'The Americas': 13,
|
||||
'Middle East & Africa': 14,
|
||||
'Britain': 15,
|
||||
'Obituary': 16,
|
||||
}
|
||||
return sorted(feeds, cmp=lambda x,y:cmp(order.get(x[0], 100),
|
||||
order.get(y[0], 100)))
|
||||
|
||||
def process_eco_feed_article(self, args):
|
||||
from calibre import browser
|
||||
i, url, title, description, author, published = args
|
||||
br = browser()
|
||||
ret = br.open(url)
|
||||
raw = ret.read()
|
||||
url = br.geturl().split('?')[0]+'/print'
|
||||
root = html.fromstring(raw)
|
||||
matches = root.xpath('//*[@class = "ec-article-info"]')
|
||||
feedtitle = 'Miscellaneous'
|
||||
if matches:
|
||||
feedtitle = string.capwords(html.tostring(matches[-1], method='text',
|
||||
encoding=unicode).split('|')[-1].strip())
|
||||
return (i, feedtitle, url, title, description, author, published)
|
||||
|
||||
def eco_article_found(self, req, result):
|
||||
from calibre.web.feeds import Article
|
||||
i, feedtitle, link, title, description, author, published = result
|
||||
self.log('Found print version for article:', title, 'in', feedtitle,
|
||||
'at', link)
|
||||
|
||||
a = Article(i, title, link, author, description, published, '')
|
||||
|
||||
article = dict(title=a.title, description=a.text_summary,
|
||||
date=time.strftime(self.timefmt, a.date), author=a.author, url=a.url)
|
||||
if feedtitle not in self.feed_dict:
|
||||
self.feed_dict[feedtitle] = []
|
||||
self.feed_dict[feedtitle].append(article)
|
||||
|
||||
def eco_article_failed(self, req, tb):
|
||||
self.log.error('Failed to download %s with error:'%req.args[0][2])
|
||||
self.log.debug(tb)
|
||||
|
||||
def eco_find_image_tables(self, soup):
|
||||
for x in soup.findAll('table', align=['right', 'center']):
|
||||
if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
|
||||
yield x
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
body = soup.find('body')
|
||||
for name, val in body.attrs:
|
||||
del body[name]
|
||||
for table in list(self.eco_find_image_tables(soup)):
|
||||
caption = table.find('font')
|
||||
img = table.find('img')
|
||||
div = Tag(soup, 'div')
|
||||
div['style'] = 'text-align:left;font-size:70%'
|
||||
ns = NavigableString(self.tag_to_string(caption))
|
||||
div.insert(0, ns)
|
||||
div.insert(1, Tag(soup, 'br'))
|
||||
img.extract()
|
||||
del img['width']
|
||||
del img['height']
|
||||
div.insert(2, img)
|
||||
table.replaceWith(div)
|
||||
return soup
|
||||
'''
|
||||
|
||||
|
@ -132,14 +132,14 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
def get_cover_url(self):
|
||||
from datetime import timedelta, date
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
daysback=1
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
while daysback<7:
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
|
25
recipes/ekologia_pl.recipe
Normal file
@ -0,0 +1,25 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class EkologiaPl(BasicNewsRecipe):
    """Recipe for Ekologia.pl that fetches the printer-friendly article pages."""

    title = u'Ekologia.pl'
    __author__ = 'fenuks'
    description = u'Portal ekologiczny - eko, ekologia, ochrona przyrody, ochrona środowiska, przyroda, środowisko online. Ekologia i ochrona środowiska. Ekologia dla dzieci.'
    category = 'ecology'
    language = 'pl'
    cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
    ignore_duplicate_articles = {'title', 'url'}
    extra_css = '.title {font-size: 200%;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    # Spelled `remove_attrs` before; the recipe option used everywhere else
    # is `remove_attributes`, so inline styles were never actually stripped.
    remove_attributes = ['style']
    remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']})]

    feeds = [
        (u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'),
        (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'),
        (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0'),
    ]

    def print_version(self, url):
        """Map an article URL onto its printer-friendly counterpart.

        The numeric article id is taken from the ``,<digits>.html`` suffix of
        *url*. When the URL does not have that shape the URL is returned
        unchanged instead of failing on a missing regex match.
        """
        match = re.search(r',(?P<id>\d+)\.html', url)
        if match is None:
            return url
        return 'http://drukuj.ekologia.pl/artykul/' + match.group('id')
|
@ -73,7 +73,7 @@ class heraldo(BasicNewsRecipe):
|
||||
#[url]http://info.elcorreo.com/pdf/06012011-viz.pdf[/url]
|
||||
cover='http://info.elcorreo.com/pdf/'+ day + month + year +'-viz.pdf'
|
||||
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
|
118
recipes/el_diplo.recipe
Normal file
@ -0,0 +1,118 @@
|
||||
# Copyright 2013 Tomás Di Domenico
|
||||
#
|
||||
# This is a news fetching recipe for the Calibre ebook software, for
|
||||
# fetching the Cono Sur edition of Le Monde Diplomatique (www.eldiplo.org).
|
||||
#
|
||||
# This recipe is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This software is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this recipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
|
||||
from contextlib import closing
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
class ElDiplo_Recipe(BasicNewsRecipe):
    """Recipe for the Cono Sur edition of Le Monde Diplomatique (eldiplo.org)."""

    title = u'El Diplo'
    __author__ = 'Tomas Di Domenico'
    description = 'Publicacion mensual de Le Monde Diplomatique, edicion Argentina'
    # Was misspelled `langauge`, so the recipe never declared its language.
    language = 'es_AR'
    needs_subscription = True
    auto_cleanup = True

    def get_cover(self, url):
        """Download the image at *url*, crop it and publish it as ``cover_url``.

        The cropped image is written to a persistent temporary file whose
        path is stored in ``self.cover_url``.
        """
        tmp_cover = PersistentTemporaryFile(suffix=".jpg", prefix="eldiplo_")
        # Only the path is needed; close the handle before the image is
        # written to that path by name.
        tmp_cover.close()
        self.cover_url = tmp_cover.name

        with closing(self.browser.open(url)) as r:
            imgdata = r.read()

        img = Image()
        img.load(imgdata)
        # Full width, half height, offset (0, 0) -- presumably the argument
        # order is (width, height, left, top); confirm against calibre's Image.
        img.crop(img.size[0], img.size[1] // 2, 0, 0)
        img.save(tmp_cover.name)

    def get_browser(self):
        """Return a browser, logged in when credentials were supplied."""
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.eldiplo.org/index.php/login/-/do_login/index.html')
            br.select_form(nr=3)
            br['uName'] = self.username
            br['uPassword'] = self.password
            br.submit()
        # get_cover() reads self.browser, so keep the logged-in instance.
        self.browser = br
        return br

    def parse_index(self):
        """Build the ``[(section, [article, ...]), ...]`` index of the issue.

        Reads the front page to locate the cover and the "Sumario completo"
        link, then walks up to nine summary pages, grouping articles by their
        dossier title. Articles without a dossier go to 'General'.

        Raises:
            ValueError: if the front page has no "Sumario completo" link.
        """
        default_sect = 'General'
        articles = {default_sect: []}
        ans = [default_sect]
        sectionsmarker = 'DOSSIER_TITLE: '
        sectionsre = re.compile('^' + sectionsmarker)

        soup = self.index_to_soup('http://www.eldiplo.org/index.php')

        # The cover is optional: a missing or reshaped cover link must not
        # abort the whole download.
        coverdivs = soup.findAll(True, attrs={'id': ['lmd-foto']})
        a = coverdivs[0].find('a', href=True) if coverdivs else None
        if a is not None and '?imagen=' in a['href']:
            self.get_cover(a['href'].split("?imagen=")[1])

        summaryurl = None
        for div in soup.findAll(True, attrs={'class': ['lmd-leermas']}):
            a = div.find('a', href=True)
            if a is not None and 'Sumario completo' in self.tag_to_string(a, use_alt=True):
                summaryurl = 'http://www.eldiplo.org' + re.sub(r'\?.*', '', a['href'])
        if summaryurl is None:
            raise ValueError('Could not find the "Sumario completo" link on the El Diplo front page')

        for pagenum in range(1, 10):
            soup = self.index_to_soup('{0}/?cms1_paging_p_b32={1}'.format(summaryurl, pagenum))
            thedivs = soup.findAll(True, attrs={'class': ['interna']})

            # An empty page means we have walked past the last summary page.
            if not thedivs:
                break

            for div in thedivs:
                marker = div.find(True, text=sectionsre)
                section = marker.replace(sectionsmarker, '') if marker is not None else ''
                if section == '':
                    section = default_sect

                if section not in articles:
                    articles[section] = []
                    ans.append(section)

                nota = div.find(True, attrs={'class': ['lmd-pl-titulo-nota-dossier']})
                a = nota.find('a', href=True) if nota is not None else None
                if not a:
                    continue

                url = 'http://www.eldiplo.org' + re.sub(r'\?.*', '', a['href'])
                title = self.tag_to_string(a, use_alt=True).strip()

                # Reset per article: previously a missing summary/author left
                # these names unbound or carrying the previous article's text.
                description = ''
                summary_box = div.find(True, attrs={'class': 'lmd-sumario-descript'})
                summary = summary_box.find('p') if summary_box is not None else None
                if summary is not None:
                    description = self.tag_to_string(summary, use_alt=False)

                auth = ''
                aut = div.find(True, attrs={'class': 'lmd-autor-sumario'})
                if aut is not None:
                    auth = self.tag_to_string(aut, use_alt=False).strip()

                articles[section].append(dict(title=title, author=auth, url=url, date=None, description=description, content=''))

        return [(section, articles[section]) for section in ans if section in articles]
|
@ -3,29 +3,34 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElMundoTodayRecipe(BasicNewsRecipe):
|
||||
title = 'El Mundo Today'
|
||||
__author__ = 'atordo'
|
||||
description = u'La actualidad del mañana'
|
||||
description = u'La actualidad del ma\u00f1ana'
|
||||
category = 'Noticias, humor'
|
||||
cover_url = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png'
|
||||
oldest_article = 30
|
||||
oldest_article = 15
|
||||
max_articles_per_feed = 60
|
||||
auto_cleanup = False
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
language = 'es'
|
||||
use_embedded_content = False
|
||||
publication_type = 'blog'
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'</title>.*<!--Begin Article Single-->', re.DOTALL),
|
||||
lambda match: '</title><body>'),
|
||||
#(re.compile(r'^\t{5}<a href.*Permanent Link to ">$'), lambda match: ''),
|
||||
#(re.compile(r'\t{5}</a>$'), lambda match: ''),
|
||||
(re.compile(r'<div class="social4i".*</body>', re.DOTALL),
|
||||
lambda match: '</body>'),
|
||||
lambda match: '</title></head><body>'),
|
||||
(re.compile(r'<img alt="" src="http://www.elmundotoday.com/wp-content/themes/emt/images/otrassecciones-line.gif">'),
|
||||
lambda match: ''),
|
||||
(re.compile(r'<div style="clear: both;"></div>.*</body>', re.DOTALL),
|
||||
lambda match: '</body>')
|
||||
]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'post-wrapper'})
|
||||
dict(name='div', attrs={'class':'post-wrapper '})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'social4i'}),
|
||||
dict(name='span', attrs={'class':'num-comentarios'})
|
||||
]
|
||||
|
||||
remove_attributes = [ 'href', 'title', 'alt' ]
|
||||
@ -36,8 +41,3 @@ class ElMundoTodayRecipe(BasicNewsRecipe):
|
||||
'''
|
||||
|
||||
feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')]
|
||||
|
||||
def get_broser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.set_handle_gzip(True)
|
||||
return br
|
||||
|
@ -26,7 +26,7 @@ class elet_es_irodalom(BasicNewsRecipe):
|
||||
|
||||
#Nem ide a kódba kell beleírni a hozzáférés adatait, hanem azt akkor adod meg, ha le akarod tölteni!
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.es.hu/')
|
||||
br.select_form(name='userfrmlogin')
|
||||
|
93
recipes/elguardian.recipe
Normal file
@ -0,0 +1,93 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
elguardian.com.ar
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElGuardian(BasicNewsRecipe):
    """Recipe for the Argentine weekly El Guardian (elguardian.com.ar)."""

    title = 'El Guardian'
    __author__ = 'Darko Miletic'
    description = "Semanario con todas las tendencias de un pais"
    publisher = 'Editorial Apache SA'
    category = 'news,politics,Argentina'
    oldest_article = 8
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'es_AR'
    remove_empty_feeds = True
    publication_type = 'magazine'
    issn = '1666-7476'
    masthead_url = 'http://elguardian.com.ar/application/templates/frontend/images/home/logo.png'
    extra_css = """
                    body{font-family: Arial,sans-serif}
                    img{margin-bottom: 0.4em; display:block}
                """

    # NOTE(review): the ISSN is passed through the 'isbn' option -- looks
    # deliberate, confirm before changing.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'series': title,
        'isbn': issn,
    }

    keep_only_tags = [dict(attrs={'class':['fotos', 'header_nota', 'nota']})]
    remove_tags = [dict(name=['meta','link','iframe','embed','object'])]
    remove_attributes = ['lang']

    feeds = [
        (u'El Pais', u'http://elguardian.com.ar/RSS/el-pais.xml'),
        (u'Columnistas', u'http://elguardian.com.ar/RSS/columnistas.xml'),
        (u'Personajes', u'http://elguardian.com.ar/RSS/personajes.xml'),
        (u'Tinta roja', u'http://elguardian.com.ar/RSS/tinta-roja.xml'),
        (u'Yo fui', u'http://elguardian.com.ar/RSS/yo-fui.xml'),
        (u'Ciencia', u'http://elguardian.com.ar/RSS/ciencia.xml'),
        (u'Cronicas', u'http://elguardian.com.ar/RSS/cronicas.xml'),
        (u'Culturas', u'http://elguardian.com.ar/RSS/culturas.xml'),
        (u'DxT', u'http://elguardian.com.ar/RSS/dxt.xml'),
        (u'Fierros', u'http://elguardian.com.ar/RSS/fierros.xml'),
        (u'Frente fashion', u'http://elguardian.com.ar/RSS/frente-fashion.xml'),
        (u'Pan y vino', u'http://elguardian.com.ar/RSS/pan-y-vino.xml'),
        (u'Turismo', u'http://elguardian.com.ar/RSS/turismo.xml'),
    ]

    def get_cover_url(self):
        """Return the cover image URL from the home page, or None.

        As a side effect, the second number found in the 'datosNumero' box is
        stored as ``series_index`` in ``conversion_options``.
        """
        soup = self.index_to_soup('http://elguardian.com.ar/')
        udata = soup.find('div', attrs={'class':'datosNumero'})
        if udata:
            sdata = udata.find('div')
            if sdata:
                stra = re.findall(r'\d+', self.tag_to_string(sdata))
                # The index is the second number; skip it when the box
                # changes shape instead of dying with an IndexError.
                if len(stra) > 1:
                    self.conversion_options.update({'series_index':int(stra[1])})
        unumero = soup.find('div', attrs={'class':'ultimoNumero'})
        if unumero:
            img = unumero.find('img', src=True)
            if img:
                return img['src']
        return None

    def preprocess_html(self, soup):
        """Strip inline styles, flatten links and give every image an alt text."""
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                # Plain-text link: keep only its text.
                item.replaceWith(item.string)
            elif limg:
                # Link wrapping an image: keep the image, drop the link-ness.
                item.name = 'div'
                item.attrs = []
            else:
                item.replaceWith(self.tag_to_string(item))
        for item in soup.findAll('img'):
            # has_key() tests tag attributes; kept as in the original call.
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
|
@ -116,7 +116,7 @@ class ElMundo(BasicNewsRecipe):
|
||||
day = "%.2d" % st.tm_mday
|
||||
#http://img.kiosko.net/2011/11/19/es/elmundo.750.jpg
|
||||
cover='http://img.kiosko.net/'+ year + '/' + month + '/' + day +'/es/elmundo.750.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
|
@ -5,6 +5,7 @@ class AdvancedUserRecipe1341650280(BasicNewsRecipe):
|
||||
|
||||
title = u'Empire Magazine'
|
||||
description = 'Author D.Asbury. Film articles from Empire Mag. '
|
||||
language = 'en'
|
||||
__author__ = 'Dave Asbury'
|
||||
# last updated 7/7/12
|
||||
remove_empty_feeds = True
|
||||
@ -15,7 +16,7 @@ class AdvancedUserRecipe1341650280(BasicNewsRecipe):
|
||||
cover_url = 'http://www.empireonline.com/images/magazine/cover.jpg'
|
||||
conversion_options = {
|
||||
'linearize_tables' : True,
|
||||
}
|
||||
}
|
||||
#auto_cleanup = True
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<a href="http://twitter.com/share.*?</a>', re.IGNORECASE | re.DOTALL), lambda match: ''),
|
||||
@ -32,20 +33,20 @@ class AdvancedUserRecipe1341650280(BasicNewsRecipe):
|
||||
(re.compile(r'<!-- USER REVIEWS: START -->.*?<!-- USER REVIEWS: END -->', re.IGNORECASE | re.DOTALL), lambda match: '<!-- USER REVIEWS: START --><!-- USER REVIEWS: END -->'),
|
||||
(re.compile(r'Advertisement', re.IGNORECASE | re.DOTALL), lambda match: ''),
|
||||
(re.compile(r'<a name="haveyoursay".*?now to have your say.', re.IGNORECASE | re.DOTALL), lambda match: ''),
|
||||
]
|
||||
]
|
||||
keep_only_tags = [
|
||||
# dict(name='h1'),
|
||||
# dict(attrs={'class' : 'mediumblack'}),
|
||||
# dict(attrs={'class' : 'mediumblack'}),
|
||||
]
|
||||
remove_tags = [dict(name='td', attrs={'width':'200', 'valign' : 'top'}),
|
||||
dict(name='b'),
|
||||
dict(name='a',attrs={'name' : 'haveyoursay'}),
|
||||
dict(attrs={'class' : 'newslink'}),
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
feeds = [(u'News', u'http://feed43.com/7338478755673147.xml'),
|
||||
(u'Recent Features',u'http://feed43.com/4346347750304760.xml'),
|
||||
(u'Recent Features',u'http://feed43.com/4346347750304760.xml'),
|
||||
(u'Interviews',u'http://feed43.com/3418350077724081.xml'),
|
||||
(u'Film Reviews',u'http://feed43.com/2643703076510627.xml'),
|
||||
]
|
||||
(u'Film Reviews',u'http://feed43.com/2643703076510627.xml'),
|
||||
]
|
||||
|
@ -21,10 +21,11 @@ class Engadget(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = True
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
|
||||
remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
|
||||
#keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
|
||||
#remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
|
||||
#remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
|
||||
|
||||
feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]
|
||||
|
||||
@ -33,5 +34,5 @@ class Engadget(BasicNewsRecipe):
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
'''
|
||||
|
||||
|
23
recipes/eso_pl.recipe
Normal file
@ -0,0 +1,23 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ESO(BasicNewsRecipe):
    """Recipe for the Polish-language ESO news, announcement and picture feeds."""

    title = u'ESO PL'
    __author__ = 'fenuks'
    description = u'ESO, Europejskie Obserwatorium Południowe, buduje i obsługuje najbardziej zaawansowane naziemne teleskopy astronomiczne na świecie'
    category = 'astronomy'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1922519424/eso-twitter-logo.png'
    keep_only_tags = [dict(attrs={'class':'subcl'})]
    remove_tags = [
        dict(id='lang_row'),
        dict(attrs={'class':['pr_typeid', 'pr_news_feature_link', 'outreach_usage', 'hidden']}),
    ]
    feeds = [
        (u'Wiadomo\u015bci', u'http://www.eso.org/public/poland/news/feed/'),
        (u'Og\u0142oszenia', u'http://www.eso.org/public/poland/announcements/feed/'),
        (u'Zdj\u0119cie tygodnia', u'http://www.eso.org/public/poland/images/potw/feed/'),
    ]

    def preprocess_html(self, soup):
        """Rewrite site-relative links (those starting with '/') as absolute eso.org URLs."""
        for anchor in soup.findAll('a', href=True):
            target = anchor['href']
            if not target.startswith('/'):
                continue
            anchor['href'] = 'http://www.eso.org' + target
        return soup
|
@ -76,7 +76,7 @@ class ESPN(BasicNewsRecipe):
|
||||
return soup
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username and self.password:
|
||||
br.set_handle_refresh(False)
|
||||
url = ('https://r.espn.go.com/members/v3_1/login')
|
||||
|
@ -89,7 +89,7 @@ class expansion_spanish(BasicNewsRecipe):
|
||||
day = "%.2d" % st.tm_mday
|
||||
#[url]http://img5.kiosko.net/2011/11/14/es/expansion.750.jpg[/url]
|
||||
cover='http://img5.kiosko.net/'+ year + '/' + month + '/' + day +'/es/expansion.750.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
|
19
recipes/film_org_pl.recipe
Normal file
@ -0,0 +1,19 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class FilmOrgPl(BasicNewsRecipe):
    """Recipe for Film.org.pl built from the article text embedded in its feeds."""

    title = u'Film.org.pl'
    __author__ = 'fenuks'
    description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
    category = 'film'
    language = 'pl'
    cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
    ignore_duplicate_articles = {'title', 'url'}
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    # Plain u'' literals: these patterns contain no backslashes, so they are
    # identical to the former ur'' spelling, which newer Pythons reject.
    preprocess_regexps = [
        # Cut everything from the "read also" heading to the end of the page.
        (re.compile(u'<h3>Przeczytaj także:</h3>.*', re.IGNORECASE|re.DOTALL), lambda m: '</body>'),
        (re.compile(u'<div>Artykuł</div>', re.IGNORECASE), lambda m: ''),
        (re.compile(u'<div>Ludzie filmu</div>', re.IGNORECASE), lambda m: ''),
    ]
    remove_tags = [dict(name='img', attrs={'alt':['Ludzie filmu', u'Artykuł']})]
    feeds = [
        (u'Recenzje', u'http://film.org.pl/r/recenzje/feed/'),
        (u'Artyku\u0142', u'http://film.org.pl/a/artykul/feed/'),
        (u'Analiza', u'http://film.org.pl/a/analiza/feed/'),
        (u'Ranking', u'http://film.org.pl/a/ranking/feed/'),
        (u'Blog', u'http://film.org.pl/kmf/blog/feed/'),
        (u'Ludzie', u'http://film.org.pl/a/ludzie/feed/'),
        (u'Seriale', u'http://film.org.pl/a/seriale/feed/'),
        (u'Oceanarium', u'http://film.org.pl/a/ocenarium/feed/'),
        (u'VHS', u'http://film.org.pl/a/vhs-a/feed/'),
    ]
|
@ -17,6 +17,7 @@ class FilmWebPl(BasicNewsRecipe):
|
||||
preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
|
||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
|
||||
remove_attributes = ['style',]
|
||||
keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
|
||||
feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
||||
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
||||
@ -50,4 +51,9 @@ class FilmWebPl(BasicNewsRecipe):
|
||||
for i in soup.findAll('sup'):
|
||||
if not i.string or i.string.startswith('(kliknij'):
|
||||
i.extract()
|
||||
tag = soup.find(name='ul', attrs={'class':'inline sep-line'})
|
||||
if tag:
|
||||
tag.name = 'div'
|
||||
for t in tag.findAll('li'):
|
||||
t.name = 'div'
|
||||
return soup
|
||||
|
@ -34,7 +34,7 @@ class FinancialTimes_rss(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.INDEX)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.LOGIN)
|
||||
|
@ -40,7 +40,7 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.INDEX)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.LOGIN2)
|
||||
|
@ -18,7 +18,7 @@ class Fleshbot(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = True
|
||||
language = 'en'
|
||||
masthead_url = 'http://cache.gawkerassets.com/assets/kotaku.com/img/logo.png'
|
||||
masthead_url = 'http://fbassets.s3.amazonaws.com/images/uploads/2012/01/fleshbot-logo.png'
|
||||
extra_css = '''
|
||||
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
|
||||
img{margin-bottom: 1em}
|
||||
@ -31,7 +31,7 @@ class Fleshbot(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
feeds = [(u'Articles', u'http://feeds.gawker.com/fleshbot/vip?format=xml')]
|
||||
feeds = [(u'Articles', u'http://www.fleshbot.com/feed')]
|
||||
|
||||
remove_tags = [
|
||||
{'class': 'feedflare'},
|
||||
|
@ -14,26 +14,17 @@ class AdvancedUserRecipe1313693926(BasicNewsRecipe):
|
||||
language = 'de'
|
||||
encoding = 'UTF-8'
|
||||
|
||||
__author__ = 'Armin Geller' # 2011-08-19
|
||||
__author__ = 'Armin Geller' # 2013-02-05 V3
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 50
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':["comments"]}),
|
||||
dict(attrs={'class':['commentlink']}),
|
||||
]
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':["grid_8 articleText"]}),
|
||||
dict(name='div', attrs={'class':["articleTextInnerText"]}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Inhalt:', u'http://www.fluter.de/de/?tpl=907'),
|
||||
]
|
||||
|
||||
extra_css = '.cs_img {margin-right: 10pt;}'
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?tpl=1260'
|
||||
|
||||
|
@ -40,7 +40,7 @@ class FSP(BasicNewsRecipe):
|
||||
re.DOTALL|re.IGNORECASE), lambda match: r'')]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('https://acesso.uol.com.br/login.html')
|
||||
br.form = br.forms().next()
|
||||
|
@ -11,21 +11,21 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
by Chen Wei weichen302@gmx.com, 2012-02-05'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
__author__ = 'Rick Shang, kwetal'
|
||||
language = 'en'
|
||||
version = 1.01
|
||||
|
||||
title = u'Foreign Affairs (Subcription or (free) Registration)'
|
||||
title = u'Foreign Affairs (Subcription)'
|
||||
publisher = u'Council on Foreign Relations'
|
||||
category = u'USA, Foreign Affairs'
|
||||
description = u'The leading forum for serious discussion of American foreign policy and international affairs.'
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
needs_subscription = True
|
||||
|
||||
INDEX = 'http://www.foreignaffairs.com'
|
||||
FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
|
||||
INCLUDE_PREMIUM = False
|
||||
|
||||
|
||||
remove_tags = []
|
||||
@ -68,43 +68,57 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
|
||||
answer = []
|
||||
soup = self.index_to_soup(self.FRONTPAGE)
|
||||
sec_start = soup.findAll('div', attrs={'class':'panel-separator'})
|
||||
#get dates
|
||||
date = re.split('\s\|\s',self.tag_to_string(soup.head.title.string))[0]
|
||||
self.timefmt = u' [%s]'%date
|
||||
|
||||
sec_start = soup.findAll('div', attrs= {'class':'panel-pane'})
|
||||
for sec in sec_start:
|
||||
content = sec.nextSibling
|
||||
if content:
|
||||
section = self.tag_to_string(content.find('h2'))
|
||||
articles = []
|
||||
|
||||
tags = []
|
||||
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
|
||||
tags.append(div)
|
||||
for li in content.findAll('li'):
|
||||
tags.append(li)
|
||||
|
||||
for div in tags:
|
||||
title = url = description = author = None
|
||||
|
||||
if self.INCLUDE_PREMIUM:
|
||||
found_premium = False
|
||||
else:
|
||||
found_premium = div.findAll('span', attrs={'class':
|
||||
'premium-icon'})
|
||||
if not found_premium:
|
||||
tag = div.find('div', attrs={'class': 'views-field-title'})
|
||||
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
|
||||
tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
description = self.tag_to_string(tag_summary)
|
||||
articles.append({'title':title, 'date':None, 'url':url,
|
||||
'description':description, 'author':author})
|
||||
if articles:
|
||||
articles = []
|
||||
section = self.tag_to_string(sec.find('h2'))
|
||||
if 'Books' in section:
|
||||
reviewsection=sec.find('div', attrs = {'class': 'item-list'})
|
||||
for subsection in reviewsection.findAll('div'):
|
||||
subsectiontitle=self.tag_to_string(subsection.span.a)
|
||||
subsectionurl=self.INDEX + subsection.span.a['href']
|
||||
soup1 = self.index_to_soup(subsectionurl)
|
||||
for div in soup1.findAll('div', attrs = {'class': 'views-field-title'}):
|
||||
if div.find('a') is not None:
|
||||
originalauthor=self.tag_to_string(div.findNext('div', attrs = {'class':'views-field-field-article-book-nid'}).div.a)
|
||||
title=subsectiontitle+': '+self.tag_to_string(div.span.a)+' by '+originalauthor
|
||||
url=self.INDEX+div.span.a['href']
|
||||
atr=div.findNext('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
|
||||
if atr is not None:
|
||||
author=self.tag_to_string(atr.span.a)
|
||||
else:
|
||||
author=''
|
||||
desc=div.findNext('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
if desc is not None:
|
||||
description=self.tag_to_string(desc.div.p)
|
||||
else:
|
||||
description=''
|
||||
articles.append({'title':title, 'date':None, 'url':url, 'description':description, 'author':author})
|
||||
subsectiontitle=''
|
||||
else:
|
||||
for div in sec.findAll('div', attrs = {'class': 'views-field-title'}):
|
||||
if div.find('a') is not None:
|
||||
title=self.tag_to_string(div.span.a)
|
||||
url=self.INDEX+div.span.a['href']
|
||||
atr=div.findNext('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
|
||||
if atr is not None:
|
||||
author=self.tag_to_string(atr.span.a)
|
||||
else:
|
||||
author=''
|
||||
desc=div.findNext('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
if desc is not None:
|
||||
description=self.tag_to_string(desc.div.p)
|
||||
else:
|
||||
description=''
|
||||
articles.append({'title':title, 'date':None, 'url':url, 'description':description, 'author':author})
|
||||
if articles:
|
||||
answer.append((section, articles))
|
||||
return answer
|
||||
|
||||
@ -115,15 +129,17 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
|
||||
return soup
|
||||
|
||||
needs_subscription = True
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('https://www.foreignaffairs.com/user?destination=home')
|
||||
br.open('https://www.foreignaffairs.com/user?destination=user%3Fop%3Dlo')
|
||||
br.select_form(nr = 1)
|
||||
br['name'] = self.username
|
||||
br['pass'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def cleanup(self):
|
||||
self.browser.open('http://www.foreignaffairs.com/logout?destination=user%3Fop=lo')
|
||||
|
@ -4,9 +4,10 @@ import re
|
||||
class Gildia(BasicNewsRecipe):
|
||||
title = u'Gildia.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Gildia - cultural site'
|
||||
description = u'Fantastyczny Portal Kulturalny - newsy, recenzje, galerie, wywiady. Literatura, film, gry komputerowe i planszowe, komiks, RPG, sklep. Nie lekceważ potęgi wyobraźni!'
|
||||
cover_url = 'http://www.film.gildia.pl/_n_/portal/redakcja/logo/logo-gildia.pl-500.jpg'
|
||||
category = 'culture'
|
||||
cover_url = 'http://gildia.pl/images/logo-main.png'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
@ -23,10 +24,13 @@ class Gildia(BasicNewsRecipe):
|
||||
content = soup.find('div', attrs={'class':'news'})
|
||||
if 'recenzj' in soup.title.string.lower():
|
||||
for link in content.findAll(name='a'):
|
||||
if 'recenzj' in link['href']:
|
||||
self.log.warn('odnosnik')
|
||||
self.log.warn(link['href'])
|
||||
if 'recenzj' in link['href'] or 'muzyka/plyty' in link['href']:
|
||||
return self.index_to_soup(link['href'], raw=True)
|
||||
if 'fragmen' in soup.title.string.lower():
|
||||
for link in content.findAll(name='a'):
|
||||
if 'fragment' in link['href']:
|
||||
return self.index_to_soup(link['href'], raw=True)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
|
@ -21,6 +21,10 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
|
||||
encoding = 'utf8'
|
||||
publisher = 'Globe & Mail'
|
||||
language = 'en_CA'
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'
|
||||
|
||||
feeds = [
|
||||
@ -44,12 +48,12 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
|
||||
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
|
||||
]
|
||||
|
||||
remove_tags_before = dict(name='h1')
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['ShareArticles', 'topStories']}),
|
||||
dict(href=lambda x: x and 'tracking=' in x),
|
||||
{'class':['articleTools', 'pagination', 'Ads', 'topad',
|
||||
'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
|
||||
#remove_tags_before = dict(name='h1')
|
||||
#remove_tags = [
|
||||
#dict(name='div', attrs={'id':['ShareArticles', 'topStories']}),
|
||||
#dict(href=lambda x: x and 'tracking=' in x),
|
||||
#{'class':['articleTools', 'pagination', 'Ads', 'topad',
|
||||
#'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||
|
@ -1,19 +1,20 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
class Gram_pl(BasicNewsRecipe):
|
||||
title = u'Gram.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Gram.pl - site about computer games'
|
||||
description = u'Serwis społecznościowy o grach: recenzje, newsy, zapowiedzi, encyklopedia gier, forum. Gry PC, PS3, X360, PS Vita, sprzęt dla graczy.'
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
index='http://www.gram.pl'
|
||||
max_articles_per_feed = 100
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
no_stylesheets= True
|
||||
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
||||
#extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
||||
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
||||
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info', 'entry-footer clearfix']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button', 'entry-comment-counter', 'snap_nopreview sharing robots-nocontent', 'sharedaddy sd-sharing-enabled']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
||||
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']}), dict(name='article')]
|
||||
keep_only_tags= [dict(id='articleModule')]
|
||||
remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter']})]
|
||||
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
|
||||
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
|
||||
(u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'),
|
||||
@ -28,35 +29,21 @@ class Gram_pl(BasicNewsRecipe):
|
||||
feed.articles.remove(article)
|
||||
return feeds
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
||||
while nexturl:
|
||||
soup2 = self.index_to_soup('http://www.gram.pl'+ nexturl['href'])
|
||||
r=appendtag.find(id='pgbox')
|
||||
if r:
|
||||
r.extract()
|
||||
pagetext = soup2.find(attrs={'class':'main'})
|
||||
r=pagetext.find('h1')
|
||||
if r:
|
||||
r.extract()
|
||||
r=pagetext.find('h2')
|
||||
if r:
|
||||
r.extract()
|
||||
for r in pagetext.findAll('script'):
|
||||
r.extract()
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
||||
r=appendtag.find(id='pgbox')
|
||||
if r:
|
||||
r.extract()
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
tag=soup.findAll(name='div', attrs={'class':'picbox'})
|
||||
for t in tag:
|
||||
t['style']='float: left;'
|
||||
tag=soup.find(name='div', attrs={'class':'summary'})
|
||||
if tag:
|
||||
tag.find(attrs={'class':'pros'}).insert(0, BeautifulSoup('<h2>Plusy:</h2>').h2)
|
||||
tag.find(attrs={'class':'cons'}).insert(0, BeautifulSoup('<h2>Minusy:</h2>').h2)
|
||||
tag = soup.find(name='section', attrs={'class':'cenzurka'})
|
||||
if tag:
|
||||
rate = tag.p.img['data-ocena']
|
||||
tag.p.img.extract()
|
||||
tag.p.insert(len(tag.p.contents)-2, BeautifulSoup('<h2>Ocena: {0}</h2>'.format(rate)).h2)
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
tag=soup.find(name='span', attrs={'class':'platforma'})
|
||||
if tag:
|
||||
tag.name = 'p'
|
||||
return soup
|
||||
|
@ -65,7 +65,7 @@ class Haaretz_en(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.PREFIX)
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({ 'cb':'parseEngReply'
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
harpers.org
|
||||
'''
|
||||
@ -16,6 +16,7 @@ class Harpers(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
masthead_url = 'http://harpers.org/wp-content/themes/harpers/images/pheader.gif'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -31,27 +32,9 @@ class Harpers(BasicNewsRecipe):
|
||||
.caption{font-family:Verdana,sans-serif;font-size:x-small;color:#666666;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
|
||||
remove_tags = [
|
||||
dict(name='table', attrs={'class':['rcnt','rcnt topline']})
|
||||
,dict(name=['link','object','embed','meta','base'])
|
||||
]
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':['postdetailFull', 'articlePost']}) ]
|
||||
remove_tags = [dict(name=['link','object','embed','meta','base'])]
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [(u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml')]
|
||||
feeds = [(u"Harper's Magazine", u'http://harpers.org/feed/')]
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
index = 'http://harpers.org/'
|
||||
soup = self.index_to_soup(index)
|
||||
link_item = soup.find(name = 'img',attrs= {'class':"cover"})
|
||||
if link_item:
|
||||
cover_url = 'http://harpers.org' + link_item['src']
|
||||
return cover_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(xmlns=True):
|
||||
del item['xmlns']
|
||||
return soup
|
||||
|
@ -1,18 +1,22 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
harpers.org - paid subscription/ printed issue articles
|
||||
This recipe only gets articles published in text format;
|
||||
images and PDFs are ignored
|
||||
If you have an institutional subscription based on access IP, you do not need to enter
|
||||
anything in username/password fields
|
||||
'''
|
||||
|
||||
import time, re
|
||||
import urllib
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Harpers_full(BasicNewsRecipe):
|
||||
title = "Harper's Magazine - articles from printed edition"
|
||||
__author__ = 'Darko Miletic'
|
||||
description = "Harper's Magazine: Founded June 1850."
|
||||
description = "Harper's Magazine, the oldest general-interest monthly in America, explores the issues that drive our national conversation, through long-form narrative journalism and essays, and such celebrated features as the iconic Harper's Index."
|
||||
publisher = "Harpers's"
|
||||
category = 'news, politics, USA'
|
||||
oldest_article = 30
|
||||
@ -21,52 +25,86 @@ class Harpers_full(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
delay = 1
|
||||
language = 'en'
|
||||
needs_subscription = True
|
||||
masthead_url = 'http://www.harpers.org/media/image/Harpers_305x100.gif'
|
||||
publication_type = 'magazine'
|
||||
INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
|
||||
LOGIN = 'http://www.harpers.org'
|
||||
cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
|
||||
extra_css = ' body{font-family: "Georgia",serif} '
|
||||
encoding = 'utf8'
|
||||
needs_subscription = 'optional'
|
||||
masthead_url = 'http://harpers.org/wp-content/themes/harpers/images/pheader.gif'
|
||||
publication_type = 'magazine'
|
||||
LOGIN = 'http://harpers.org/wp-content/themes/harpers/ajax_login.php'
|
||||
extra_css = """
|
||||
body{font-family: adobe-caslon-pro,serif}
|
||||
.category{font-size: small}
|
||||
.articlePost p:first-letter{display: inline; font-size: xx-large; font-weight: bold}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':['postdetailFull','articlePost']}) ]
|
||||
remove_tags = [
|
||||
dict(name='table', attrs={'class':['rcnt','rcnt topline']})
|
||||
,dict(name='link')
|
||||
dict(name='div', attrs={'class':'fRight rightDivPad'})
|
||||
,dict(name=['link','meta','object','embed','iframe'])
|
||||
]
|
||||
remove_attributes=['xmlns']
|
||||
remove_attributes=['xmlns']
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://harpers.org/')
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.LOGIN)
|
||||
br.select_form(nr=1)
|
||||
br['handle' ] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
tt = time.localtime()*1000
|
||||
data = urllib.urlencode({ 'm':self.username
|
||||
,'p':self.password
|
||||
,'rt':'http://harpers.org/'
|
||||
,'tt':tt
|
||||
})
|
||||
br.open(self.LOGIN, data)
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
#find current issue
|
||||
soup = self.index_to_soup('http://harpers.org/')
|
||||
currentIssue=soup.find('div',attrs={'class':'mainNavi'}).find('li',attrs={'class':'curentIssue'})
|
||||
currentIssue_url=self.tag_to_string(currentIssue.a['href'])
|
||||
self.log(currentIssue_url)
|
||||
|
||||
#go to the current issue
|
||||
soup1 = self.index_to_soup(currentIssue_url)
|
||||
date = re.split('\s\|\s',self.tag_to_string(soup1.head.title.string))[0]
|
||||
self.timefmt = u' [%s]'%date
|
||||
|
||||
#get cover
|
||||
coverurl='http://harpers.org/wp-content/themes/harpers/ajax_microfiche.php?img=harpers-'+re.split('harpers.org/',currentIssue_url)[1]+'gif/0001.gif'
|
||||
soup2 = self.index_to_soup(coverurl)
|
||||
self.cover_url = self.tag_to_string(soup2.find('img')['src'])
|
||||
self.log(self.cover_url)
|
||||
articles = []
|
||||
print 'Processing ' + self.INDEX
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
for item in soup.findAll('div', attrs={'class':'title'}):
|
||||
text_link = item.parent.find('img',attrs={'alt':'Text'})
|
||||
if text_link:
|
||||
url = self.LOGIN + item.a['href']
|
||||
title = item.a.contents[0]
|
||||
date = strftime(' %B %Y')
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':''
|
||||
})
|
||||
return [(soup.head.title.string, articles)]
|
||||
count = 0
|
||||
for item in soup1.findAll('div', attrs={'class':'articleData'}):
|
||||
text_links = item.findAll('h2')
|
||||
for text_link in text_links:
|
||||
if count == 0:
|
||||
count = 1
|
||||
else:
|
||||
url = text_link.a['href']
|
||||
title = text_link.a.contents[0]
|
||||
date = strftime(' %B %Y')
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':''
|
||||
})
|
||||
return [(soup1.head.title.string, articles)]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?single=1'
|
||||
|
||||
def cleanup(self):
|
||||
soup = self.index_to_soup('http://harpers.org/')
|
||||
signouturl=self.tag_to_string(soup.find('li', attrs={'class':'subLogOut'}).findNext('li').a['href'])
|
||||
self.log(signouturl)
|
||||
self.browser.open(signouturl)
|
||||
|
||||
|
@ -11,11 +11,11 @@ class HBR(BasicNewsRecipe):
|
||||
timefmt = ' [%B %Y]'
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
recipe_disabled = ('hbr.org has started requiring the use of javascript'
|
||||
' to log into their website. This is unsupported in calibre, so'
|
||||
' this recipe has been disabled. If you would like to see '
|
||||
' HBR supported in calibre, contact hbr.org and ask them'
|
||||
' to provide a javascript free login method.')
|
||||
# recipe_disabled = ('hbr.org has started requiring the use of javascript'
|
||||
# ' to log into their website. This is unsupported in calibre, so'
|
||||
# ' this recipe has been disabled. If you would like to see '
|
||||
# ' HBR supported in calibre, contact hbr.org and ask them'
|
||||
# ' to provide a javascript free login method.')
|
||||
|
||||
LOGIN_URL = 'https://hbr.org/login?request_url=/'
|
||||
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
|
||||
@ -38,46 +38,38 @@ class HBR(BasicNewsRecipe):
|
||||
#articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
|
||||
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
|
||||
'''
|
||||
use_javascript_to_login = True
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
self.logout_url = None
|
||||
|
||||
#'''
|
||||
br.open(self.LOGIN_URL)
|
||||
br.select_form(name='signin-form')
|
||||
br['signin-form:username'] = self.username
|
||||
br['signin-form:password'] = self.password
|
||||
raw = br.submit().read()
|
||||
if '>Sign out<' not in raw:
|
||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||
def javascript_login(self, br, username, password):
|
||||
from calibre.web.jsbrowser.browser import Timeout
|
||||
try:
|
||||
link = br.find_link(text='Sign out')
|
||||
if link:
|
||||
self.logout_url = link.absolute_url
|
||||
except:
|
||||
self.logout_url = self.LOGOUT_URL
|
||||
#'''
|
||||
return br
|
||||
|
||||
def cleanup(self):
|
||||
if self.logout_url is not None:
|
||||
self.browser.open(self.logout_url)
|
||||
br.visit('https://hbr.org/login?request_url=/', timeout=20)
|
||||
except Timeout:
|
||||
pass
|
||||
br.click('#accordion div[tabindex="0"]', wait_for_load=False)
|
||||
f = br.select_form('#signin-form')
|
||||
f['signin-form:username'] = username
|
||||
f['signin-form:password'] = password
|
||||
br.submit(wait_for_load=False)
|
||||
br.run_for_a_time(30)
|
||||
|
||||
def map_url(self, url):
|
||||
if url.endswith('/ar/1'):
|
||||
return url[:-1]+'pr'
|
||||
|
||||
|
||||
def hbr_get_toc(self):
|
||||
#return self.index_to_soup(open('/t/hbr.html').read())
|
||||
# return self.index_to_soup(open('/t/toc.html').read())
|
||||
|
||||
today = date.today()
|
||||
future = today + timedelta(days=30)
|
||||
for x in [x.strftime('%y%m') for x in (future, today)]:
|
||||
past = today - timedelta(days=30)
|
||||
for x in [x.strftime('%y%m') for x in (future, today, past)]:
|
||||
url = self.INDEX + x
|
||||
soup = self.index_to_soup(url)
|
||||
if not soup.find(text='Issue Not Found'):
|
||||
if (not soup.find(text='Issue Not Found') and not soup.find(
|
||||
text="We're Sorry. There was an error processing your request")
|
||||
and 'Exception: java.io.FileNotFoundException' not in
|
||||
unicode(soup)):
|
||||
return soup
|
||||
raise Exception('Could not find current issue')
|
||||
|
||||
@ -85,8 +77,9 @@ class HBR(BasicNewsRecipe):
|
||||
feeds = []
|
||||
current_section = None
|
||||
articles = []
|
||||
for x in soup.find(id='archiveToc').findAll(['h3', 'h4']):
|
||||
if x.name == 'h3':
|
||||
for x in soup.find(id='issueFeaturesContent').findAll(['li', 'h4']):
|
||||
if x.name == 'h4':
|
||||
if x.get('class', None) == 'basic':continue
|
||||
if current_section is not None and articles:
|
||||
feeds.append((current_section, articles))
|
||||
current_section = self.tag_to_string(x).capitalize()
|
||||
@ -102,7 +95,7 @@ class HBR(BasicNewsRecipe):
|
||||
if url.startswith('/'):
|
||||
url = 'http://hbr.org' + url
|
||||
url = self.map_url(url)
|
||||
p = x.parent.find('p')
|
||||
p = x.find('p', attrs={'class':'author'})
|
||||
desc = ''
|
||||
if p is not None:
|
||||
desc = self.tag_to_string(p)
|
||||
@ -114,10 +107,9 @@ class HBR(BasicNewsRecipe):
|
||||
'date':''})
|
||||
return feeds
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.hbr_get_toc()
|
||||
#open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
|
||||
# open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
|
||||
feeds = self.hbr_parse_toc(soup)
|
||||
return feeds
|
||||
|
||||
|
@ -15,23 +15,12 @@ class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
timeout = 5
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'mitte_news'}),
|
||||
dict(name='h1', attrs={'class':'clear'}),
|
||||
dict(name='div', attrs={'class':'meldung_wrapper'})]
|
||||
|
||||
remove_tags_after = dict(name ='p', attrs={'class':'editor'})
|
||||
remove_tags = [dict(id='navi_top_container'),
|
||||
dict(id='navi_bottom'),
|
||||
dict(id='mitte_rechts'),
|
||||
dict(id='navigation'),
|
||||
dict(id='subnavi'),
|
||||
dict(id='social_bookmarks'),
|
||||
dict(id='permalink'),
|
||||
dict(id='content_foren'),
|
||||
dict(id='seiten_navi'),
|
||||
dict(id='adbottom'),
|
||||
dict(id='sitemap'),
|
||||
dict(name='div', attrs={'id':'sitemap'}),
|
||||
dict(name='ul', attrs={'class':'erste_zeile'}),
|
||||
dict(name='ul', attrs={'class':'zweite_zeile'}),
|
||||
dict(name='div', attrs={'class':'navi_top_container'})]
|
||||
dict(name='p', attrs={'class':'size80'})]
|
||||
|
||||
feeds = [
|
||||
('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
|
||||
@ -54,5 +43,3 @@ class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?view=print'
|
||||
|
||||
|
||||
|
@ -53,7 +53,7 @@ class heraldo(BasicNewsRecipe):
|
||||
day = "%.2d" % st.tm_mday
|
||||
#[url]http://oldorigin-www.heraldo.es/20101211/primeras/portada_aragon.pdf[/url]
|
||||
cover='http://oldorigin-www.heraldo.es/'+ year + month + day +'/primeras/portada_aragon.pdf'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
|
@ -16,10 +16,14 @@ class TheHindu(BasicNewsRecipe):
|
||||
|
||||
keep_only_tags = [dict(id='content')]
|
||||
remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
|
||||
dict(id=['email-section', 'right-column', 'printfooter'])]
|
||||
dict(id=['email-section', 'right-column', 'printfooter', 'topover',
|
||||
'slidebox', 'th_footer'])]
|
||||
|
||||
extra_css = '.photo-caption { font-size: smaller }'
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
for t in soup.findAll(['table', 'tr', 'td','center']):
|
||||
t.name = 'div'
|
||||
|
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Historia_org_pl(BasicNewsRecipe):
|
||||
title = u'Historia.org.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = u'history site'
|
||||
description = u'Artykuły dotyczące historii w układzie epok i tematów, forum. Najlepsza strona historii. Matura z historii i egzamin gimnazjalny z historii.'
|
||||
cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
|
||||
category = 'history'
|
||||
language = 'pl'
|
||||
@ -12,16 +12,15 @@ class Historia_org_pl(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
max_articles_per_feed = 100
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=atom'),
|
||||
(u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=atom'),
|
||||
(u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=atom'),
|
||||
(u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=atom'),
|
||||
(u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=atom'),
|
||||
(u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=atom'),
|
||||
(u'Rekonstykcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=atom'),
|
||||
(u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=atom'),
|
||||
(u'Konkursy'), (u'http://www.historia.org.pl/index.php/konkursy.feed?type=atom')]
|
||||
|
||||
feeds = [(u'Wszystkie', u'http://historia.org.pl/feed/'),
|
||||
(u'Wiadomości', u'http://historia.org.pl/Kategoria/wiadomosci/feed/'),
|
||||
(u'Publikacje', u'http://historia.org.pl/Kategoria/artykuly/feed/'),
|
||||
(u'Publicystyka', u'http://historia.org.pl/Kategoria/publicystyka/feed/'),
|
||||
(u'Recenzje', u'http://historia.org.pl/Kategoria/recenzje/feed/'),
|
||||
(u'Projekty', u'http://historia.org.pl/Kategoria/projekty/feed/'),]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
|
@ -21,7 +21,7 @@ class HistoryToday(BasicNewsRecipe):
|
||||
needs_subscription = True
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.historytoday.com/user/login')
|
||||
br.select_form(nr=1)
|
||||
|
BIN
recipes/icons/astroflesz.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/badania_net.png
Normal file
After Width: | Height: | Size: 968 B |
BIN
recipes/icons/czas_gentlemanow.png
Normal file
After Width: | Height: | Size: 24 KiB |
BIN
recipes/icons/ekologia_pl.png
Normal file
After Width: | Height: | Size: 702 B |
BIN
recipes/icons/elguardian.png
Normal file
After Width: | Height: | Size: 305 B |
BIN
recipes/icons/eso_pl.png
Normal file
After Width: | Height: | Size: 3.6 KiB |
BIN
recipes/icons/kurier_galicyjski.png
Normal file
After Width: | Height: | Size: 726 B |
BIN
recipes/icons/libartes.png
Normal file
After Width: | Height: | Size: 282 B |
BIN
recipes/icons/libertad_digital.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/more_intelligent_life.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
recipes/icons/nauka_w_polsce.png
Normal file
After Width: | Height: | Size: 744 B |
BIN
recipes/icons/osworld_pl.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/poradnia_pwn.png
Normal file
After Width: | Height: | Size: 350 B |
BIN
recipes/icons/pravda_rs.png
Normal file
After Width: | Height: | Size: 606 B |
Before Width: | Height: | Size: 605 B After Width: | Height: | Size: 605 B |
BIN
recipes/icons/tvp_info.png
Normal file
After Width: | Height: | Size: 329 B |
BIN
recipes/icons/ubuntu_pomoc_org.png
Normal file
After Width: | Height: | Size: 757 B |
BIN
recipes/icons/wprost_rss.png
Normal file
After Width: | Height: | Size: 1.7 KiB |
BIN
recipes/icons/zaufana_trzecia_strona.png
Normal file
After Width: | Height: | Size: 412 B |
@ -28,12 +28,15 @@ class IlMessaggero(BasicNewsRecipe):
|
||||
recursion = 10
|
||||
|
||||
remove_javascript = True
|
||||
extra_css = ' .bianco31lucida{color: black} '
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='h1', attrs={'class':'titoloLettura2'}),
|
||||
dict(name='h2', attrs={'class':'sottotitLettura'}),
|
||||
dict(name='span', attrs={'class':'testoArticoloG'})
|
||||
keep_only_tags = [dict(name='h1', attrs={'class':['titoloLettura2','titoloart','bianco31lucida']}),
|
||||
dict(name='h2', attrs={'class':['sottotitLettura','grigio16']}),
|
||||
dict(name='span', attrs={'class':'testoArticoloG'}),
|
||||
dict(name='div', attrs={'id':'testodim'})
|
||||
]
|
||||
|
||||
|
||||
def get_cover_url(self):
|
||||
cover = None
|
||||
st = time.localtime()
|
||||
@ -41,7 +44,7 @@ class IlMessaggero(BasicNewsRecipe):
|
||||
month = "%.2d" % st.tm_mon
|
||||
day = "%.2d" % st.tm_mday
|
||||
cover='http://carta.ilmessaggero.it/' + year + month + day + '/jpeg/MSGR_20_CITTA_1.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
@ -55,17 +58,16 @@ class IlMessaggero(BasicNewsRecipe):
|
||||
feeds = [
|
||||
(u'HomePage', u'http://www.ilmessaggero.it/rss/home.xml'),
|
||||
(u'Primo Piano', u'http://www.ilmessaggero.it/rss/initalia_primopiano.xml'),
|
||||
(u'Cronaca Bianca', u'http://www.ilmessaggero.it/rss/initalia_cronacabianca.xml'),
|
||||
(u'Cronaca Nera', u'http://www.ilmessaggero.it/rss/initalia_cronacanera.xml'),
|
||||
(u'Economia e Finanza', u'http://www.ilmessaggero.it/rss/economia.xml'),
|
||||
(u'Politica', u'http://www.ilmessaggero.it/rss/initalia_politica.xml'),
|
||||
(u'Scienza e Tecnologia', u'http://www.ilmessaggero.it/rss/scienza.xml'),
|
||||
(u'Cinema', u'http://www.ilmessaggero.it/rss.php?refresh_ce#'),
|
||||
(u'Viaggi', u'http://www.ilmessaggero.it/rss.php?refresh_ce#'),
|
||||
(u'Cultura', u'http://www.ilmessaggero.it/rss/cultura.xml'),
|
||||
(u'Tecnologia', u'http://www.ilmessaggero.it/rss/tecnologia.xml'),
|
||||
(u'Spettacoli', u'http://www.ilmessaggero.it/rss/spettacoli.xml'),
|
||||
(u'Edizioni Locali', u'http://www.ilmessaggero.it/rss/edlocali.xml'),
|
||||
(u'Roma', u'http://www.ilmessaggero.it/rss/roma.xml'),
|
||||
(u'Cultura e Tendenze', u'http://www.ilmessaggero.it/rss/roma_culturaspet.xml'),
|
||||
(u'Benessere', u'http://www.ilmessaggero.it/rss/benessere.xml'),
|
||||
(u'Sport', u'http://www.ilmessaggero.it/rss/sport.xml'),
|
||||
(u'Calcio', u'http://www.ilmessaggero.it/rss/sport_calcio.xml'),
|
||||
(u'Motori', u'http://www.ilmessaggero.it/rss/sport_motori.xml')
|
||||
(u'Moda', u'http://www.ilmessaggero.it/rss/moda.xml')
|
||||
]
|
||||
|
||||
|
||||
|
@ -47,9 +47,10 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
|
||||
dict(name='img',attrs={'alt' : ['view gallery']}),
|
||||
dict(attrs={'style' : re.compile('.*')}),
|
||||
dict(attrs={'class':lambda x: x and 'voicesRelatedTopics' in x.split()}),
|
||||
]
|
||||
|
||||
keep_only_tags =[dict(attrs={'id':'main'})]
|
||||
keep_only_tags =[dict(attrs={'id':['main','top']})]
|
||||
recursions = 0
|
||||
|
||||
# fixes non compliant html nesting and 'marks' article graphics links
|
||||
@ -69,7 +70,7 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
extra_css = """
|
||||
h1{font-family: Georgia,serif }
|
||||
h1{font-family: Georgia,serif ; font-size: x-large; }
|
||||
body{font-family: Verdana,Arial,Helvetica,sans-serif}
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
.starRating img {float: left}
|
||||
@ -77,16 +78,21 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
.image {clear:left; font-size: x-small; color:#888888;}
|
||||
.articleByTimeLocation {font-size: x-small; color:#888888;
|
||||
margin-bottom:0.2em ; margin-top:0.2em ; display:block}
|
||||
.subtitle {clear:left}
|
||||
.subtitle {clear:left ;}
|
||||
.column-1 h1 { color: #191919}
|
||||
.column-1 h2 { color: #333333}
|
||||
.column-1 h3 { color: #444444}
|
||||
.column-1 p { color: #777777}
|
||||
.column-1 p,a,h1,h2,h3 { margin: 0; }
|
||||
.column-1 div{color:#888888; margin: 0;}
|
||||
.subtitle { color: #777777; font-size: medium;}
|
||||
.column-1 a,h1,h2,h3 { margin: 0; }
|
||||
.column-1 div{margin: 0;}
|
||||
.articleContent {display: block; clear:left;}
|
||||
.articleContent {color: #000000; font-size: medium;}
|
||||
.ivDrip-section {color: #000000; font-size: medium;}
|
||||
.datetime {color: #888888}
|
||||
.title {font-weight:bold;}
|
||||
.storyTop{}
|
||||
.pictureContainer img { max-width: 400px; max-height: 400px;}
|
||||
.image img { max-width: 400px; max-height: 400px;}
|
||||
"""
|
||||
|
||||
oldest_article = 1
|
||||
@ -325,6 +331,20 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
item.contents[0] = ''
|
||||
|
||||
def postprocess_html(self,soup, first_fetch):
|
||||
|
||||
# Mark the subtitle's parent, because the non-compliant nesting causes
|
||||
# <p> tags to be 'popped out' of the h3 tag they are nested in.
|
||||
subtitle = soup.find('h3', attrs={'class' : 'subtitle'})
|
||||
subtitle_div = None
|
||||
if subtitle:
|
||||
subtitle_div = subtitle.parent
|
||||
if subtitle_div:
|
||||
clazz = ''
|
||||
if 'class' in subtitle_div:
|
||||
clazz = subtitle_div['class'] + ' '
|
||||
clazz = clazz + 'subtitle'
|
||||
subtitle_div['class'] = clazz
|
||||
|
||||
#find broken images and remove captions
|
||||
items_to_extract = []
|
||||
for item in soup.findAll('div', attrs={'class' : 'image'}):
|
||||
@ -501,6 +521,9 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
),
|
||||
(u'Opinion',
|
||||
u'http://www.independent.co.uk/opinion/?service=rss'),
|
||||
(u'Voices',
|
||||
u'http://www.independent.co.uk/voices/?service=rss'
|
||||
),
|
||||
(u'Environment',
|
||||
u'http://www.independent.co.uk/environment/?service=rss'),
|
||||
(u'Sport - Athletics',
|
||||
|
@ -1,5 +1,4 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class Informacje_USA(BasicNewsRecipe):
|
||||
title = u'Informacje USA'
|
||||
oldest_article = 7
|
||||
@ -8,11 +7,10 @@ class Informacje_USA(BasicNewsRecipe):
|
||||
description = u'portal wiadomości amerykańskich'
|
||||
category = 'news'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
|
||||
cover_url='http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
|
||||
cover_url='http://www.informacjeusa.com/wp-content/uploads/2013/01/V3BANNER420-90new.jpg'
|
||||
no_stylesheets = True
|
||||
preprocess_regexps = [(re.compile(ur'<p>Zobacz:.*?</p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><a href=".*?Zobacz także:.*?</a></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><p>Zobacz też:.*?</a></p>', re.DOTALL), lambda match: '')]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':'box box-single'})]
|
||||
remove_tags_after= dict(attrs={'class':'tags'})
|
||||
remove_tags= [dict(attrs={'class':['postmetadata', 'tags', 'banner']}), dict(name='a', attrs={'title':['Drukuj', u'Wyślij']})]
|
||||
use_embedded_content = False
|
||||
keep_only_tags=[dict(id='post-area')]
|
||||
remove_tags_after= dict(id='content-area')
|
||||
remove_tags= [dict(attrs={'class':['breadcrumb']}), dict(id=['social-box', 'social-box-vert'])]
|
||||
feeds = [(u'Informacje', u'http://www.informacjeusa.com/feed/')]
|
||||
|