[Sync] Sync with trunk. Revision 9809.
@ -4,6 +4,7 @@ src/calibre/plugins
|
|||||||
resources/images.qrc
|
resources/images.qrc
|
||||||
src/calibre/manual/.build/
|
src/calibre/manual/.build/
|
||||||
src/calibre/manual/cli/
|
src/calibre/manual/cli/
|
||||||
|
src/calibre/manual/template_ref.rst
|
||||||
build
|
build
|
||||||
dist
|
dist
|
||||||
docs
|
docs
|
||||||
@ -13,6 +14,7 @@ resources/scripts.pickle
|
|||||||
resources/ebook-convert-complete.pickle
|
resources/ebook-convert-complete.pickle
|
||||||
resources/builtin_recipes.xml
|
resources/builtin_recipes.xml
|
||||||
resources/builtin_recipes.zip
|
resources/builtin_recipes.zip
|
||||||
|
resources/template-functions.json
|
||||||
setup/installer/windows/calibre/build.log
|
setup/installer/windows/calibre/build.log
|
||||||
src/calibre/translations/.errors
|
src/calibre/translations/.errors
|
||||||
src/cssutils/.svn/
|
src/cssutils/.svn/
|
||||||
@ -31,4 +33,4 @@ nbproject/
|
|||||||
.pydevproject
|
.pydevproject
|
||||||
.settings/
|
.settings/
|
||||||
*.DS_Store
|
*.DS_Store
|
||||||
calibre_plugins/
|
calibre_plugins/
|
||||||
|
392
Changelog.yaml
@ -19,6 +19,398 @@
|
|||||||
# new recipes:
|
# new recipes:
|
||||||
# - title:
|
# - title:
|
||||||
|
|
||||||
|
- version: 0.8.9
|
||||||
|
date: 2011-07-08
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Kobo Touch: Display Preview Tag for book previews on the device"
|
||||||
|
|
||||||
|
- title: "Improved display of grouped search terms in Tag Browser"
|
||||||
|
|
||||||
|
- title: "When adding HTML files to calibre, add an option to process links in breadth first rather than depth first order. Access it via Preferences->Plugins and customize the HTML to ZIP plugin"
|
||||||
|
|
||||||
|
- title: "Conversion pipeline: Add option to control if duplicate entries are allowed when generating the Table of Contents from links."
|
||||||
|
tickets: [806095]
|
||||||
|
|
||||||
|
- title: "Metadata download: When merging results, if the query to the xisbn service hangs, wait no more than 10 seconds. Also try harder to preserve the month when downloading published date. Do not throw away isbnless results if there are some sources that return isbns and some that do not."
|
||||||
|
tickets: [798309]
|
||||||
|
|
||||||
|
- title: "Get Books: Remove OpenLibrary since it has the same files as archive.org. Allow direct downloading from Project Gutenberg."
|
||||||
|
|
||||||
|
- title: "Add functions to the template language that allow getting the last modified time and size of the individual format files for a book. Also add a has_cover() function."
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix true/false searches not working on device views"
|
||||||
|
tickets: [807262]
|
||||||
|
|
||||||
|
- title: "Fix renaming of collections in device views"
|
||||||
|
tickets: [807256]
|
||||||
|
|
||||||
|
- title: "Fix regression that broke the use of the device_db plugboard"
|
||||||
|
tickets: [806483]
|
||||||
|
|
||||||
|
- title: "Kobo driver: Hide Expired Book Status for deleted books. Also fix regression that broke connecting to Kobo devices running very old firmware."
|
||||||
|
tickets: [802083]
|
||||||
|
|
||||||
|
- title: "Fix bug in 0.8.8 that could cause the metadata.db to be left in an unusable state if calibre is interrupted at just the wrong moment or if the db is stored in dropbox"
|
||||||
|
|
||||||
|
- title: "Fix sorting of composite custom columns that display numbers."
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- "Computer Act!ve"
|
||||||
|
- Metro News NL
|
||||||
|
- Spiegel Online International
|
||||||
|
- cracked.com
|
||||||
|
- Engadget
|
||||||
|
- Independent
|
||||||
|
- Telegraph UK
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Blog da Cidadania and Noticias UnB"
|
||||||
|
author: Diniz Bortolotto
|
||||||
|
|
||||||
|
- title: "Galicia Confidential"
|
||||||
|
author: Susana Sotelo Docio
|
||||||
|
|
||||||
|
- title: "South China Morning Post"
|
||||||
|
author: llam
|
||||||
|
|
||||||
|
- title: "Szinti Derigisi"
|
||||||
|
author: thomass
|
||||||
|
|
||||||
|
|
||||||
|
- version: 0.8.8
|
||||||
|
date: 2011-07-01
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Make author names in the Book Details panel clickable. Clicking them takes you to the wikipedia page for the author by default. You may have to tell calibre to display author names in the Book details panel first via Preferences->Look & Feel->Book details. You can change the link for individual authors by right clicking on the author's name in the Tag Browser and selecting Manage Authors."
|
||||||
|
|
||||||
|
- title: "Get Books: Add 'Open Books' as an available book source"
|
||||||
|
|
||||||
|
- title: "Get Books: When a free download is available for a search result, for example, for public domain books, allow direct download of the book into your calibre library."
|
||||||
|
|
||||||
|
- title: "Support for detecting and mounting reader devices on FreeBSD."
|
||||||
|
tickets: [802708]
|
||||||
|
|
||||||
|
- title: "When creating a composite custom column, allow the use of HTML to create links and other markup that display in the Book details panel"
|
||||||
|
|
||||||
|
- title: "Add the swap_around_comma function to the template language."
|
||||||
|
|
||||||
|
- title: "Drivers for HTC G2, Advent Vega, iRiver Story HD, Lark FreeMe and Moovyman mp7"
|
||||||
|
|
||||||
|
- title: "Quick View: Survives changing libraries. Also allow sorting by series index as well as name."
|
||||||
|
|
||||||
|
- title: "Connect to iTunes: Add an option to control how the driver works depending on whether you have iTunes set up to copy files to its media directory or not. Set this option by customizing the Apple driver in Preferences->Plugins. Having iTunes copy media to its storage folder is no longer necessary. See http://www.mobileread.com/forums/showthread.php?t=118559 for details"
|
||||||
|
|
||||||
|
- title: "Remove the delete library functionality from calibre, instead you can now remove a library, so calibre will forget about it, but you have to delete the files manually"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix a regression introduced in 0.8.7 in the Tag Browser that could cause calibre to crash after performing various actions"
|
||||||
|
|
||||||
|
- title: "Fix an unhandled error when deleting all saved searches"
|
||||||
|
tickets: [804383]
|
||||||
|
|
||||||
|
- title: "Fix row numbers in a previous selection being incorrect after a sort operation."
|
||||||
|
|
||||||
|
- title: "Fix ISBN identifier type not recognized if it is in upper case"
|
||||||
|
tickets: [802288]
|
||||||
|
|
||||||
|
- title: "Fix a regression in 0.8.7 that broke reading metadata from MOBI files in the Edit metadata dialog."
|
||||||
|
tickets: [801981]
|
||||||
|
|
||||||
|
- title: "Fix handling of filenames that have an even number of periods before the file extension."
|
||||||
|
tickets: [801939]
|
||||||
|
|
||||||
|
- title: "Fix lack of thread safety in template format system, that could lead to incorrect template evaluation in some cases."
|
||||||
|
tickets: [801944]
|
||||||
|
|
||||||
|
- title: "Fix conversion to PDB when the input document has no text"
|
||||||
|
tickets: [801888]
|
||||||
|
|
||||||
|
- title: "Fix clicking on first letter of author names generating incorrect search."
|
||||||
|
|
||||||
|
- title: "Also fix updating bulk metadata in custom column causing unnecessary Tag Browser refreshes."
|
||||||
|
|
||||||
|
- title: "Fix a regression in 0.8.7 that broke renaming items via the Tag Browser"
|
||||||
|
|
||||||
|
- title: "Fix a regression in 0.8.7 that caused the regex builder wizard to fail with LIT files as the input"
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Zaman Gazetesi
|
||||||
|
- Infobae
|
||||||
|
- El Cronista
|
||||||
|
- Critica de la Argentina
|
||||||
|
- Buenos Aires Economico
|
||||||
|
- El Universal (Venezuela)
|
||||||
|
- wprost
|
||||||
|
- Financial Times UK
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Today's Zaman by thomass"
|
||||||
|
|
||||||
|
- title: "Athens News by Darko Miletic"
|
||||||
|
|
||||||
|
- title: "Catholic News Agency"
|
||||||
|
author: Jetkey
|
||||||
|
|
||||||
|
- title: "Arizona Republic"
|
||||||
|
author: Jim Olo
|
||||||
|
|
||||||
|
- title: "Add Ming Pao Vancouver and Toronto"
|
||||||
|
author: Eddie Lau
|
||||||
|
|
||||||
|
|
||||||
|
- version: 0.8.7
|
||||||
|
date: 2011-06-24
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Connect to iTunes: You now need to tell iTunes to keep its own copy of every ebook. Do this in iTunes by going to Preferences->Advanced and setting the 'Copy files to iTunes Media folder when adding to library' option. To learn about why this is necessary, see: http://www.mobileread.com/forums/showthread.php?t=140260"
|
||||||
|
|
||||||
|
- title: "Add a couple of date related functions to the calibre template language to get 'todays' date and create text based on the value of a date type field"
|
||||||
|
|
||||||
|
- title: "Improved reading of metadata from FB2 files, with support for reading isbns, tags, published date, etc."
|
||||||
|
|
||||||
|
- title: "Driver for the Imagine IMEB5"
|
||||||
|
tickets: [800642]
|
||||||
|
|
||||||
|
- title: "Show the currently used network proxies in Preferences->Miscellaneous"
|
||||||
|
|
||||||
|
- title: "Kobo Touch driver: Show Favorites as a device collection. Various other minor fixes."
|
||||||
|
|
||||||
|
- title: "Content server now sends the Content-Disposition header when sending ebook files."
|
||||||
|
|
||||||
|
- title: "Allow search and replace on comments custom columns."
|
||||||
|
|
||||||
|
- title: "Add a new action 'Quick View' to show the books in your library by the author/tags/series/etc. of the currently selected book, in a separate window. You can add it to your toolbar or right click menu by going to Preferences->Toolbars."
|
||||||
|
|
||||||
|
- title: "Get Books: Add libri.de as a book source. Fix a bug that caused some books downloads to fail. Fixes to the Legimi and beam-ebooks.de stores"
|
||||||
|
tickets: [799367]
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix a memory leak that could result in the leaking of several MB of memory with large libraries"
|
||||||
|
tickets: [800952]
|
||||||
|
|
||||||
|
- title: "Fix the read metadata from format button in the edit metadata dialog using the wrong timezone when setting published date"
|
||||||
|
tickets: [799777]
|
||||||
|
|
||||||
|
- title: "Generating catalog: Fix occasional file in use errors when generating catalogs on windows"
|
||||||
|
|
||||||
|
- title: "Fix clicking on News in Tag Browser not working in non English locales."
|
||||||
|
tickets: [799471]
|
||||||
|
|
||||||
|
- title: "HTML Input: Fix a regression in 0.8.6 that caused CSS stylesheets to be ignored"
|
||||||
|
tickets: [799171]
|
||||||
|
|
||||||
|
- title: "Fix a regression that caused restore database to stop working on some windows systems"
|
||||||
|
|
||||||
|
- title: "EPUB Output: Convert <br> tags with text in them into <divs> as ADE cannot handle them."
|
||||||
|
tickets: [794427]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Le Temps
|
||||||
|
- Perfil
|
||||||
|
- Financial Times UK
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Daytona Beach Journal"
|
||||||
|
author: BRGriff
|
||||||
|
|
||||||
|
- title: "El club del ebook and Frontline"
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
|
||||||
|
- version: 0.8.6
|
||||||
|
date: 2011-06-17
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Builtin support for downloading and installing/updating calibre plugins. Go to Preferences->Plugins and click 'Get new plugins'"
|
||||||
|
description: "When updates for installed plugins are available, calibre will automatically (unobtrusively) notify you"
|
||||||
|
type: major
|
||||||
|
|
||||||
|
- title: "Metadata download configuration: Allow defining a set of 'default' fields for metadata download and quickly switching to/from them"
|
||||||
|
|
||||||
|
- title: "Allow clicking on the news category in the Tag Browser to display all downloaded periodicals"
|
||||||
|
|
||||||
|
- title: "Driver for the Libre Air"
|
||||||
|
|
||||||
|
- title: "Email sending: Allow user to stop email jobs (note that stopping may not actually prevent the email from being sent, depending on when the stop happens). Also automatically abort email sending if it takes longer than 15mins."
|
||||||
|
tickets: [795960]
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "MOBI Output: Allow setting of background color on tables. Also set the border attribute on the table if the table has any border related css defined."
|
||||||
|
tickets: [797580]
|
||||||
|
|
||||||
|
- title: "Nook TSR: Put news sent to the device in My Files/Newspapers instead of My Files/Books."
|
||||||
|
tickets: [796674]
|
||||||
|
|
||||||
|
- title: "MOBI Output: Fix a bug where linking to the very first element in an HTML file could sometimes result in the link pointing to the last element in the previous file."
|
||||||
|
tickets: [797214]
|
||||||
|
|
||||||
|
- title: "CSV catalog: Convert HTML comments to plain text"
|
||||||
|
|
||||||
|
- title: "HTML Input: Ignore links to text files."
|
||||||
|
tickets: [791568]
|
||||||
|
|
||||||
|
- title: "EPUB Output: Change orphaned <td> tags to <div> as they cause ADE to crash."
|
||||||
|
|
||||||
|
- title: "Fix 'Stop selected jobs' button trying to stop the same job multiple times"
|
||||||
|
|
||||||
|
- title: "Database: Explicitly test for case sensitivity on OS X instead of assuming a case insensitive filesystem."
|
||||||
|
tickets: [796258]
|
||||||
|
|
||||||
|
- title: "Get Books: More fixes to the Amazon store plugin"
|
||||||
|
|
||||||
|
- title: "FB2 Input: Do not specify font families/background colors"
|
||||||
|
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Philadelphia Inquirer
|
||||||
|
- Macleans Magazine
|
||||||
|
- Metro UK
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Christian Post, Down To Earth and Words Without Borders"
|
||||||
|
author: sexymax15
|
||||||
|
|
||||||
|
- title: "Noticias R7"
|
||||||
|
author: Diniz Bortolotto
|
||||||
|
|
||||||
|
- title: "UK Daily Mirror"
|
||||||
|
author: Dave Asbury
|
||||||
|
|
||||||
|
- title: "New Musical Express Magazine"
|
||||||
|
author: scissors
|
||||||
|
|
||||||
|
|
||||||
|
- version: 0.8.5
|
||||||
|
date: 2011-06-10
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "A new 'portable' calibre build, useful if you like to carry around calibre and its library on a USB key"
|
||||||
|
type: major
|
||||||
|
description: "For details, see: http://calibre-ebook.com/download_portable"
|
||||||
|
|
||||||
|
- title: "E-book viewer: Remember the last used font size multiplier."
|
||||||
|
tickets: [774343]
|
||||||
|
|
||||||
|
- title: "Preliminary support for the Kobo Touch. Drivers for the ZTE v9 tablet, Samsung S2, Notion Ink Adam and PocketBook 360+"
|
||||||
|
|
||||||
|
- title: "When downloading metadata merge rather than replace tags"
|
||||||
|
|
||||||
|
- title: "Edit metadata dialog: When pasting in an ISBN, if no valid ISBN is present on the clipboard, pop up a box for the user to enter the ISBN"
|
||||||
|
|
||||||
|
- title: "Windows build: Add code to load .pyd python extensions from a zip file. This allows many more files in the calibre installation to be zipped up, speeding up the installer."
|
||||||
|
- title: "Add an action to remove all formats from the selected books to the remove books button"
|
||||||
|
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Various minor bug fixes to the column coloring code"
|
||||||
|
|
||||||
|
- title: "Fix the not() template function"
|
||||||
|
|
||||||
|
- title: "Nook Color/TSR: When sending books to the storage card place them in the My Files/Books subdirectory. Also do not upload cover thumbnails as users report that the NC/TSR don't use them."
|
||||||
|
tickets: [792842]
|
||||||
|
|
||||||
|
- title: "Get Books: Update plugins for Amazon and B&N stores to handle website changes. Enable some stores by default on first run. Add Zixo store"
|
||||||
|
tickets: [792762]
|
||||||
|
|
||||||
|
- title: "Comic Input: Replace the # character in filenames as it can cause problems with conversion/viewing."
|
||||||
|
tickets: [792723]
|
||||||
|
|
||||||
|
- title: "When writing files to zipfile, reset timestamp if it doesn't fit in 1980's vintage storage structures"
|
||||||
|
|
||||||
|
- title: "Amazon metadata plugin: Fix parsing of published date from amazon.de when it has februar in it"
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Ambito
|
||||||
|
- GoComics
|
||||||
|
- Le Monde Diplomatique
|
||||||
|
- Max Planck
|
||||||
|
- express.de
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Ambito Financiero
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
- title: Stiin Tas Technica
|
||||||
|
author: Silviu Cotoara
|
||||||
|
|
||||||
|
- title: "Metro News NL"
|
||||||
|
author: DrMerry
|
||||||
|
|
||||||
|
- title: "Brigitte.de, Polizeipresse DE and Heise Online"
|
||||||
|
author: schuster
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
- version: 0.8.4
|
||||||
|
date: 2011-06-03
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "New and much simpler interface for specifying column coloring via Preferences->Look & Feel->Column Coloring"
|
||||||
|
|
||||||
|
- title: "Driver for Trekstor eBook Player 5M, Samsung Galaxy SII I9100, Motorola Defy and miBuk GAMMA 6.2"
|
||||||
|
tickets: [792091, 791216]
|
||||||
|
|
||||||
|
- title: "Get Books: Add EpubBud, WH Smith and E-book Shoppe stores"
|
||||||
|
|
||||||
|
- title: "When deleting 'all formats except ...', do not delete if it leaves a book with no formats"
|
||||||
|
|
||||||
|
- title: "Change default toolbar to make it a little more new user friendly. The icons have been re-arranged and now the text is always visible by default. You can change that in Preferences->Look & Feel and Preferences->Toolbar"
|
||||||
|
|
||||||
|
- title: "Windows installer: Remember and use previous settings for installing desktop icons, adding to path, etc. This makes the installer a little slower, complaints should go to Microsoft."
|
||||||
|
|
||||||
|
- title: "Template language: Add str_in_list and on_device formatter functions. Make debugging templates a little easier"
|
||||||
|
|
||||||
|
- title: "Allow the user to specify formatting for number type custom columns"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix typo in NOOK TSR driver that prevented it from working on windows"
|
||||||
|
|
||||||
|
- title: "Fix quotes in identifiers causing Tag Browser to be blank."
|
||||||
|
tickets: [791044]
|
||||||
|
|
||||||
|
- title: "Speedup auto complete when there are lots of items (>2500) the downside being that non ASCII characters are not sorted correctly. The threshold can be controlled via Preferences->Tweaks"
|
||||||
|
tickets: [792191]
|
||||||
|
|
||||||
|
- title: "RTF Output: Fix handling of curly brackets"
|
||||||
|
tickets: [791805]
|
||||||
|
|
||||||
|
- title: "Fix searching in Get Books not working with non ASCII characters"
|
||||||
|
tickets: [791788]
|
||||||
|
|
||||||
|
- title: "Fix excessive memory consumption when moving very large files during a metadata change"
|
||||||
|
tickets: [791806]
|
||||||
|
|
||||||
|
- title: "Fix series index being overwritten even when series is turned off in bulk metadata download"
|
||||||
|
tickets: [789990]
|
||||||
|
|
||||||
|
- title: "Fix regression in templates where id and other non standard fields no longer worked."
|
||||||
|
|
||||||
|
- title: "EPUB Output: Fix crash caused by ids with non-ascii characters in them"
|
||||||
|
|
||||||
|
- title: "Try to preserve the timestamps of files in a ZIP container"
|
||||||
|
|
||||||
|
- title: "After adding books always select the most recently added book."
|
||||||
|
tickets: [789343]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- bild.de
|
||||||
|
- CNN
|
||||||
|
- BBC News (fast)
|
||||||
|
- Dilema Veche
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Metro UK
|
||||||
|
author: Dave Asbury
|
||||||
|
|
||||||
|
- title: Alt om Herning and Version2.dk
|
||||||
|
author: Rasmus Lauritsen
|
||||||
|
|
||||||
|
- title: Observatorul cultural
|
||||||
|
author: song2
|
||||||
|
|
||||||
|
|
||||||
- version: 0.8.3
|
- version: 0.8.3
|
||||||
date: 2011-05-27
|
date: 2011-05-27
|
||||||
|
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
ambito.com
|
ambito.com
|
||||||
'''
|
'''
|
||||||
@ -11,51 +9,56 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Ambito(BasicNewsRecipe):
|
class Ambito(BasicNewsRecipe):
|
||||||
title = 'Ambito.com'
|
title = 'Ambito.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
|
||||||
publisher = 'Ambito.com'
|
publisher = 'Editorial Nefir S.A.'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, economy, finances, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'iso-8859-1'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.ambito.com/img/logo_.jpg'
|
masthead_url = 'http://www.ambito.com/img/logo_.jpg'
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
language = 'es_AR'
|
||||||
|
publication_type = 'newsportal'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: "Trebuchet MS",Verdana,sans-serif}
|
||||||
|
.volanta{font-size: small}
|
||||||
|
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
|
||||||
|
"""
|
||||||
|
|
||||||
html2lrf_options = [
|
|
||||||
'--comment', description
|
|
||||||
, '--category', category
|
|
||||||
, '--publisher', publisher
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
||||||
|
remove_tags = [dict(name=['object','link','embed','iframe','meta','link','table','img'])]
|
||||||
remove_tags = [dict(name=['object','link'])]
|
remove_attributes = ['align']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
||||||
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
||||||
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
|
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
|
||||||
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
|
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
|
||||||
,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
|
,(u'Campo' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
|
||||||
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
|
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
|
||||||
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
|
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
|
||||||
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
|
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
|
||||||
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' )
|
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa' )
|
||||||
,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' )
|
|
||||||
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
|
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
|
return url.replace('/noticia.asp?','/noticias/imprimir.asp?')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
|
||||||
soup.head.insert(0,mtag)
|
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
str = item.string
|
||||||
|
if str is None:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = 'es_AR'
|
|
||||||
|
87
recipes/ambito_financiero.recipe
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
ambito.com/diario
|
||||||
|
'''
|
||||||
|
|
||||||
|
import time
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Ambito_Financiero(BasicNewsRecipe):
|
||||||
|
title = 'Ambito Financiero'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'Informacion Libre las 24 horas'
|
||||||
|
publisher = 'Editorial Nefir S.A.'
|
||||||
|
category = 'news, politics, economy, Argentina'
|
||||||
|
no_stylesheets = True
|
||||||
|
encoding = 'cp1252'
|
||||||
|
masthead_url = 'http://www.ambito.com/diario/img/logo_af.gif'
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
needs_subscription = 'optional'
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'es_AR'
|
||||||
|
PREFIX = 'http://www.ambito.com'
|
||||||
|
INDEX = PREFIX + '/diario/index.asp'
|
||||||
|
LOGIN = PREFIX + '/diario/login/entrada.asp'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: "Trebuchet MS",Verdana,sans-serif}
|
||||||
|
.volanta{font-size: small}
|
||||||
|
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
||||||
|
remove_tags = [dict(name=['object','link','embed','iframe','meta','link','table','img'])]
|
||||||
|
remove_attributes = ['align']
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
br.open(self.INDEX)
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
br.open(self.LOGIN)
|
||||||
|
br.select_form(name='frmlogin')
|
||||||
|
br['USER_NAME'] = self.username
|
||||||
|
br['USER_PASS'] = self.password
|
||||||
|
br.submit()
|
||||||
|
return br
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url.replace('/diario/noticia.asp?','/noticias/imprimir.asp?')
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
str = item.string
|
||||||
|
if str is None:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
soup = self.index_to_soup(self.INDEX)
|
||||||
|
cover_item = soup.find('img',attrs={'class':'fotodespliegue'})
|
||||||
|
if cover_item:
|
||||||
|
self.cover_url = self.PREFIX + cover_item['src']
|
||||||
|
articles = []
|
||||||
|
checker = []
|
||||||
|
for feed_link in soup.findAll('a', attrs={'class':['t0_portada','t2_portada','bajada']}):
|
||||||
|
url = self.PREFIX + feed_link['href']
|
||||||
|
title = self.tag_to_string(feed_link)
|
||||||
|
date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime())
|
||||||
|
if url not in checker:
|
||||||
|
checker.append(url)
|
||||||
|
articles.append({
|
||||||
|
'title' :title
|
||||||
|
,'date' :date
|
||||||
|
,'url' :url
|
||||||
|
,'description':u''
|
||||||
|
})
|
||||||
|
return [(self.title, articles)]
|
68
recipes/arizona_republic.recipe
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, jolo'
|
||||||
|
'''
|
||||||
|
azrepublic.com
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1307301031(BasicNewsRecipe):
|
||||||
|
title = u'AZRepublic'
|
||||||
|
__author__ = 'Jim Olo'
|
||||||
|
language = 'en'
|
||||||
|
description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years"
|
||||||
|
publisher = 'AZRepublic/AZCentral'
|
||||||
|
masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
|
||||||
|
cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'
|
||||||
|
category = 'news, politics, USA, AZ, Arizona'
|
||||||
|
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
remove_empty_feeds = True
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
# extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }'
|
||||||
|
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||||
|
|
||||||
|
remove_attributes = ['width','height','h2','subHeadline','style']
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'id':['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}),
|
||||||
|
dict(name='div', attrs={'id':['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}),
|
||||||
|
dict(name='div', attrs={'id':['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}),
|
||||||
|
dict(name='div', attrs={'id':['nav', 'mp', 'subnav', 'jobsDrop']}),
|
||||||
|
dict(name='h6', attrs={'class':['section-header']}),
|
||||||
|
dict(name='a', attrs={'href':['#comments']}),
|
||||||
|
dict(name='div', attrs={'class':['articletools clearfix', 'floatRight']}),
|
||||||
|
dict(name='div', attrs={'id':['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}),
|
||||||
|
dict(name='div', attrs={'id':['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}),
|
||||||
|
dict(name='div', attrs={'id':['blogsHed', 'blog_comments', 'blogByline','blogTopics']}),
|
||||||
|
dict(name='div', attrs={'id':['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}),
|
||||||
|
dict(name='div', attrs={'id':['ttdHeader', 'ttdTimeWeather']}),
|
||||||
|
dict(name='div', attrs={'id':['membersRightMain', 'deals-header-wrap']}),
|
||||||
|
dict(name='div', attrs={'id':['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}),
|
||||||
|
dict(name='h1', attrs={'id':['SEOtext']}),
|
||||||
|
dict(name='table', attrs={'class':['ap-mediabox-table']}),
|
||||||
|
dict(name='p', attrs={'class':['ap_para']}),
|
||||||
|
dict(name='span', attrs={'class':['source-org vcard', 'org fn']}),
|
||||||
|
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/privacy']}),
|
||||||
|
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/terms']}),
|
||||||
|
dict(name='div', attrs={'id':['onespot_nextclick']}),
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'FrontPage', u'http://www.azcentral.com/rss/feeds/republicfront.xml'),
|
||||||
|
(u'TopUS-News', u'http://hosted.ap.org/lineups/USHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
||||||
|
(u'WorldNews', u'http://hosted.ap.org/lineups/WORLDHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
||||||
|
(u'TopBusiness', u'http://hosted.ap.org/lineups/BUSINESSHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
||||||
|
(u'Entertainment', u'http://hosted.ap.org/lineups/ENTERTAINMENT.rss?SITE=AZPHG&SECTION=HOME'),
|
||||||
|
(u'ArizonaNews', u'http://www.azcentral.com/rss/feeds/news.xml'),
|
||||||
|
(u'Gilbert', u'http://www.azcentral.com/rss/feeds/gilbert.xml'),
|
||||||
|
(u'Chandler', u'http://www.azcentral.com/rss/feeds/chandler.xml'),
|
||||||
|
(u'DiningReviews', u'http://www.azcentral.com/rss/feeds/diningreviews.xml'),
|
||||||
|
(u'AZBusiness', u'http://www.azcentral.com/rss/feeds/business.xml'),
|
||||||
|
(u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'),
|
||||||
|
(u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646')
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
70
recipes/athens_news.recipe
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.athensnews.gr
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AthensNews(BasicNewsRecipe):
|
||||||
|
title = 'Athens News'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'Greece in English since 1952'
|
||||||
|
publisher = 'NEP Publishing Company SA'
|
||||||
|
category = 'news, politics, Greece, Athens'
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 200
|
||||||
|
no_stylesheets = True
|
||||||
|
encoding = 'utf8'
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'en_GR'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
masthead_url = 'http://www.athensnews.gr/sites/athensnews/themes/athensnewsv3/images/logo.jpg'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
|
img{margin-bottom: 0.4em; display:block}
|
||||||
|
.big{font-size: xx-large; font-family: Georgia,serif}
|
||||||
|
.articlepubdate{font-size: small; color: gray; font-family: Georgia,serif}
|
||||||
|
.lezanta{font-size: x-small; font-weight: bold; text-align: left; margin-bottom: 1em; display: block}
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
, 'linearize_tables' : True
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['meta','link'])
|
||||||
|
]
|
||||||
|
keep_only_tags=[
|
||||||
|
dict(name='span',attrs={'class':'big'})
|
||||||
|
,dict(name='td', attrs={'class':['articlepubdate','text']})
|
||||||
|
]
|
||||||
|
remove_attributes=['lang']
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'News' , u'http://www.athensnews.gr/category/1/feed' )
|
||||||
|
,(u'Politics' , u'http://www.athensnews.gr/category/8/feed' )
|
||||||
|
,(u'Business' , u'http://www.athensnews.gr/category/2/feed' )
|
||||||
|
,(u'Economy' , u'http://www.athensnews.gr/category/11/feed')
|
||||||
|
,(u'Community' , u'http://www.athensnews.gr/category/5/feed' )
|
||||||
|
,(u'Arts' , u'http://www.athensnews.gr/category/3/feed' )
|
||||||
|
,(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed' )
|
||||||
|
,(u'Sports' , u'http://www.athensnews.gr/category/4/feed' )
|
||||||
|
,(u'Travel' , u'http://www.athensnews.gr/category/6/feed' )
|
||||||
|
,(u'Letters' , u'http://www.athensnews.gr/category/44/feed')
|
||||||
|
,(u'Media' , u'http://www.athensnews.gr/multimedia/feed' )
|
||||||
|
]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url + '?action=print'
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
39
recipes/automatiseringgids.recipe
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class autogids(BasicNewsRecipe):
|
||||||
|
title = u'Automatiseringgids IT'
|
||||||
|
oldest_article = 7
|
||||||
|
__author__ = 'DrMerry'
|
||||||
|
description = 'IT-nieuws van Automatiseringgids'
|
||||||
|
language = 'nl'
|
||||||
|
publisher = 'AutomatiseringGids'
|
||||||
|
category = 'Nieuws, IT, Nederlandstalig'
|
||||||
|
simultaneous_downloads = 5
|
||||||
|
#delay = 1
|
||||||
|
timefmt = ' [%A, %d %B, %Y]'
|
||||||
|
#timefmt = ''
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
cover_url = 'http://www.automatiseringgids.nl/siteimg/header_logo.gif'
|
||||||
|
keep_only_tags = [dict(id=['content'])]
|
||||||
|
extra_css = '.artikelheader {font-size:0.8em; color: #666;} .artikelintro {font-weight:bold} div.imgArticle {float: right; margin: 0 0em 1em 1em; display: block; position: relative; } \
|
||||||
|
h2 { margin: 0 0 0.5em; min-height: 30px; font-size: 1.5em; letter-spacing: -0.2px; margin: 0 0 0.5em; color: black; font-weight: bold; line-height: 1.2em; padding: 4px 3px 0; }'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
remove_tags = [dict(name='div', attrs={'id':['loginbox','reactiecollapsible','reactiebox']}),
|
||||||
|
dict(name='div', attrs={'class':['column_a','column_c','bannerfullsize','reactieheader','reactiecollapsible','formulier','artikel_headeroptions']}),
|
||||||
|
dict(name='ul', attrs={'class':['highlightlist']}),
|
||||||
|
dict(name='input', attrs={'type':['button']}),
|
||||||
|
dict(name='div', attrs={'style':['display:block; width:428px; height:30px; float:left;']}),
|
||||||
|
]
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: ''),
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [(u'Actueel', u'http://www.automatiseringgids.nl/rss.aspx')]
|
@ -1,27 +1,30 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010 - 2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
news.bbc.co.uk
|
news.bbc.co.uk
|
||||||
'''
|
'''
|
||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
class BBC(BasicNewsRecipe):
|
class BBC(BasicNewsRecipe):
|
||||||
title = 'BBC News (fast)'
|
title = 'BBC News (fast)'
|
||||||
__author__ = 'Darko Miletic, Starson17'
|
__author__ = 'Darko Miletic, Starson17'
|
||||||
description = 'News from UK. A much faster version that does not download pictures'
|
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
#delay = 1
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
publisher = 'BBC'
|
publisher = 'BBC'
|
||||||
category = 'news, UK, world'
|
category = 'news, UK, world'
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
masthead_url = 'http://news.bbcimg.co.uk/img/1_0_1/cream/hi/news/news-blocks.gif'
|
||||||
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
extra_css = """
|
||||||
|
body{ font-family: Verdana,Helvetica,Arial,sans-serif }
|
||||||
|
.introduction{font-weight: bold}
|
||||||
|
.story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small}
|
||||||
|
.story-feature h2{text-align: center; text-transform: uppercase}
|
||||||
|
"""
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
,'tags' : category
|
,'tags' : category
|
||||||
@ -31,31 +34,54 @@ class BBC(BasicNewsRecipe):
|
|||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':['layout-block-a layout-block']})
|
dict(name='div', attrs={'class':['layout-block-a layout-block']})
|
||||||
,dict(attrs={'class':['story-body','storybody']})
|
,dict(attrs={'class':['story-body','storybody']})
|
||||||
|
,dict(attrs={'id':['meta-information','story-body']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper', \
|
dict(name='div', attrs={'class':['story-feature related narrow', \
|
||||||
'story-feature wide ', 'story-feature narrow']})
|
'share-help', 'embedded-hyper', \
|
||||||
, dict(name=['img'])
|
'story-feature wide ', \
|
||||||
]
|
'story-feature narrow', \
|
||||||
|
'hidden','story-actions', \
|
||||||
|
'embedded-hyper']})
|
||||||
|
,dict(name=['img','meta','link','object','embed','iframe','base'])
|
||||||
|
,dict(attrs={'class':['hidden','videoInStoryC']})
|
||||||
|
,dict(attrs={'id':['bbccom_sponsor_section','toggle-controls', \
|
||||||
|
'toggle-images','toggle-title']})
|
||||||
|
]
|
||||||
|
|
||||||
remove_attributes = ['width','height']
|
remove_attributes = ['width','height','xmlns:og','lang','clear']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
|
('Top Stories' , 'http://feeds.bbci.co.uk/news/rss.xml' ),
|
||||||
('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
|
('Science/Environment', 'http://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
|
||||||
('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
|
('Technology' , 'http://feeds.bbci.co.uk/news/technology/rss.xml' ),
|
||||||
('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
|
('Entertainment/Arts' , 'http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml' ),
|
||||||
('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
|
('Magazine' , 'http://feeds.bbci.co.uk/news/magazine/rss.xml' ),
|
||||||
('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
|
('Business' , 'http://feeds.bbci.co.uk/news/business/rss.xml' ),
|
||||||
('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
|
('Politics' , 'http://feeds.bbci.co.uk/news/politics/rss.xml' ),
|
||||||
('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
|
('Health' , 'http://feeds.bbci.co.uk/news/health/rss.xml' ),
|
||||||
('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
|
('US&Canada' , 'http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml' ),
|
||||||
('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
|
('Latin America' , 'http://feeds.bbci.co.uk/news/world/latin_america/rss.xml' ),
|
||||||
('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
|
('Europe' , 'http://feeds.bbci.co.uk/news/world/europe/rss.xml' ),
|
||||||
('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
|
('South Asia' , 'http://feeds.bbci.co.uk/news/world/south_asia/rss.xml' ),
|
||||||
('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
|
('England' , 'http://feeds.bbci.co.uk/news/england/rss.xml' ),
|
||||||
|
('Asia-Pacific' , 'http://feeds.bbci.co.uk/news/world/asia_pacific/rss.xml' ),
|
||||||
|
('Africa' , 'http://feeds.bbci.co.uk/news/world/africa/rss.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('left'):
|
||||||
|
item.name='span'
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
return soup
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'Bild.de'
|
title = u'Bild.de'
|
||||||
__author__ = 'schuster'
|
__author__ = 'schuster'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'de'
|
language = 'de'
|
||||||
@ -12,11 +13,25 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
|
|
||||||
# get cover from myspace
|
# get cover from myspace
|
||||||
cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'
|
cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'
|
||||||
|
masthead_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'
|
||||||
|
|
||||||
# set what to fetch on the site
|
# set what to fetch on the site
|
||||||
remove_tags_before = dict(name = 'h2', attrs={'id':'cover'})
|
remove_tags_before = dict(name = 'h2', attrs={'id':'cover'})
|
||||||
remove_tags_after = dict(name ='div', attrs={'class':'back'})
|
remove_tags_after = dict(name ='div', attrs={'class':'back'})
|
||||||
|
|
||||||
|
|
||||||
|
# remove things on the site that we don't want
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':'credit'}),
|
||||||
|
dict(name='div', attrs={'class':'index'}),
|
||||||
|
dict(name='div', attrs={'id':'zstart31'}),
|
||||||
|
dict(name='div', attrs={'class':'hentry'}),
|
||||||
|
dict(name='div', attrs={'class':'back'}),
|
||||||
|
dict(name='div', attrs={'class':'pagination'}),
|
||||||
|
dict(name='div', attrs={'class':'header'}),
|
||||||
|
dict(name='div', attrs={'class':'element floatL'}),
|
||||||
|
dict(name='div', attrs={'class':'stWrap'})
|
||||||
|
]
|
||||||
|
|
||||||
# thanx to kiklop74 for code (see sticky thread -> Recipes - Re-usable code)
|
# thanx to kiklop74 for code (see sticky thread -> Recipes - Re-usable code)
|
||||||
# this one removes a lot of direct-link's
|
# this one removes a lot of direct-link's
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -42,5 +57,18 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
(u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
|
(u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
|
||||||
(u'Sport', u'http://rss.bild.de/bild-sport.xml'),
|
(u'Sport', u'http://rss.bild.de/bild-sport.xml'),
|
||||||
(u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
|
(u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
|
||||||
(u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml')
|
(u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml'),
|
||||||
|
(u'Reg. - Berlin', u'http://rss.bild.de/bild-berlin.xml'),
|
||||||
|
(u'Reg. - Bremen', u'http://rss.bild.de/bild-bremen.xml'),
|
||||||
|
(u'Reg. - Dresden', u'http://rss.bild.de/bild-dresden.xml'),
|
||||||
|
(u'Reg. - Düsseldorf', u'http://rss.bild.de/bild-duesseldorf.xml'),
|
||||||
|
(u'Reg. - Frankfurt-Main', u'http://rss.bild.de/bild-frankfurt-main.xml'),
|
||||||
|
(u'Reg. - Hamburg', u'http://rss.bild.de/bild-hamburg.xml'),
|
||||||
|
(u'Reg. - Hannover', u'http://rss.bild.de/bild-hannover.xml'),
|
||||||
|
(u'Reg. - Köln', u'http://rss.bild.de/bild-koeln.xml'),
|
||||||
|
(u'Reg. - Leipzig', u'http://rss.bild.de/bild-leipzig.xml'),
|
||||||
|
(u'Reg. - München', u'http://rss.bild.de/bild-muenchen.xml'),
|
||||||
|
(u'Reg. - Ruhrgebiet', u'http://rss.bild.de/bild-ruhrgebiet.xml'),
|
||||||
|
(u'Reg. - Stuttgart', u'http://rss.bild.de/bild-stuttgart.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
20
recipes/blog_da_cidadania.recipe
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class BlogdaCidadania(BasicNewsRecipe):
|
||||||
|
title = 'Blog da Cidadania'
|
||||||
|
__author__ = 'Diniz Bortolotto'
|
||||||
|
description = 'Posts do Blog da Cidadania'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 50
|
||||||
|
encoding = 'utf8'
|
||||||
|
publisher = 'Eduardo Guimaraes'
|
||||||
|
category = 'politics, Brazil'
|
||||||
|
language = 'pt_BR'
|
||||||
|
publication_type = 'politics portal'
|
||||||
|
|
||||||
|
feeds = [(u'Blog da Cidadania', u'http://www.blogcidadania.com.br/feed/')]
|
||||||
|
|
||||||
|
reverse_article_order = True
|
||||||
|
|
36
recipes/brigitte_de.recipe
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = u'Brigitte.de'
|
||||||
|
__author__ = 'schuster'
|
||||||
|
oldest_article = 14
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'de'
|
||||||
|
remove_javascript = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
timeout = 10
|
||||||
|
cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
|
||||||
|
masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
|
||||||
|
|
||||||
|
|
||||||
|
remove_tags = [dict(attrs={'class':['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}),
|
||||||
|
dict(id=['header', 'artTools', 'context', 'interact', 'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']),
|
||||||
|
dict(name=['hjtrs', 'kud'])]
|
||||||
|
|
||||||
|
feeds = [(u'Mode', u'http://www.brigitte.de/mode/feed.rss'),
|
||||||
|
(u'Beauty', u'http://www.brigitte.de/beauty/feed.rss'),
|
||||||
|
(u'Luxus', u'http://www.brigitte.de/luxus/feed.rss'),
|
||||||
|
(u'Figur', u'http://www.brigitte.de/figur/feed.rss'),
|
||||||
|
(u'Gesundheit', u'http://www.brigitte.de/gesundheit/feed.rss'),
|
||||||
|
(u'Liebe&Sex', u'http://www.brigitte.de/liebe-sex/feed.rss'),
|
||||||
|
(u'Gesellschaft', u'http://www.brigitte.de/gesellschaft/feed.rss'),
|
||||||
|
(u'Kultur', u'http://www.brigitte.de/kultur/feed.rss'),
|
||||||
|
(u'Reise', u'http://www.brigitte.de/reise/feed.rss'),
|
||||||
|
(u'Kochen', u'http://www.brigitte.de/kochen/feed.rss'),
|
||||||
|
(u'Wohnen', u'http://www.brigitte.de/wohnen/feed.rss'),
|
||||||
|
(u'Job', u'http://www.brigitte.de/job/feed.rss'),
|
||||||
|
(u'Erfahrungen', u'http://www.brigitte.de/erfahrungen/feed.rss'),
|
||||||
|
]
|
@ -1,72 +1,59 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elargentino.com
|
www.diariobae.com
|
||||||
'''
|
'''
|
||||||
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
|
||||||
class BsAsEconomico(BasicNewsRecipe):
|
class BsAsEconomico(BasicNewsRecipe):
|
||||||
title = 'Buenos Aires Economico'
|
title = 'Buenos Aires Economico'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Revista Argentina'
|
description = 'Diario BAE es el diario economico-politico con mas influencia en la Argentina. Fuente de empresarios y politicos del pais y el exterior. El pozo estaria aportando en periodos breves un volumen equivalente a 800m3 diarios. Pero todavia deben efectuarse otras perforaciones adicionales.'
|
||||||
publisher = 'ElArgentino.com'
|
publisher = 'Diario BAE'
|
||||||
category = 'news, politics, economy, Argentina'
|
category = 'news, politics, economy, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
|
cover_url = strftime('http://www.diariobae.com/imgs_portadas/%Y%m%d_portadasBAE.jpg')
|
||||||
|
masthead_url = 'http://www.diariobae.com/img/logo_bae.png'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Georgia,"Times New Roman",Times,serif}
|
||||||
|
#titulo{font-size: x-large}
|
||||||
|
#epi{font-size: small; font-style: italic; font-weight: bold}
|
||||||
|
img{display: block; margin-top: 1em}
|
||||||
|
"""
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
lang = 'es-AR'
|
remove_tags_before= dict(attrs={'id':'titulo'})
|
||||||
direction = 'ltr'
|
remove_tags_after = dict(attrs={'id':'autor' })
|
||||||
INDEX = 'http://www.elargentino.com/medios/121/Buenos-Aires-Economico.html'
|
remove_tags = [
|
||||||
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
dict(name=['meta','base','iframe','link','lang'])
|
||||||
|
,dict(attrs={'id':'barra_tw'})
|
||||||
html2lrf_options = [
|
|
||||||
'--comment' , description
|
|
||||||
, '--category' , category
|
|
||||||
, '--publisher', publisher
|
|
||||||
]
|
]
|
||||||
|
remove_attributes = ['data-count','data-via']
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
feeds = [
|
||||||
|
(u'Argentina' , u'http://www.diariobae.com/rss/argentina.xml' )
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
,(u'Valores' , u'http://www.diariobae.com/rss/valores.xml' )
|
||||||
|
,(u'Finanzas' , u'http://www.diariobae.com/rss/finanzas.xml' )
|
||||||
remove_tags = [dict(name='link')]
|
,(u'Negocios' , u'http://www.diariobae.com/rss/negocios.xml' )
|
||||||
|
,(u'Mundo' , u'http://www.diariobae.com/rss/mundo.xml' )
|
||||||
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=121&Content-Type=text/xml&ChannelDesc=Buenos%20Aires%20Econ%C3%B3mico')]
|
,(u'5 dias' , u'http://www.diariobae.com/rss/5dias.xml' )
|
||||||
|
,(u'Espectaculos', u'http://www.diariobae.com/rss/espectaculos.xml')
|
||||||
def print_version(self, url):
|
]
|
||||||
main, sep, article_part = url.partition('/nota-')
|
|
||||||
article_id, rsep, rrest = article_part.partition('-')
|
|
||||||
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
soup.html['lang'] = self.lang
|
|
||||||
soup.html['dir' ] = self.direction
|
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
|
||||||
soup.head.insert(0,mlang)
|
|
||||||
soup.head.insert(1,mcharset)
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
cover_url = None
|
|
||||||
soup = self.index_to_soup(self.INDEX)
|
|
||||||
cover_item = soup.find('div',attrs={'class':'colder'})
|
|
||||||
if cover_item:
|
|
||||||
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
|
||||||
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
|
||||||
return cover_url
|
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
|
||||||
base, sep, rest = url.rpartition('?Id=')
|
|
||||||
img, sep2, rrest = rest.partition('&')
|
|
||||||
return base + sep + img
|
|
||||||
|
13
recipes/catholic_news_agency.recipe
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1301972345(BasicNewsRecipe):
|
||||||
|
title = u'Catholic News Agency'
|
||||||
|
language = 'en'
|
||||||
|
__author__ = 'Jetkey'
|
||||||
|
oldest_article = 5
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
|
||||||
|
feeds = [(u'U.S. News', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-us'),
|
||||||
|
(u'Vatican', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-vatican'),
|
||||||
|
(u'Bishops Corner', u'http://feeds.feedburner.com/catholicnewsagency/columns/bishopscorner'),
|
||||||
|
(u'Saint of the Day', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday')]
|
37
recipes/christian_post.recipe
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
#created by sexymax15 ....sexymax15@gmail.com
|
||||||
|
#christian post recipe
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ChristianPost(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = 'The Christian Post'
|
||||||
|
__author__ = 'sexymax15'
|
||||||
|
description = 'Homepage'
|
||||||
|
language = 'en'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
oldest_article = 30
|
||||||
|
max_articles_per_feed = 15
|
||||||
|
|
||||||
|
remove_empty_feeds = True
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
|
||||||
|
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } '''
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
('Homepage', 'http://www.christianpost.com/services/rss/feed/'),
|
||||||
|
('Most Popular', 'http://www.christianpost.com/services/rss/feed/most-popular'),
|
||||||
|
('Entertainment', 'http://www.christianpost.com/services/rss/feed/entertainment/'),
|
||||||
|
('Politics', 'http://www.christianpost.com/services/rss/feed/politics/'),
|
||||||
|
('Living', 'http://www.christianpost.com/services/rss/feed/living/'),
|
||||||
|
('Business', 'http://www.christianpost.com/services/rss/feed/business/'),
|
||||||
|
('Opinion', 'http://www.christianpost.com/services/rss/feed/opinion/')
|
||||||
|
]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url +'print.html'
|
||||||
|
|
@ -3,71 +3,39 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
'''
|
'''
|
||||||
Profile to download CNN
|
Profile to download CNN
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
|
|
||||||
class CNN(BasicNewsRecipe):
|
class CNN(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'CNN'
|
title = 'CNN'
|
||||||
description = 'Global news'
|
description = 'Global news'
|
||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
__author__ = 'Krittika Goyal and Sujata Raman'
|
__author__ = 'Kovid Goyal'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
recursions = 1
|
#recursions = 1
|
||||||
match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
|
#match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
|
|
||||||
extra_css = '''
|
preprocess_regexps = [
|
||||||
.cnn_strycntntlft{font-family :Arial,Helvetica,sans-serif;}
|
(re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
|
||||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
|
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
|
||||||
.cnnTxtCmpnt{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
|
(re.compile(r'<style.*?</style>', re.DOTALL), lambda m: ''),
|
||||||
.cnnTMcontent{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#575757}
|
|
||||||
.storytext{font-family :Arial,Helvetica,sans-serif; font-size:small}
|
|
||||||
.storybyline{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#575757}
|
|
||||||
.credit{font-family :Arial,Helvetica,sans-serif; font-size:xx-small; color:#575757}
|
|
||||||
.storyBrandingBanner{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#575757}
|
|
||||||
.storytimestamp{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#575757}
|
|
||||||
.timestamp{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#575757}
|
|
||||||
.cnn_strytmstmp{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
|
|
||||||
.cnn_stryimg640caption{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
|
|
||||||
.cnn_strylccimg300cntr{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
|
|
||||||
.cnn_stryichgfcpt{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
|
|
||||||
.cnnByline{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
|
|
||||||
.cnn_bulletbin cnnStryHghLght{ font-size:xx-small;}
|
|
||||||
.subhead p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
|
||||||
.cnnStoryContent{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
|
|
||||||
.cnnContentContainer{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
|
|
||||||
.col1{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
|
|
||||||
.col3{color:#333333; font-family :Arial,Helvetica,sans-serif; font-size:x-small;font-weight:bold;}
|
|
||||||
.cnnInlineT1Caption{font-family :Arial,Helvetica,sans-serif; font-size:x-small;font-weight:bold;}
|
|
||||||
.cnnInlineT1Credit{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#333333;}
|
|
||||||
.col10{color:#5A637E;}
|
|
||||||
.cnnInlineRailBulletList{color:black;}
|
|
||||||
.cnnLine0{font-family :Arial,Helvetica,sans-serif; color:#666666;font-weight:bold;}
|
|
||||||
.cnnTimeStamp{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#333333;}
|
|
||||||
.galleryhedDek{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#575757;}
|
|
||||||
.galleryWidgetHeader{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#004276;}
|
|
||||||
.article-content{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
|
|
||||||
.cnnRecapStory{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
|
|
||||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large}
|
|
||||||
.captionname{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#575757;}
|
|
||||||
inStoryIE{{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
#remove_tags_before = dict(name='h1', attrs={'class':'heading'})
|
|
||||||
#remove_tags_after = dict(name='td', attrs={'class':'newptool1'})
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='iframe'),
|
|
||||||
dict(name='div', attrs={'class':['cnnEndOfStory', 'cnnShareThisItem', 'cnn_strylctcntr cnn_strylctcqrelt', 'cnnShareBoxContent', 'cnn_strybtmcntnt', 'cnn_strycntntrgt']}),
|
|
||||||
dict(name='div', attrs={'id':['IEContainer', 'clickIncludeBox']}),
|
|
||||||
#dict(name='ul', attrs={'class':'article-tools'}),
|
|
||||||
#dict(name='ul', attrs={'class':'articleTools'}),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
keep_only_tags = [dict(id='cnnContentContainer')]
|
||||||
|
remove_tags = [
|
||||||
|
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
|
||||||
|
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
|
||||||
|
'cnn_strycntntrgt']},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
|
('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
|
||||||
('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
|
('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
|
||||||
@ -84,15 +52,8 @@ class CNN(BasicNewsRecipe):
|
|||||||
('Offbeat', 'http://rss.cnn.com/rss/cnn_offbeat.rss'),
|
('Offbeat', 'http://rss.cnn.com/rss/cnn_offbeat.rss'),
|
||||||
('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss')
|
('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss')
|
||||||
]
|
]
|
||||||
def preprocess_html(self, soup):
|
|
||||||
story = soup.find(name='div', attrs={'class':'cnnBody_Left'})
|
def get_article_url(self, article):
|
||||||
if story is None:
|
ans = BasicNewsRecipe.get_article_url(self, article)
|
||||||
story = soup.find(name='div', attrs={'id':'cnnContentContainer'})
|
return ans.partition('?')[0]
|
||||||
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
|
|
||||||
body = soup.find(name='body')
|
|
||||||
body.insert(0, story)
|
|
||||||
else:
|
|
||||||
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
|
|
||||||
body = soup.find(name='body')
|
|
||||||
body.insert(0, story)
|
|
||||||
return soup
|
|
||||||
|
@ -1,19 +1,20 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Lorenzo Vigentini'
|
__author__ = 'DrMerry Based on v1.01 by Lorenzo Vigentini'
|
||||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
__copyright__ = 'For version 1.02, 1.03: DrMerry'
|
||||||
__version__ = 'v1.01'
|
__version__ = 'v1.03'
|
||||||
__date__ = '14, January 2010'
|
__date__ = '11, July 2011'
|
||||||
__description__ = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.'
|
__description__ = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day. Original version (c): 2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
http://www.computeractive.co.uk/
|
http://www.computeractive.co.uk/
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
class computeractive(BasicNewsRecipe):
|
class computeractive(BasicNewsRecipe):
|
||||||
__author__ = 'Lorenzo Vigentini'
|
__author__ = 'DrMerry'
|
||||||
description = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.'
|
description = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.'
|
||||||
cover_url = 'http://images.pcworld.com/images/common/header/header-logo.gif'
|
cover_url = 'http://images.pcworld.com/images/common/header/header-logo.gif'
|
||||||
|
|
||||||
@ -31,24 +32,27 @@ class computeractive(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
remove_tags_after = dict(name='div', attrs={'class':'article_tags_block'})
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':'main'})
|
dict(name='div', attrs={'id':'container_left'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':['seeAlsoTags','commentsModule','relatedArticles','mainLeft','mainRight']}),
|
dict(name='div', attrs={'id':['seeAlsoTags','commentsModule','relatedArticles','mainLeft','mainRight','recent_comment_block_parent','reviewDetails']}),
|
||||||
dict(name='div', attrs={'class':['buyIt','detailMpu']}),
|
dict(name='div', attrs={'class':['buyIt','detailMpu','small_section','recent_comment_block_parent','title_right_button_fix','section_title.title_right_button_fix','common_button']}),
|
||||||
dict(name='a', attrs={'class':'largerImage'})
|
dict(name='a', attrs={'class':'largerImage'})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'(<a [^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: ''),
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'General content', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/all'),
|
(u'General content', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/all'),
|
||||||
(u'News', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/news'),
|
(u'News', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/news'),
|
||||||
(u'Downloads', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/downloads'),
|
|
||||||
(u'Hardware', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/hardware'),
|
|
||||||
(u'Software', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/software'),
|
|
||||||
(u'Competitions', u'http://www.v3.co.uk/feeds/rss20/personal-technology/competitions')
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,83 +1,63 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
|
||||||
|
|
||||||
class Cracked(BasicNewsRecipe):
|
class Cracked(BasicNewsRecipe):
|
||||||
title = u'Cracked.com'
|
title = u'Cracked.com'
|
||||||
__author__ = u'Nudgenudge'
|
__author__ = 'UnWeave'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = 'America''s Only Humor and Video Site, since 1958'
|
description = "America's Only HumorSite since 1958"
|
||||||
publisher = 'Cracked'
|
publisher = 'Cracked'
|
||||||
category = 'comedy, lists'
|
category = 'comedy, lists'
|
||||||
oldest_article = 2
|
oldest_article = 3 #days
|
||||||
delay = 10
|
max_articles_per_feed = 100
|
||||||
max_articles_per_feed = 2
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'ascii'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
INDEX = u'http://www.cracked.com'
|
|
||||||
extra_css = """
|
feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ]
|
||||||
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
|
|
||||||
.pageheader_title{font-size: xx-large; color: #394128}
|
|
||||||
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
|
|
||||||
.score_bg {display: inline; width: 100%; margin-bottom: 2em}
|
|
||||||
.score_column_1{ padding-left: 10px; font-size: small; width: 50%}
|
|
||||||
.score_column_2{ padding-left: 10px; font-size: small; width: 50%}
|
|
||||||
.score_column_3{ padding-left: 10px; font-size: small; width: 50%}
|
|
||||||
.score_header{font-size: large; color: #50544A}
|
|
||||||
.bodytext{display: block}
|
|
||||||
body{font-family: Helvetica,Arial,sans-serif}
|
|
||||||
"""
|
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : language
|
, 'language' : language
|
||||||
, 'linearize_tables' : True
|
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [
|
remove_tags_before = dict(id='PrimaryContent')
|
||||||
dict(name='div', attrs={'class':['Column1']})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS')]
|
remove_tags_after = dict(name='div', attrs={'class':'shareBar'})
|
||||||
|
|
||||||
def get_article_url(self, article):
|
remove_tags = [ dict(name='div', attrs={'class':['social',
|
||||||
return article.get('guid', None)
|
'FacebookLike',
|
||||||
|
'shareBar'
|
||||||
|
]}),
|
||||||
|
|
||||||
def cleanup_page(self, soup):
|
dict(name='div', attrs={'id':['inline-share-buttons',
|
||||||
for item in soup.findAll(style=True):
|
]}),
|
||||||
del item['style']
|
|
||||||
for alink in soup.findAll('a'):
|
|
||||||
if alink.string is not None:
|
|
||||||
tstr = alink.string
|
|
||||||
alink.replaceWith(tstr)
|
|
||||||
for div_to_remove in soup.findAll('div', attrs={'id':['googlead_1','fb-like-article','comments_section']}):
|
|
||||||
div_to_remove.extract()
|
|
||||||
for div_to_remove in soup.findAll('div', attrs={'class':['share_buttons_col_1','GenericModule1']}):
|
|
||||||
div_to_remove.extract()
|
|
||||||
for div_to_remove in soup.findAll('div', attrs={'class':re.compile("prev_next")}):
|
|
||||||
div_to_remove.extract()
|
|
||||||
for ul_to_remove in soup.findAll('ul', attrs={'class':['Nav6']}):
|
|
||||||
ul_to_remove.extract()
|
|
||||||
for image in soup.findAll('img', attrs={'alt': 'article image'}):
|
|
||||||
image.extract()
|
|
||||||
|
|
||||||
def append_page(self, soup, appendtag, position):
|
dict(name='span', attrs={'class':['views',
|
||||||
pager = soup.find('a',attrs={'class':'next_arrow_active'})
|
'KonaFilter'
|
||||||
if pager:
|
]}),
|
||||||
nexturl = self.INDEX + pager['href']
|
#dict(name='img'),
|
||||||
soup2 = self.index_to_soup(nexturl)
|
]
|
||||||
texttag = soup2.find('div', attrs={'class':re.compile("userStyled")})
|
|
||||||
newpos = len(texttag.contents)
|
def appendPage(self, soup, appendTag, position):
|
||||||
self.append_page(soup2,texttag,newpos)
|
# Check if article has multiple pages
|
||||||
texttag.extract()
|
pageNav = soup.find('nav', attrs={'class':'PaginationContent'})
|
||||||
self.cleanup_page(appendtag)
|
if pageNav:
|
||||||
appendtag.insert(position,texttag)
|
# Check not at last page
|
||||||
else:
|
nextPage = pageNav.find('a', attrs={'class':'next'})
|
||||||
self.cleanup_page(appendtag)
|
if nextPage:
|
||||||
|
nextPageURL = nextPage['href']
|
||||||
|
nextPageSoup = self.index_to_soup(nextPageURL)
|
||||||
|
# 8th <section> tag contains article content
|
||||||
|
nextPageContent = nextPageSoup.findAll('section')[7]
|
||||||
|
newPosition = len(nextPageContent.contents)
|
||||||
|
self.appendPage(nextPageSoup,nextPageContent,newPosition)
|
||||||
|
nextPageContent.extract()
|
||||||
|
pageNav.extract()
|
||||||
|
appendTag.insert(position,nextPageContent)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
self.append_page(soup, soup.body, 3)
|
self.appendPage(soup, soup.body, 3)
|
||||||
return self.adeify_images(soup)
|
return soup
|
||||||
|
@ -1,69 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
criticadigital.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class CriticaDigital(BasicNewsRecipe):
|
|
||||||
title = 'Critica de la Argentina'
|
|
||||||
__author__ = 'Darko Miletic and Sujata Raman'
|
|
||||||
description = 'Noticias de Argentina'
|
|
||||||
oldest_article = 2
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
language = 'es_AR'
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
encoding = 'cp1252'
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
h1{font-family:"Trebuchet MS";}
|
|
||||||
h3{color:#9A0000; font-family:Tahoma; font-size:x-small;}
|
|
||||||
h2{color:#504E53; font-family:Arial,Helvetica,sans-serif ;font-size:small;}
|
|
||||||
#epigrafe{font-family:Arial,Helvetica,sans-serif ;color:#666666 ; font-size:x-small;}
|
|
||||||
p {font-family:Arial,Helvetica,sans-serif;}
|
|
||||||
#fecha{color:#858585; font-family:Tahoma; font-size:x-small;}
|
|
||||||
#autor{color:#858585; font-family:Tahoma; font-size:x-small;}
|
|
||||||
#hora{color:#F00000;font-family:Tahoma; font-size:x-small;}
|
|
||||||
'''
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class':['bloqueTitulosNoticia','cfotonota']})
|
|
||||||
,dict(name='div', attrs={'id':'boxautor'})
|
|
||||||
,dict(name='p', attrs={'id':'textoNota'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class':'box300' })
|
|
||||||
,dict(name='div', style=True )
|
|
||||||
,dict(name='div', attrs={'class':'titcomentario'})
|
|
||||||
,dict(name='div', attrs={'class':'comentario' })
|
|
||||||
,dict(name='div', attrs={'class':'paginador' })
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica' )
|
|
||||||
,(u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia' )
|
|
||||||
,(u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes' )
|
|
||||||
,(u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos')
|
|
||||||
,(u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo' )
|
|
||||||
,(u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales' )
|
|
||||||
,(u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad' )
|
|
||||||
,(u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud' )
|
|
||||||
,(u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia' )
|
|
||||||
,(u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe' )
|
|
||||||
]
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
cover_url = None
|
|
||||||
index = 'http://www.criticadigital.com/impresa/'
|
|
||||||
soup = self.index_to_soup(index)
|
|
||||||
link_item = soup.find('div',attrs={'class':'tapa'})
|
|
||||||
if link_item:
|
|
||||||
cover_url = index + link_item.img['src']
|
|
||||||
return cover_url
|
|
||||||
|
|
||||||
|
|
52
recipes/daily_mirror.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||||
|
title = u'The Daily Mirror'
|
||||||
|
description = 'News as provide by The Daily Mirror -UK'
|
||||||
|
|
||||||
|
__author__ = 'Dave Asbury'
|
||||||
|
language = 'en_GB'
|
||||||
|
|
||||||
|
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
|
||||||
|
|
||||||
|
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
|
||||||
|
|
||||||
|
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
remove_empty_feeds = True
|
||||||
|
remove_javascript = True
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='h1'),
|
||||||
|
dict(attrs={'class':['article-attr']}),
|
||||||
|
dict(name='div', attrs={'class' : [ 'article-body', 'crosshead']})
|
||||||
|
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'class' : ['caption', 'article-resize']}),
|
||||||
|
dict( attrs={'class':'append-html'})
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
|
||||||
|
(u'News', u'http://www.mirror.co.uk/news/rss.xml')
|
||||||
|
,(u'Tech News', u'http://www.mirror.co.uk/news/technology/rss.xml')
|
||||||
|
,(u'Weird World','http://www.mirror.co.uk/news/weird-world/rss.xml')
|
||||||
|
,(u'Film Gossip','http://www.mirror.co.uk/celebs/film/rss.xml')
|
||||||
|
,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml')
|
||||||
|
,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml')
|
||||||
|
,(u'Sport','http://www.mirror.co.uk/sport/rss.xml')
|
||||||
|
,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml')
|
||||||
|
,(u'Advice','http://www.mirror.co.uk/advice/rss.xml')
|
||||||
|
,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||||
|
|
||||||
|
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||||
|
]
|
||||||
|
|
78
recipes/daytona_beach.recipe
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class DaytonBeachNewsJournal(BasicNewsRecipe):
|
||||||
|
title ='Daytona Beach News Journal'
|
||||||
|
__author__ = 'BRGriff'
|
||||||
|
pubisher = 'News-JournalOnline.com'
|
||||||
|
description = 'Daytona Beach, Florida, Newspaper'
|
||||||
|
category = 'News, Daytona Beach, Florida'
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'en'
|
||||||
|
filterDuplicates = True
|
||||||
|
remove_attributes = ['style']
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'class':'page-header'}),
|
||||||
|
dict(name='div', attrs={'class':'asset-body'})
|
||||||
|
]
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':['byline-section', 'asset-meta']})
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
#####NEWS#####
|
||||||
|
(u"News", u"http://www.news-journalonline.com/rss.xml"),
|
||||||
|
(u"Breaking News", u"http://www.news-journalonline.com/breakingnews/rss.xml"),
|
||||||
|
(u"Local - East Volusia", u"http://www.news-journalonline.com/news/local/east-volusia/rss.xml"),
|
||||||
|
(u"Local - West Volusia", u"http://www.news-journalonline.com/news/local/west-volusia/rss.xml"),
|
||||||
|
(u"Local - Southeast", u"http://www.news-journalonline.com/news/local/southeast-volusia/rss.xml"),
|
||||||
|
(u"Local - Flagler", u"http://www.news-journalonline.com/news/local/flagler/rss.xml"),
|
||||||
|
(u"Florida", u"http://www.news-journalonline.com/news/florida/rss.xml"),
|
||||||
|
(u"National/World", u"http://www.news-journalonline.com/news/nationworld/rss.xml"),
|
||||||
|
(u"Politics", u"http://www.news-journalonline.com/news/politics/rss.xml"),
|
||||||
|
(u"News of Record", u"http://www.news-journalonline.com/news/news-of-record/rss.xml"),
|
||||||
|
####BUSINESS####
|
||||||
|
(u"Business", u"http://www.news-journalonline.com/business/rss.xml"),
|
||||||
|
#(u"Jobs", u"http://www.news-journalonline.com/business/jobs/rss.xml"),
|
||||||
|
#(u"Markets", u"http://www.news-journalonline.com/business/markets/rss.xml"),
|
||||||
|
#(u"Real Estate", u"http://www.news-journalonline.com/business/real-estate/rss.xml"),
|
||||||
|
#(u"Technology", u"http://www.news-journalonline.com/business/technology/rss.xml"),
|
||||||
|
####SPORTS####
|
||||||
|
(u"Sports", u"http://www.news-journalonline.com/sports/rss.xml"),
|
||||||
|
(u"Racing", u"http://www.news-journalonline.com/racing/rss.xml"),
|
||||||
|
(u"Highschool", u"http://www.news-journalonline.com/sports/highschool/rss.xml"),
|
||||||
|
(u"College", u"http://www.news-journalonline.com/sports/college/rss.xml"),
|
||||||
|
(u"Basketball", u"http://www.news-journalonline.com/sports/basketball/rss.xml"),
|
||||||
|
(u"Football", u"http://www.news-journalonline.com/sports/football/rss.xml"),
|
||||||
|
(u"Golf", u"http://www.news-journalonline.com/sports/golf/rss.xml"),
|
||||||
|
(u"Other Sports", u"http://www.news-journalonline.com/sports/other/rss.xml"),
|
||||||
|
####LIFESTYLE####
|
||||||
|
(u"Lifestyle", u"http://www.news-journalonline.com/lifestyle/rss.xml"),
|
||||||
|
#(u"Fashion", u"http://www.news-journalonline.com/lifestyle/fashion/rss.xml"),
|
||||||
|
(u"Food", u"http://www.news-journalonline.com/lifestyle/food/rss.xml"),
|
||||||
|
#(u"Health", u"http://www.news-journalonline.com/lifestyle/health/rss.xml"),
|
||||||
|
(u"Home and Garden", u"http://www.news-journalonline.com/lifestyle/home-and-garden/rss.xml"),
|
||||||
|
(u"Living", u"http://www.news-journalonline.com/lifestyle/living/rss.xml"),
|
||||||
|
(u"Religion", u"http://www.news-journalonline.com/lifestyle/religion/rss.xml"),
|
||||||
|
#(u"Travel", u"http://www.news-journalonline.com/lifestyle/travel/rss.xml"),
|
||||||
|
####OPINION####
|
||||||
|
#(u"Opinion", u"http://www.news-journalonline.com/opinion/rss.xml"),
|
||||||
|
#(u"Letters to Editor", u"http://www.news-journalonline.com/opinion/letters-to-the-editor/rss.xml"),
|
||||||
|
#(u"Columns", u"http://www.news-journalonline.com/columns/rss.xml"),
|
||||||
|
#(u"Podcasts", u"http://www.news-journalonline.com/podcasts/rss.xml"),
|
||||||
|
####ENTERTAINMENT#### ##Weekly Feature##
|
||||||
|
(u"Entertainment", u"http://www.go386.com/rss.xml"),
|
||||||
|
(u"Go Out", u"http://www.go386.com/go/rss.xml"),
|
||||||
|
(u"Music", u"http://www.go386.com/music/rss.xml"),
|
||||||
|
(u"Movies", u"http://www.go386.com/movies/rss.xml"),
|
||||||
|
#(u"Culture", u"http://www.go386.com/culture/rss.xml"),
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
.page-header{font-family:Arial,Helvetica,sans-serif; font-style:bold;font-size:22pt;}
|
||||||
|
.asset-body{font-family:Helvetica,Arial,sans-serif; font-size:16pt;}
|
||||||
|
|
||||||
|
'''
|
18
recipes/down_to_earth.recipe
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1307834113(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = u'Down To Earth'
|
||||||
|
oldest_article = 300
|
||||||
|
__author__ = 'sexymax15'
|
||||||
|
max_articles_per_feed = 30
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
remove_attributes = ['width','height']
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'en_IN'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
remove_tags_before = dict(name='div', id='PageContent')
|
||||||
|
remove_tags_after = [dict(name='div'),{'class':'box'}]
|
||||||
|
remove_tags =[{'class':'box'}]
|
||||||
|
feeds = [(u'editor', u'http://www.downtoearth.org.in/taxonomy/term/20348/0/feed'), (u'cover story', u'http://www.downtoearth.org.in/taxonomy/term/20345/0/feed'), (u'special report', u'http://www.downtoearth.org.in/taxonomy/term/20384/0/feed'), (u'features', u'http://www.downtoearth.org.in/taxonomy/term/20350/0/feed'), (u'news', u'http://www.downtoearth.org.in/taxonomy/term/20366/0/feed'), (u'debate', u'http://www.downtoearth.org.in/taxonomy/term/20347/0/feed'), (u'natural disasters', u'http://www.downtoearth.org.in/taxonomy/term/20822/0/feed')]
|
61
recipes/elclubdelebook.recipe
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.clubdelebook.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ElClubDelEbook(BasicNewsRecipe):
|
||||||
|
title = 'El club del ebook'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'El Club del eBook, es la primera fuente de informacion sobre ebooks de Argentina. Aca vas a encontrar noticias, tips, tutoriales, recursos y opiniones sobre el mundo de los libros electronicos.'
|
||||||
|
tags = 'ebook, libro electronico, e-book, ebooks, libros electronicos, e-books'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = 'es_AR'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = True
|
||||||
|
publication_type = 'blog'
|
||||||
|
masthead_url = 'http://dl.dropbox.com/u/2845131/elclubdelebook.png'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Arial,Helvetica,sans-serif}
|
||||||
|
img{ margin-bottom: 0.8em;
|
||||||
|
border: 1px solid #333333;
|
||||||
|
padding: 4px; display: block
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : tags
|
||||||
|
, 'publisher': title
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_tags = [dict(attrs={'id':'crp_related'})]
|
||||||
|
remove_tags_after = dict(attrs={'id':'crp_related'})
|
||||||
|
|
||||||
|
feeds = [(u'Articulos', u'http://feeds.feedburner.com/ElClubDelEbook')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
limg = item.find('img')
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
if limg:
|
||||||
|
item.name = 'div'
|
||||||
|
item.attrs = []
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
|
return soup
|
@ -1,72 +1,59 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
cronista.com
|
www.cronista.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ElCronista(BasicNewsRecipe):
|
class Pagina12(BasicNewsRecipe):
|
||||||
title = 'El Cronista'
|
title = 'El Cronista Comercial'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina'
|
description = 'El Cronista Comercial es el Diario economico-politico mas valorado. Es la fuente mas confiable de informacion en temas de economia, finanzas y negocios enmarcados politicamente.'
|
||||||
|
publisher = 'Cronista.com'
|
||||||
|
category = 'news, politics, economy, finances, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = 'es_AR'
|
max_articles_per_feed = 200
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
language = 'es_AR'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
masthead_url = 'http://www.cronista.com/export/sites/diarioelcronista/arte/header-logo.gif'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
|
h2{font-family: Georgia,"Times New Roman",Times,serif }
|
||||||
|
img{margin-bottom: 0.4em; display:block}
|
||||||
|
.nom{font-weight: bold; vertical-align: baseline}
|
||||||
|
.autor-cfoto{border-bottom: 1px solid #D2D2D2;
|
||||||
|
border-top: 1px solid #D2D2D2;
|
||||||
|
display: inline-block;
|
||||||
|
margin: 0 10px 10px 0;
|
||||||
|
padding: 10px;
|
||||||
|
width: 210px}
|
||||||
|
.under{font-weight: bold}
|
||||||
|
.time{font-size: small}
|
||||||
|
"""
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comment' : description
|
||||||
, '--category' , 'news, Argentina'
|
, 'tags' : category
|
||||||
, '--publisher' , title
|
, 'publisher' : publisher
|
||||||
]
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
keep_only_tags = [
|
remove_tags = [
|
||||||
dict(name='table', attrs={'width':'100%' })
|
dict(name=['meta','link','base','iframe','object','embed'])
|
||||||
,dict(name='h1' , attrs={'class':'Arialgris16normal'})
|
,dict(attrs={'class':['user-tools','tabsmedia']})
|
||||||
]
|
]
|
||||||
|
remove_attributes = ['lang']
|
||||||
|
remove_tags_before = dict(attrs={'class':'top'})
|
||||||
|
remove_tags_after = dict(attrs={'class':'content-nota'})
|
||||||
|
feeds = [(u'Ultimas noticias', u'http://www.cronista.com/rss.html')]
|
||||||
|
|
||||||
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
|
|
||||||
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
|
|
||||||
,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' )
|
|
||||||
,(u'Finanzas y Mercados' , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' )
|
|
||||||
,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' )
|
|
||||||
,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' )
|
|
||||||
,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml')
|
|
||||||
,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' )
|
|
||||||
,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' )
|
|
||||||
,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' )
|
|
||||||
,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' )
|
|
||||||
]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
main, sep, rest = url.partition('.com/notas/')
|
|
||||||
article_id, lsep, rrest = rest.partition('-')
|
|
||||||
return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
for item in soup.findAll(style=True):
|
||||||
soup.head.insert(0,mtag)
|
del item['style']
|
||||||
soup.head.base.extract()
|
|
||||||
htext = soup.find('h1',attrs={'class':'Arialgris16normal'})
|
|
||||||
htext.name = 'p'
|
|
||||||
soup.prettify()
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
cover_url = None
|
|
||||||
index = 'http://www.cronista.com/contenidos/'
|
|
||||||
soup = self.index_to_soup(index + 'ee.html')
|
|
||||||
link_item = soup.find('a',attrs={'href':"javascript:Close()"})
|
|
||||||
if link_item:
|
|
||||||
cover_url = index + link_item.img['src']
|
|
||||||
return cover_url
|
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.eluniversal.com
|
www.eluniversal.com
|
||||||
'''
|
'''
|
||||||
@ -15,12 +15,20 @@ class ElUniversal(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
remove_empty_feeds = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
publisher = 'El Universal'
|
publisher = 'El Universal'
|
||||||
category = 'news, Caracas, Venezuela, world'
|
category = 'news, Caracas, Venezuela, world'
|
||||||
language = 'es_VE'
|
language = 'es_VE'
|
||||||
|
publication_type = 'newspaper'
|
||||||
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
|
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
|
||||||
|
extra_css = """
|
||||||
|
.txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}
|
||||||
|
.txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}
|
||||||
|
.txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large}
|
||||||
|
.txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large}
|
||||||
|
body{font-family: Verdana,Arial,Helvetica,sans-serif}
|
||||||
|
"""
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
,'tags' : category
|
,'tags' : category
|
||||||
@ -28,10 +36,11 @@ class ElUniversal(BasicNewsRecipe):
|
|||||||
,'publisher' : publisher
|
,'publisher' : publisher
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'Nota'})]
|
remove_tags_before=dict(attrs={'class':'header-print MB10'})
|
||||||
|
remove_tags_after= dict(attrs={'id':'SizeText'})
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','script','iframe'])
|
dict(name=['object','link','script','iframe','meta'])
|
||||||
,dict(name='div',attrs={'class':'Herramientas'})
|
,dict(attrs={'class':'header-print MB10'})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008 - 2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = 'Copyright 2011 Starson17'
|
||||||
'''
|
'''
|
||||||
engadget.com
|
engadget.com
|
||||||
'''
|
'''
|
||||||
@ -10,13 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Engadget(BasicNewsRecipe):
|
class Engadget(BasicNewsRecipe):
|
||||||
title = u'Engadget'
|
title = u'Engadget'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Starson17'
|
||||||
|
__version__ = 'v1.00'
|
||||||
|
__date__ = '02, July 2011'
|
||||||
description = 'Tech news'
|
description = 'Tech news'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = False
|
||||||
|
remove_javascript = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
|
||||||
feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')]
|
keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
|
||||||
|
remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
|
||||||
|
|
||||||
|
feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||||
|
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||||
|
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||||
|
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||||
|
'''
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'Express.de'
|
title = u'Express.de'
|
||||||
@ -12,7 +11,6 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
extra_css = '''
|
extra_css = '''
|
||||||
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
|
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||||
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
|
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
|
||||||
|
|
||||||
'''
|
'''
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
|
remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
|
||||||
@ -25,6 +23,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
dict(id='Logo'),
|
dict(id='Logo'),
|
||||||
dict(id='MainLinkSpacer'),
|
dict(id='MainLinkSpacer'),
|
||||||
dict(id='MainLinks'),
|
dict(id='MainLinks'),
|
||||||
|
dict(id='ContainerPfad'), #neu
|
||||||
dict(title='Diese Seite Bookmarken'),
|
dict(title='Diese Seite Bookmarken'),
|
||||||
|
|
||||||
dict(name='span'),
|
dict(name='span'),
|
||||||
@ -44,7 +43,8 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
dict(name='div', attrs={'class':'HeaderSearch'}),
|
dict(name='div', attrs={'class':'HeaderSearch'}),
|
||||||
dict(name='div', attrs={'class':'sbutton'}),
|
dict(name='div', attrs={'class':'sbutton'}),
|
||||||
dict(name='div', attrs={'class':'active'}),
|
dict(name='div', attrs={'class':'active'}),
|
||||||
|
dict(name='div', attrs={'class':'MoreNews'}), #neu
|
||||||
|
dict(name='div', attrs={'class':'ContentBoxSubline'}) #neu
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -68,7 +68,5 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
(u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
|
(u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
|
||||||
(u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
|
(u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
|
||||||
(u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
|
(u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
]
|
|
||||||
|
@ -1,32 +1,41 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
ft.com
|
www.ft.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import datetime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class FinancialTimes(BasicNewsRecipe):
|
class FinancialTimes_rss(BasicNewsRecipe):
|
||||||
title = u'Financial Times'
|
title = 'Financial Times'
|
||||||
__author__ = 'Darko Miletic and Sujata Raman'
|
__author__ = 'Darko Miletic'
|
||||||
description = ('Financial world news. Available after 5AM '
|
description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
|
||||||
'GMT, daily.')
|
publisher = 'The Financial Times Ltd.'
|
||||||
|
category = 'news, finances, politics, World'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
max_articles_per_feed = 250
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
simultaneous_downloads= 1
|
encoding = 'utf8'
|
||||||
delay = 1
|
publication_type = 'newspaper'
|
||||||
|
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
||||||
|
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
||||||
|
INDEX = 'http://www.ft.com'
|
||||||
|
|
||||||
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
, 'linearize_tables' : True
|
||||||
|
}
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
br.open(self.INDEX)
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open(self.LOGIN)
|
br.open(self.LOGIN)
|
||||||
br.select_form(name='loginForm')
|
br.select_form(name='loginForm')
|
||||||
@ -35,31 +44,63 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ]
|
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
|
||||||
remove_tags_after = dict(name='p', attrs={'class':'copyright'})
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'floating-con'})
|
dict(name='div', attrs={'id':'floating-con'})
|
||||||
|
,dict(name=['meta','iframe','base','object','embed','link'])
|
||||||
|
,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
|
||||||
]
|
]
|
||||||
|
remove_attributes = ['width','height','lang']
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = """
|
||||||
body{font-family:Arial,Helvetica,sans-serif;}
|
body{font-family: Georgia,Times,"Times New Roman",serif}
|
||||||
h2(font-size:large;}
|
h2{font-size:large}
|
||||||
.ft-story-header(font-size:xx-small;}
|
.ft-story-header{font-size: x-small}
|
||||||
.ft-story-body(font-size:small;}
|
|
||||||
a{color:#003399;}
|
|
||||||
.container{font-size:x-small;}
|
.container{font-size:x-small;}
|
||||||
h3{font-size:x-small;color:#003399;}
|
h3{font-size:x-small;color:#003399;}
|
||||||
'''
|
.copyright{font-size: x-small}
|
||||||
|
img{margin-top: 0.8em; display: block}
|
||||||
|
.lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
|
||||||
|
.byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
|
||||||
|
"""
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'UK' , u'http://www.ft.com/rss/home/uk' )
|
(u'UK' , u'http://www.ft.com/rss/home/uk' )
|
||||||
,(u'US' , u'http://www.ft.com/rss/home/us' )
|
,(u'US' , u'http://www.ft.com/rss/home/us' )
|
||||||
,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
|
|
||||||
,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
|
,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
|
||||||
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
|
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
content_type = soup.find('meta', {'http-equiv':'Content-Type'})
|
items = ['promo-box','promo-title',
|
||||||
if content_type:
|
'promo-headline','promo-image',
|
||||||
content_type['content'] = 'text/html; charset=utf-8'
|
'promo-intro','promo-link','subhead']
|
||||||
|
for item in items:
|
||||||
|
for it in soup.findAll(item):
|
||||||
|
it.name = 'div'
|
||||||
|
it.attrs = []
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
limg = item.find('img')
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
if limg:
|
||||||
|
item.name = 'div'
|
||||||
|
item.attrs = []
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
cdate = datetime.date.today()
|
||||||
|
if cdate.isoweekday() == 7:
|
||||||
|
cdate -= datetime.timedelta(days=1)
|
||||||
|
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf')
|
||||||
|
|
||||||
|
@ -1,15 +1,19 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
ft.com
|
www.ft.com/uk-edition
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import datetime
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class FinancialTimes(BasicNewsRecipe):
|
class FinancialTimes(BasicNewsRecipe):
|
||||||
title = u'Financial Times - UK printed edition'
|
title = 'Financial Times - UK printed edition'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Financial world news'
|
description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
|
||||||
|
publisher = 'The Financial Times Ltd.'
|
||||||
|
category = 'news, finances, politics, UK, World'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
max_articles_per_feed = 250
|
max_articles_per_feed = 250
|
||||||
@ -17,14 +21,23 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
simultaneous_downloads= 1
|
publication_type = 'newspaper'
|
||||||
delay = 1
|
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
||||||
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
||||||
INDEX = 'http://www.ft.com/uk-edition'
|
INDEX = 'http://www.ft.com/uk-edition'
|
||||||
PREFIX = 'http://www.ft.com'
|
PREFIX = 'http://www.ft.com'
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
, 'linearize_tables' : True
|
||||||
|
}
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
br.open(self.INDEX)
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open(self.LOGIN)
|
br.open(self.LOGIN)
|
||||||
br.select_form(name='loginForm')
|
br.select_form(name='loginForm')
|
||||||
@ -33,29 +46,34 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ]
|
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
|
||||||
remove_tags_after = dict(name='p', attrs={'class':'copyright'})
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'floating-con'})
|
dict(name='div', attrs={'id':'floating-con'})
|
||||||
,dict(name=['meta','iframe','base','object','embed','link'])
|
,dict(name=['meta','iframe','base','object','embed','link'])
|
||||||
|
,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
|
||||||
]
|
]
|
||||||
remove_attributes = ['width','height','lang']
|
remove_attributes = ['width','height','lang']
|
||||||
|
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family:Arial,Helvetica,sans-serif;}
|
body{font-family: Georgia,Times,"Times New Roman",serif}
|
||||||
h2{font-size:large;}
|
h2{font-size:large}
|
||||||
.ft-story-header{font-size:xx-small;}
|
.ft-story-header{font-size: x-small}
|
||||||
.ft-story-body{font-size:small;}
|
|
||||||
a{color:#003399;}
|
|
||||||
.container{font-size:x-small;}
|
.container{font-size:x-small;}
|
||||||
h3{font-size:x-small;color:#003399;}
|
h3{font-size:x-small;color:#003399;}
|
||||||
.copyright{font-size: x-small}
|
.copyright{font-size: x-small}
|
||||||
|
img{margin-top: 0.8em; display: block}
|
||||||
|
.lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
|
||||||
|
.byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def get_artlinks(self, elem):
|
def get_artlinks(self, elem):
|
||||||
articles = []
|
articles = []
|
||||||
for item in elem.findAll('a',href=True):
|
for item in elem.findAll('a',href=True):
|
||||||
url = self.PREFIX + item['href']
|
rawlink = item['href']
|
||||||
|
if rawlink.startswith('http://'):
|
||||||
|
url = rawlink
|
||||||
|
else:
|
||||||
|
url = self.PREFIX + rawlink
|
||||||
title = self.tag_to_string(item)
|
title = self.tag_to_string(item)
|
||||||
date = strftime(self.timefmt)
|
date = strftime(self.timefmt)
|
||||||
articles.append({
|
articles.append({
|
||||||
@ -65,7 +83,7 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
,'description':''
|
,'description':''
|
||||||
})
|
})
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
@ -80,11 +98,41 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
strest.insert(0,st)
|
strest.insert(0,st)
|
||||||
for item in strest:
|
for item in strest:
|
||||||
ftitle = self.tag_to_string(item)
|
ftitle = self.tag_to_string(item)
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
||||||
feedarts = self.get_artlinks(item.parent.ul)
|
feedarts = self.get_artlinks(item.parent.ul)
|
||||||
feeds.append((ftitle,feedarts))
|
feeds.append((ftitle,feedarts))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
items = ['promo-box','promo-title',
|
||||||
|
'promo-headline','promo-image',
|
||||||
|
'promo-intro','promo-link','subhead']
|
||||||
|
for item in items:
|
||||||
|
for it in soup.findAll(item):
|
||||||
|
it.name = 'div'
|
||||||
|
it.attrs = []
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
limg = item.find('img')
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
if limg:
|
||||||
|
item.name = 'div'
|
||||||
|
item.attrs = []
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
cdate = datetime.date.today()
|
||||||
|
if cdate.isoweekday() == 7:
|
||||||
|
cdate -= datetime.timedelta(days=1)
|
||||||
|
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
|
||||||
|
|
81
recipes/frontlineonnet.recipe
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
frontlineonnet.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Frontlineonnet(BasicNewsRecipe):
|
||||||
|
title = 'Frontline'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = "India's national magazine"
|
||||||
|
publisher = 'Frontline'
|
||||||
|
category = 'news, politics, India'
|
||||||
|
no_stylesheets = True
|
||||||
|
delay = 1
|
||||||
|
INDEX = 'http://frontlineonnet.com/'
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = 'cp1252'
|
||||||
|
language = 'en_IN'
|
||||||
|
publication_type = 'magazine'
|
||||||
|
masthead_url = 'http://frontlineonnet.com/images/newfline.jpg'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Verdana,Arial,Helvetica,sans-serif}
|
||||||
|
img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
, 'linearize_tables' : True
|
||||||
|
}
|
||||||
|
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'.*?<base', re.DOTALL|re.IGNORECASE),lambda match: '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html dir="ltr" xml:lang="en-IN"><head><title>title</title><base')
|
||||||
|
,(re.compile(r'<base .*?>', re.DOTALL|re.IGNORECASE),lambda match: '</head><body>')
|
||||||
|
,(re.compile(r'<byline>', re.DOTALL|re.IGNORECASE),lambda match: '<div class="byline">')
|
||||||
|
,(re.compile(r'</byline>', re.DOTALL|re.IGNORECASE),lambda match: '</div>')
|
||||||
|
,(re.compile(r'<center>', re.DOTALL|re.IGNORECASE),lambda match: '<div class="ctr">')
|
||||||
|
,(re.compile(r'</center>', re.DOTALL|re.IGNORECASE),lambda match: '</div>')
|
||||||
|
]
|
||||||
|
|
||||||
|
keep_only_tags= [
|
||||||
|
dict(name='font', attrs={'class':'storyhead'})
|
||||||
|
,dict(attrs={'class':'byline'})
|
||||||
|
]
|
||||||
|
remove_attributes=['size','noshade','border']
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
articles = []
|
||||||
|
soup = self.index_to_soup(self.INDEX)
|
||||||
|
for feed_link in soup.findAll('a',href=True):
|
||||||
|
if feed_link['href'].startswith('stories/'):
|
||||||
|
url = self.INDEX + feed_link['href']
|
||||||
|
title = self.tag_to_string(feed_link)
|
||||||
|
date = strftime(self.timefmt)
|
||||||
|
articles.append({
|
||||||
|
'title' :title
|
||||||
|
,'date' :date
|
||||||
|
,'url' :url
|
||||||
|
,'description':''
|
||||||
|
})
|
||||||
|
return [('Frontline', articles)]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2]
|
||||||
|
|
||||||
|
def image_url_processor(self, baseurl, url):
|
||||||
|
return url.replace('../images/', self.INDEX + 'images/').strip()
|
49
recipes/galicia_confidential.recipe
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.web.feeds import Feed
|
||||||
|
|
||||||
|
class GC_gl(BasicNewsRecipe):
|
||||||
|
title = u'Galicia Confidencial (RSS)'
|
||||||
|
__author__ = u'Susana Sotelo Docío'
|
||||||
|
description = u'Unha fiestra de información aberta a todos'
|
||||||
|
publisher = u'Galicia Confidencial'
|
||||||
|
category = u'news, society, politics, Galicia'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
language = 'gl'
|
||||||
|
direction = 'ltr'
|
||||||
|
cover_url = 'http://galiciaconfidencial.com/imagenes/header/logo_gc.gif'
|
||||||
|
oldest_article = 5
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
center_navbar = False
|
||||||
|
|
||||||
|
feeds = [(u'Novas no RSS', u'http://galiciaconfidencial.com/rss2/xeral.rss')]
|
||||||
|
|
||||||
|
extra_css = u' p{text-align:left} '
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url.replace('http://galiciaconfidencial.com/nova/', 'http://galiciaconfidencial.com/imprimir/')
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
feeds = []
|
||||||
|
self.gc_parse_feeds(feeds)
|
||||||
|
return feeds
|
||||||
|
|
||||||
|
def gc_parse_feeds(self, feeds):
|
||||||
|
rssFeeds = Feed()
|
||||||
|
rssFeeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
self.feed_to_index_append(rssFeeds[:], feeds)
|
||||||
|
|
||||||
|
|
||||||
|
def feed_to_index_append(self, feedObject, masterFeed):
|
||||||
|
for feed in feedObject:
|
||||||
|
newArticles = []
|
||||||
|
for article in feed.articles:
|
||||||
|
newArt = {
|
||||||
|
'title' : article.title,
|
||||||
|
'url' : article.url,
|
||||||
|
'date' : article.date
|
||||||
|
}
|
||||||
|
newArticles.append(newArt)
|
||||||
|
masterFeed.append((feed.title,newArticles))
|
||||||
|
|
35
recipes/geek_poke.recipe
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1307556816(BasicNewsRecipe):
|
||||||
|
title = u'Geek and Poke'
|
||||||
|
__author__ = u'DrMerry'
|
||||||
|
description = u'Geek and Poke Cartoons'
|
||||||
|
oldest_article = 31
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = u'en'
|
||||||
|
simultaneous_downloads = 5
|
||||||
|
#delay = 1
|
||||||
|
timefmt = ' [%A, %d %B, %Y]'
|
||||||
|
summary_length = -1
|
||||||
|
no_stylesheets = True
|
||||||
|
cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
|
||||||
|
remove_javascript = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'blog'
|
||||||
|
|
||||||
|
preprocess_regexps = [ (re.compile(r'(<p> </p>|<iframe.*</iframe>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
|
||||||
|
(re.compile(r'( | )', re.DOTALL|re.IGNORECASE),lambda match: ' '),
|
||||||
|
(re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>')
|
||||||
|
]
|
||||||
|
|
||||||
|
extra_css = 'body, h3, p, h2, h1, div, span{margin:0px} h2.date-header {font-size: 0.7em; color:#eee;} h3.entry-header{font-size: 1.0em} div.entry-body{font-size: 0.9em}'
|
||||||
|
|
||||||
|
|
||||||
|
remove_tags_before = dict(name='h2', attrs={'class':'date-header'})
|
||||||
|
remove_tags_after = dict(name='div', attrs={'class':'entry-body'})
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [(u'Geek and Poke', u'http://feeds.feedburner.com/GeekAndPoke?format=xml')]
|
@ -11,8 +11,8 @@ import mechanize, re
|
|||||||
class GoComics(BasicNewsRecipe):
|
class GoComics(BasicNewsRecipe):
|
||||||
title = 'GoComics'
|
title = 'GoComics'
|
||||||
__author__ = 'Starson17'
|
__author__ = 'Starson17'
|
||||||
__version__ = '1.05'
|
__version__ = '1.06'
|
||||||
__date__ = '19 may 2011'
|
__date__ = '07 June 2011'
|
||||||
description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
|
description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
|
||||||
category = 'news, comics'
|
category = 'news, comics'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
@ -56,225 +56,318 @@ class GoComics(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
for title, url in [
|
for title, url in [
|
||||||
######## COMICS - GENERAL ########
|
(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
|
||||||
(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
|
#(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
|
||||||
# (u"9 to 5", u"http://www.gocomics.com/9to5"),
|
(u"9 to 5", u"http://www.gocomics.com/9to5"),
|
||||||
# (u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
|
#(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
|
||||||
# (u"Adam@Home", u"http://www.gocomics.com/adamathome"),
|
(u"Agnes", u"http://www.gocomics.com/agnes"),
|
||||||
# (u"Agnes", u"http://www.gocomics.com/agnes"),
|
#(u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
|
||||||
# (u"Andy Capp", u"http://www.gocomics.com/andycapp"),
|
#(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
|
||||||
# (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
|
#(u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
|
||||||
# (u"Annie", u"http://www.gocomics.com/annie"),
|
#(u"Annie", u"http://www.gocomics.com/annie"),
|
||||||
(u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
|
#(u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
|
||||||
# (u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
|
#(u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
|
||||||
(u"B.C.", u"http://www.gocomics.com/bc"),
|
(u"B.C.", u"http://www.gocomics.com/bc"),
|
||||||
# (u"Back in the Day", u"http://www.gocomics.com/backintheday"),
|
#(u"Back in the Day", u"http://www.gocomics.com/backintheday"),
|
||||||
# (u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
|
#(u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
|
||||||
# (u"Baldo", u"http://www.gocomics.com/baldo"),
|
#(u"Baldo", u"http://www.gocomics.com/baldo"),
|
||||||
# (u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
|
#(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
|
||||||
# (u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
|
#(u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
|
||||||
# (u"The Barn", u"http://www.gocomics.com/thebarn"),
|
#(u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
|
||||||
# (u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
|
#(u"Ben", u"http://www.gocomics.com/ben"),
|
||||||
# (u"Bewley", u"http://www.gocomics.com/bewley"),
|
#(u"Betty", u"http://www.gocomics.com/betty"),
|
||||||
# (u"Big Top", u"http://www.gocomics.com/bigtop"),
|
#(u"Bewley", u"http://www.gocomics.com/bewley"),
|
||||||
# (u"Biographic", u"http://www.gocomics.com/biographic"),
|
#(u"Big Nate", u"http://www.gocomics.com/bignate"),
|
||||||
(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
|
#(u"Big Top", u"http://www.gocomics.com/bigtop"),
|
||||||
# (u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
|
#(u"Biographic", u"http://www.gocomics.com/biographic"),
|
||||||
# (u"Bliss", u"http://www.gocomics.com/bliss"),
|
#(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
|
||||||
(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
|
#(u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
|
||||||
# (u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
|
#(u"Bliss", u"http://www.gocomics.com/bliss"),
|
||||||
# (u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
|
(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
|
||||||
# (u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
|
#(u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
|
||||||
# (u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
|
#(u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
|
||||||
# (u"The Boondocks", u"http://www.gocomics.com/boondocks"),
|
#(u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
|
||||||
# (u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
|
#(u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
|
||||||
# (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
|
#(u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
|
||||||
# (u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
|
#(u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
|
||||||
# (u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
|
#(u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
|
||||||
# (u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
|
#(u"Brevity", u"http://www.gocomics.com/brevity"),
|
||||||
# (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
|
#(u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
|
||||||
(u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
|
#(u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
|
||||||
# (u"Candorville", u"http://www.gocomics.com/candorville"),
|
(u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
|
||||||
# (u"Cathy", u"http://www.gocomics.com/cathy"),
|
#(u"Candorville", u"http://www.gocomics.com/candorville"),
|
||||||
# (u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
|
#(u"Cathy", u"http://www.gocomics.com/cathy"),
|
||||||
# (u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
|
#(u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
|
||||||
# (u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
|
#(u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
|
||||||
# (u"The City", u"http://www.gocomics.com/thecity"),
|
#(u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
|
||||||
# (u"Cleats", u"http://www.gocomics.com/cleats"),
|
#(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
|
||||||
# (u"Close to Home", u"http://www.gocomics.com/closetohome"),
|
#(u"Cleats", u"http://www.gocomics.com/cleats"),
|
||||||
# (u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
|
#(u"Close to Home", u"http://www.gocomics.com/closetohome"),
|
||||||
# (u"Cornered", u"http://www.gocomics.com/cornered"),
|
#(u"Committed", u"http://www.gocomics.com/committed"),
|
||||||
(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
|
#(u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
|
||||||
# (u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
|
#(u"Cornered", u"http://www.gocomics.com/cornered"),
|
||||||
# (u"Deep Cover", u"http://www.gocomics.com/deepcover"),
|
#(u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
|
||||||
# (u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
|
#(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
|
||||||
# (u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
|
#(u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
|
||||||
# (u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
|
#(u"Deep Cover", u"http://www.gocomics.com/deepcover"),
|
||||||
# (u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
|
#(u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
|
||||||
# (u"Doodles", u"http://www.gocomics.com/doodles"),
|
(u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
|
||||||
(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
|
#(u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
|
||||||
# (u"The Doozies", u"http://www.gocomics.com/thedoozies"),
|
(u"Doodles", u"http://www.gocomics.com/doodles"),
|
||||||
# (u"The Duplex", u"http://www.gocomics.com/duplex"),
|
(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
|
||||||
# (u"Eek!", u"http://www.gocomics.com/eek"),
|
#(u"Drabble", u"http://www.gocomics.com/drabble"),
|
||||||
# (u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
|
#(u"Eek!", u"http://www.gocomics.com/eek"),
|
||||||
# (u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
|
#(u"F Minus", u"http://www.gocomics.com/fminus"),
|
||||||
# (u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
|
#(u"Family Tree", u"http://www.gocomics.com/familytree"),
|
||||||
# (u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
|
#(u"Farcus", u"http://www.gocomics.com/farcus"),
|
||||||
(u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
|
(u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
|
||||||
# (u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
|
#(u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
|
||||||
# (u"Fort Knox", u"http://www.gocomics.com/fortknox"),
|
#(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
|
||||||
# (u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
|
(u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
|
||||||
(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
|
#(u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
|
||||||
# (u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
|
#(u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
|
||||||
# (u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
|
#(u"Fort Knox", u"http://www.gocomics.com/fortknox"),
|
||||||
# (u"Free Range", u"http://www.gocomics.com/freerange"),
|
#(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
|
||||||
# (u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
|
(u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
|
||||||
# (u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
|
#(u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
|
||||||
(u"Garfield", u"http://www.gocomics.com/garfield"),
|
#(u"Frazz", u"http://www.gocomics.com/frazz"),
|
||||||
# (u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
|
#(u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
|
||||||
# (u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
|
#(u"Free Range", u"http://www.gocomics.com/freerange"),
|
||||||
# (u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
|
#(u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
|
||||||
# (u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
|
#(u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
|
||||||
# (u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
|
(u"Garfield", u"http://www.gocomics.com/garfield"),
|
||||||
# (u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
|
#(u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
|
||||||
# (u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
|
#(u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
|
||||||
# (u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
|
#(u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
|
||||||
# (u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
|
#(u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
|
||||||
# (u"Home and Away", u"http://www.gocomics.com/homeandaway"),
|
#(u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
|
||||||
# (u"Housebroken", u"http://www.gocomics.com/housebroken"),
|
#(u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
|
||||||
# (u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
|
#(u"Graffiti", u"http://www.gocomics.com/graffiti"),
|
||||||
# (u"Imagine This", u"http://www.gocomics.com/imaginethis"),
|
#(u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
|
||||||
# (u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
|
#(u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
|
||||||
# (u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
|
#(u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
|
||||||
# (u"Ink Pen", u"http://www.gocomics.com/inkpen"),
|
(u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
|
||||||
# (u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
|
#(u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
|
||||||
# (u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
|
#(u"Herman", u"http://www.gocomics.com/herman"),
|
||||||
# (u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
|
#(u"Home and Away", u"http://www.gocomics.com/homeandaway"),
|
||||||
# (u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
|
#(u"Housebroken", u"http://www.gocomics.com/housebroken"),
|
||||||
# (u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
|
#(u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
|
||||||
# (u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
|
#(u"Imagine This", u"http://www.gocomics.com/imaginethis"),
|
||||||
(u"Lio", u"http://www.gocomics.com/lio"),
|
#(u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
|
||||||
# (u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
|
#(u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
|
||||||
# (u"Little Otto", u"http://www.gocomics.com/littleotto"),
|
#(u"Ink Pen", u"http://www.gocomics.com/inkpen"),
|
||||||
# (u"Loose Parts", u"http://www.gocomics.com/looseparts"),
|
#(u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
|
||||||
# (u"Love Is...", u"http://www.gocomics.com/loveis"),
|
#(u"Jane's World", u"http://www.gocomics.com/janesworld"),
|
||||||
# (u"Maintaining", u"http://www.gocomics.com/maintaining"),
|
#(u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
|
||||||
# (u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
|
#(u"Jump Start", u"http://www.gocomics.com/jumpstart"),
|
||||||
# (u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
|
#(u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
|
||||||
# (u"The Middletons", u"http://www.gocomics.com/themiddletons"),
|
#(u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
|
||||||
# (u"Momma", u"http://www.gocomics.com/momma"),
|
#(u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
|
||||||
# (u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
|
#(u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
|
||||||
# (u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
|
#(u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
|
||||||
# (u"Nest Heads", u"http://www.gocomics.com/nestheads"),
|
#(u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
|
||||||
# (u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
|
#(u"Lio", u"http://www.gocomics.com/lio"),
|
||||||
(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
|
#(u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
|
||||||
(u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
|
#(u"Little Otto", u"http://www.gocomics.com/littleotto"),
|
||||||
# (u"The Norm", u"http://www.gocomics.com/thenorm"),
|
#(u"Lola", u"http://www.gocomics.com/lola"),
|
||||||
# (u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
|
#(u"Loose Parts", u"http://www.gocomics.com/looseparts"),
|
||||||
# (u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
|
#(u"Love Is...", u"http://www.gocomics.com/loveis"),
|
||||||
# (u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
|
#(u"Luann", u"http://www.gocomics.com/luann"),
|
||||||
# (u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
|
#(u"Maintaining", u"http://www.gocomics.com/maintaining"),
|
||||||
# (u"Overboard", u"http://www.gocomics.com/overboard"),
|
(u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
|
||||||
# (u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
|
#(u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
|
||||||
# (u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
|
#(u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
|
||||||
(u"Pickles", u"http://www.gocomics.com/pickles"),
|
#(u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
|
||||||
# (u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
|
#(u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
|
||||||
# (u"Pluggers", u"http://www.gocomics.com/pluggers"),
|
(u"Momma", u"http://www.gocomics.com/momma"),
|
||||||
(u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
|
#(u"Monty", u"http://www.gocomics.com/monty"),
|
||||||
# (u"PreTeena", u"http://www.gocomics.com/preteena"),
|
#(u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
|
||||||
# (u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
|
(u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
|
||||||
# (u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
|
#(u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
|
||||||
(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
|
#(u"Nancy", u"http://www.gocomics.com/nancy"),
|
||||||
# (u"Red and Rover", u"http://www.gocomics.com/redandrover"),
|
#(u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
|
||||||
# (u"Red Meat", u"http://www.gocomics.com/redmeat"),
|
#(u"Nest Heads", u"http://www.gocomics.com/nestheads"),
|
||||||
# (u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
|
#(u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
|
||||||
# (u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
|
#(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
|
||||||
# (u"Rubes", u"http://www.gocomics.com/rubes"),
|
#(u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
|
||||||
# (u"Scary Gary", u"http://www.gocomics.com/scarygary"),
|
#(u"Off The Mark", u"http://www.gocomics.com/offthemark"),
|
||||||
(u"Shoe", u"http://www.gocomics.com/shoe"),
|
#(u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
|
||||||
# (u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
|
#(u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
|
||||||
# (u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
|
#(u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
|
||||||
# (u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
|
#(u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
|
||||||
# (u"Speed Bump", u"http://www.gocomics.com/speedbump"),
|
#(u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
|
||||||
# (u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
|
#(u"Overboard", u"http://www.gocomics.com/overboard"),
|
||||||
(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
|
#(u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
|
||||||
# (u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
|
(u"Peanuts", u"http://www.gocomics.com/peanuts"),
|
||||||
# (u"Sylvia", u"http://www.gocomics.com/sylvia"),
|
#(u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
|
||||||
# (u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
|
#(u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
|
||||||
# (u"Tiny Sepuku", u"http://www.gocomics.com/tinysepuku"),
|
#(u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
|
||||||
# (u"TOBY", u"http://www.gocomics.com/toby"),
|
(u"Pickles", u"http://www.gocomics.com/pickles"),
|
||||||
# (u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
|
#(u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
|
||||||
# (u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
|
#(u"Pluggers", u"http://www.gocomics.com/pluggers"),
|
||||||
# (u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
|
#(u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
|
||||||
# (u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
|
#(u"PreTeena", u"http://www.gocomics.com/preteena"),
|
||||||
# (u"Wee Pals", u"http://www.gocomics.com/weepals"),
|
#(u"Prickly City", u"http://www.gocomics.com/pricklycity"),
|
||||||
# (u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
|
#(u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
|
||||||
(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
|
#(u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
|
||||||
# (u"Working It Out", u"http://www.gocomics.com/workingitout"),
|
#(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
|
||||||
# (u"Yenny", u"http://www.gocomics.com/yenny"),
|
#(u"Reality Check", u"http://www.gocomics.com/realitycheck"),
|
||||||
# (u"Zack Hill", u"http://www.gocomics.com/zackhill"),
|
#(u"Red and Rover", u"http://www.gocomics.com/redandrover"),
|
||||||
(u"Ziggy", u"http://www.gocomics.com/ziggy"),
|
#(u"Red Meat", u"http://www.gocomics.com/redmeat"),
|
||||||
######## COMICS - EDITORIAL ########
|
#(u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
|
||||||
("Lalo Alcaraz","http://www.gocomics.com/laloalcaraz"),
|
#(u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
|
||||||
("Nick Anderson","http://www.gocomics.com/nickanderson"),
|
#(u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
|
||||||
("Chuck Asay","http://www.gocomics.com/chuckasay"),
|
#(u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
|
||||||
("Tony Auth","http://www.gocomics.com/tonyauth"),
|
#(u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
|
||||||
("Donna Barstow","http://www.gocomics.com/donnabarstow"),
|
#(u"Rubes", u"http://www.gocomics.com/rubes"),
|
||||||
# ("Bruce Beattie","http://www.gocomics.com/brucebeattie"),
|
#(u"Rudy Park", u"http://www.gocomics.com/rudypark"),
|
||||||
# ("Clay Bennett","http://www.gocomics.com/claybennett"),
|
#(u"Scary Gary", u"http://www.gocomics.com/scarygary"),
|
||||||
# ("Lisa Benson","http://www.gocomics.com/lisabenson"),
|
#(u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
|
||||||
# ("Steve Benson","http://www.gocomics.com/stevebenson"),
|
#(u"Shoe", u"http://www.gocomics.com/shoe"),
|
||||||
# ("Chip Bok","http://www.gocomics.com/chipbok"),
|
#(u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
|
||||||
# ("Steve Breen","http://www.gocomics.com/stevebreen"),
|
#(u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
|
||||||
# ("Chris Britt","http://www.gocomics.com/chrisbritt"),
|
#(u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
|
||||||
# ("Stuart Carlson","http://www.gocomics.com/stuartcarlson"),
|
#(u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
|
||||||
# ("Ken Catalino","http://www.gocomics.com/kencatalino"),
|
#(u"Speed Bump", u"http://www.gocomics.com/speedbump"),
|
||||||
# ("Paul Conrad","http://www.gocomics.com/paulconrad"),
|
#(u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
|
||||||
# ("Jeff Danziger","http://www.gocomics.com/jeffdanziger"),
|
#(u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
|
||||||
# ("Matt Davies","http://www.gocomics.com/mattdavies"),
|
#(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
|
||||||
# ("John Deering","http://www.gocomics.com/johndeering"),
|
#(u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
|
||||||
# ("Bob Gorrell","http://www.gocomics.com/bobgorrell"),
|
#(u"Sylvia", u"http://www.gocomics.com/sylvia"),
|
||||||
# ("Walt Handelsman","http://www.gocomics.com/walthandelsman"),
|
#(u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
|
||||||
# ("Clay Jones","http://www.gocomics.com/clayjones"),
|
#(u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
|
||||||
# ("Kevin Kallaugher","http://www.gocomics.com/kevinkallaugher"),
|
#(u"That's Life", u"http://www.gocomics.com/thatslife"),
|
||||||
# ("Steve Kelley","http://www.gocomics.com/stevekelley"),
|
#(u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
|
||||||
# ("Dick Locher","http://www.gocomics.com/dicklocher"),
|
#(u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
|
||||||
# ("Chan Lowe","http://www.gocomics.com/chanlowe"),
|
#(u"The Barn", u"http://www.gocomics.com/thebarn"),
|
||||||
# ("Mike Luckovich","http://www.gocomics.com/mikeluckovich"),
|
#(u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
|
||||||
# ("Gary Markstein","http://www.gocomics.com/garymarkstein"),
|
#(u"The Boondocks", u"http://www.gocomics.com/boondocks"),
|
||||||
# ("Glenn McCoy","http://www.gocomics.com/glennmccoy"),
|
#(u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
|
||||||
# ("Jim Morin","http://www.gocomics.com/jimmorin"),
|
#(u"The Buckets", u"http://www.gocomics.com/thebuckets"),
|
||||||
# ("Jack Ohman","http://www.gocomics.com/jackohman"),
|
#(u"The City", u"http://www.gocomics.com/thecity"),
|
||||||
# ("Pat Oliphant","http://www.gocomics.com/patoliphant"),
|
#(u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
|
||||||
# ("Joel Pett","http://www.gocomics.com/joelpett"),
|
#(u"The Doozies", u"http://www.gocomics.com/thedoozies"),
|
||||||
# ("Ted Rall","http://www.gocomics.com/tedrall"),
|
#(u"The Duplex", u"http://www.gocomics.com/duplex"),
|
||||||
# ("Michael Ramirez","http://www.gocomics.com/michaelramirez"),
|
#(u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
|
||||||
# ("Marshall Ramsey","http://www.gocomics.com/marshallramsey"),
|
#(u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
|
||||||
# ("Steve Sack","http://www.gocomics.com/stevesack"),
|
#(u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
|
||||||
# ("Ben Sargent","http://www.gocomics.com/bensargent"),
|
#(u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
|
||||||
# ("Drew Sheneman","http://www.gocomics.com/drewsheneman"),
|
#(u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
|
||||||
# ("John Sherffius","http://www.gocomics.com/johnsherffius"),
|
#(u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
|
||||||
# ("Small World","http://www.gocomics.com/smallworld"),
|
#(u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
|
||||||
# ("Scott Stantis","http://www.gocomics.com/scottstantis"),
|
#(u"The Middletons", u"http://www.gocomics.com/themiddletons"),
|
||||||
# ("Wayne Stayskal","http://www.gocomics.com/waynestayskal"),
|
#(u"The Norm", u"http://www.gocomics.com/thenorm"),
|
||||||
# ("Dana Summers","http://www.gocomics.com/danasummers"),
|
#(u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
|
||||||
# ("Paul Szep","http://www.gocomics.com/paulszep"),
|
#(u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
|
||||||
# ("Mike Thompson","http://www.gocomics.com/mikethompson"),
|
#(u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
|
||||||
# ("Tom Toles","http://www.gocomics.com/tomtoles"),
|
#(u"Tiny Sepuku", u"http://www.gocomics.com/tinysepuku"),
|
||||||
# ("Gary Varvel","http://www.gocomics.com/garyvarvel"),
|
#(u"TOBY", u"http://www.gocomics.com/toby"),
|
||||||
# ("ViewsAfrica","http://www.gocomics.com/viewsafrica"),
|
#(u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
|
||||||
# ("ViewsAmerica","http://www.gocomics.com/viewsamerica"),
|
#(u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
|
||||||
# ("ViewsAsia","http://www.gocomics.com/viewsasia"),
|
#(u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
|
||||||
# ("ViewsBusiness","http://www.gocomics.com/viewsbusiness"),
|
#(u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
|
||||||
# ("ViewsEurope","http://www.gocomics.com/viewseurope"),
|
#(u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
|
||||||
# ("ViewsLatinAmerica","http://www.gocomics.com/viewslatinamerica"),
|
#(u"Wee Pals", u"http://www.gocomics.com/weepals"),
|
||||||
# ("ViewsMidEast","http://www.gocomics.com/viewsmideast"),
|
#(u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
|
||||||
# ("Views of the World","http://www.gocomics.com/viewsoftheworld"),
|
#(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
|
||||||
# ("Kerry Waghorn","http://www.gocomics.com/facesinthenews"),
|
#(u"Working Daze", u"http://www.gocomics.com/workingdaze"),
|
||||||
# ("Dan Wasserman","http://www.gocomics.com/danwasserman"),
|
#(u"Working It Out", u"http://www.gocomics.com/workingitout"),
|
||||||
# ("Signe Wilkinson","http://www.gocomics.com/signewilkinson"),
|
#(u"Yenny", u"http://www.gocomics.com/yenny"),
|
||||||
# ("Wit of the World","http://www.gocomics.com/witoftheworld"),
|
#(u"Zack Hill", u"http://www.gocomics.com/zackhill"),
|
||||||
# ("Don Wright","http://www.gocomics.com/donwright"),
|
(u"Ziggy", u"http://www.gocomics.com/ziggy"),
|
||||||
|
#
|
||||||
|
######## EDITORIAL CARTOONS #####################
|
||||||
|
(u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
|
||||||
|
#(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
|
||||||
|
#(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
|
||||||
|
#(u"Bill Day", u"http://www.gocomics.com/billday"),
|
||||||
|
#(u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
|
||||||
|
#(u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
|
||||||
|
(u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
|
||||||
|
#(u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
|
||||||
|
#(u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
|
||||||
|
#(u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
|
||||||
|
#(u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
|
||||||
|
#(u"Chip Bok",u"http://www.gocomics.com/chipbok"),
|
||||||
|
#(u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
|
||||||
|
#(u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
|
||||||
|
#(u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
|
||||||
|
#(u"Clay Jones",u"http://www.gocomics.com/clayjones"),
|
||||||
|
#(u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
|
||||||
|
#(u"Dana Summers",u"http://www.gocomics.com/danasummers"),
|
||||||
|
#(u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
|
||||||
|
#(u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
|
||||||
|
(u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
|
||||||
|
#(u"Don Wright",u"http://www.gocomics.com/donwright"),
|
||||||
|
#(u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
|
||||||
|
#(u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
|
||||||
|
#(u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
|
||||||
|
#(u"Ed Stein", u"http://www.gocomics.com/edstein"),
|
||||||
|
#(u"Eric Allie", u"http://www.gocomics.com/ericallie"),
|
||||||
|
#(u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
|
||||||
|
#(u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
|
||||||
|
#(u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
|
||||||
|
#(u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
|
||||||
|
#(u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
|
||||||
|
#(u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
|
||||||
|
#(u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
|
||||||
|
#(u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
|
||||||
|
#(u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
|
||||||
|
#(u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
|
||||||
|
#(u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
|
||||||
|
#(u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
|
||||||
|
#(u"Joel Pett",u"http://www.gocomics.com/joelpett"),
|
||||||
|
#(u"John Cole", u"http://www.gocomics.com/johncole"),
|
||||||
|
#(u"John Darkow", u"http://www.gocomics.com/johndarkow"),
|
||||||
|
#(u"John Deering",u"http://www.gocomics.com/johndeering"),
|
||||||
|
#(u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
|
||||||
|
#(u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
|
||||||
|
#(u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
|
||||||
|
#(u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
|
||||||
|
#(u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
|
||||||
|
#(u"Larry Wright", u"http://www.gocomics.com/larrywright"),
|
||||||
|
#(u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
|
||||||
|
#(u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
|
||||||
|
#(u"Matt Bors", u"http://www.gocomics.com/mattbors"),
|
||||||
|
#(u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
|
||||||
|
#(u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
|
||||||
|
#(u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
|
||||||
|
#(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
|
||||||
|
#(u"Mike Thompson", u"http://www.gocomics.com/mikethompson"),
|
||||||
|
#(u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
|
||||||
|
#(u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
|
||||||
|
#(u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
|
||||||
|
#(u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
|
||||||
|
#(u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
|
||||||
|
#(u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
|
||||||
|
#(u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
|
||||||
|
#(u"Paul Szep", u"http://www.gocomics.com/paulszep"),
|
||||||
|
#(u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
|
||||||
|
#(u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
|
||||||
|
#(u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
|
||||||
|
#(u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
|
||||||
|
#(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
|
||||||
|
#(u"Small World",u"http://www.gocomics.com/smallworld"),
|
||||||
|
#(u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
|
||||||
|
#(u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
|
||||||
|
#(u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
|
||||||
|
#(u"Steve Sack", u"http://www.gocomics.com/stevesack"),
|
||||||
|
#(u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
|
||||||
|
#(u"Ted Rall",u"http://www.gocomics.com/tedrall"),
|
||||||
|
#(u"(Th)ink", u"http://www.gocomics.com/think"),
|
||||||
|
#(u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
|
||||||
|
(u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
|
||||||
|
#(u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
|
||||||
|
#(u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
|
||||||
|
#(u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
|
||||||
|
#(u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
|
||||||
|
#(u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
|
||||||
|
#(u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
|
||||||
|
#(u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
|
||||||
|
#(u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
|
||||||
|
(u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
|
||||||
|
#(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
|
||||||
|
#(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
|
||||||
]:
|
]:
|
||||||
print 'Working on: ', title
|
print 'Working on: ', title
|
||||||
articles = self.make_links(url)
|
articles = self.make_links(url)
|
||||||
@ -352,3 +445,4 @@ class GoComics(BasicNewsRecipe):
|
|||||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ class Guardian(BasicNewsRecipe):
|
|||||||
# List of section titles to ignore
|
# List of section titles to ignore
|
||||||
# For example: ['Sport']
|
# For example: ['Sport']
|
||||||
ignore_sections = []
|
ignore_sections = []
|
||||||
|
|
||||||
timefmt = ' [%a, %d %b %Y]'
|
timefmt = ' [%a, %d %b %Y]'
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
|
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
|
||||||
@ -87,8 +87,14 @@ class Guardian(BasicNewsRecipe):
|
|||||||
idx = soup.find('div', id='book-index')
|
idx = soup.find('div', id='book-index')
|
||||||
for s in idx.findAll('strong', attrs={'class':'book'}):
|
for s in idx.findAll('strong', attrs={'class':'book'}):
|
||||||
a = s.find('a', href=True)
|
a = s.find('a', href=True)
|
||||||
yield (self.tag_to_string(a), a['href'])
|
section_title = self.tag_to_string(a)
|
||||||
|
if not section_title in self.ignore_sections:
|
||||||
|
prefix = ''
|
||||||
|
if section_title != 'Main section':
|
||||||
|
prefix = section_title + ': '
|
||||||
|
for subsection in s.parent.findAll('a', attrs={'class':'book-section'}):
|
||||||
|
yield (prefix + self.tag_to_string(subsection), subsection['href'])
|
||||||
|
|
||||||
def find_articles(self, url):
|
def find_articles(self, url):
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
div = soup.find('div', attrs={'class':'book-index'})
|
div = soup.find('div', attrs={'class':'book-index'})
|
||||||
@ -109,15 +115,12 @@ class Guardian(BasicNewsRecipe):
|
|||||||
'title': title, 'url':url, 'description':desc,
|
'title': title, 'url':url, 'description':desc,
|
||||||
'date' : strftime('%a, %d %b'),
|
'date' : strftime('%a, %d %b'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
try:
|
try:
|
||||||
feeds = []
|
feeds = []
|
||||||
for title, href in self.find_sections():
|
for title, href in self.find_sections():
|
||||||
if not title in self.ignore_sections:
|
feeds.append((title, list(self.find_articles(href))))
|
||||||
feeds.append((title, list(self.find_articles(href))))
|
|
||||||
return feeds
|
return feeds
|
||||||
except:
|
except:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
class HBR(BasicNewsRecipe):
|
class HBR(BasicNewsRecipe):
|
||||||
|
|
||||||
@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
LOGIN_URL = 'http://hbr.org/login?request_url=/'
|
||||||
INDEX = 'http://hbr.org/current'
|
INDEX = 'http://hbr.org/archive-toc/BR'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', id='pageContainer')]
|
keep_only_tags = [dict(name='div', id='pageContainer')]
|
||||||
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
||||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||||
'mailingListTout', 'partnerCenter', 'pageFooter',
|
'mailingListTout', 'partnerCenter', 'pageFooter',
|
||||||
|
'superNavHeadContainer', 'hbrDisqus',
|
||||||
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
||||||
dict(name='iframe')]
|
dict(name='iframe')]
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def hbr_get_toc(self):
|
def hbr_get_toc(self):
|
||||||
soup = self.index_to_soup(self.INDEX)
|
today = date.today()
|
||||||
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
|
future = today + timedelta(days=30)
|
||||||
return self.index_to_soup('http://hbr.org'+url)
|
for x in [x.strftime('%y%m') for x in (future, today)]:
|
||||||
|
url = self.INDEX + x
|
||||||
|
soup = self.index_to_soup(url)
|
||||||
|
if not soup.find(text='Issue Not Found'):
|
||||||
|
return soup
|
||||||
|
raise Exception('Could not find current issue')
|
||||||
|
|
||||||
def hbr_parse_section(self, container, feeds):
|
def hbr_parse_section(self, container, feeds):
|
||||||
current_section = None
|
current_section = None
|
||||||
|
@ -6,7 +6,7 @@ class HBR(BasicNewsRecipe):
|
|||||||
title = 'Harvard Business Review Blogs'
|
title = 'Harvard Business Review Blogs'
|
||||||
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
__author__ = 'Kovid Goyal and Sujata Raman, enhanced by BrianG'
|
__author__ = 'Kovid Goyal, enhanced by BrianG'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
|
52
recipes/heise_online.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class AdvancedUserRecipe(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = 'Heise-online'
|
||||||
|
description = 'News vom Heise-Verlag'
|
||||||
|
__author__ = 'schuster'
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'de'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 35
|
||||||
|
rescale_images = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
timeout = 5
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
|
|
||||||
|
remove_tags_after = dict(name ='p', attrs={'class':'editor'})
|
||||||
|
remove_tags = [dict(id='navi_top_container'),
|
||||||
|
dict(id='navi_bottom'),
|
||||||
|
dict(id='mitte_rechts'),
|
||||||
|
dict(id='navigation'),
|
||||||
|
dict(id='subnavi'),
|
||||||
|
dict(id='social_bookmarks'),
|
||||||
|
dict(id='permalink'),
|
||||||
|
dict(id='content_foren'),
|
||||||
|
dict(id='seiten_navi'),
|
||||||
|
dict(id='adbottom'),
|
||||||
|
dict(id='sitemap')]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
|
||||||
|
('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
|
||||||
|
('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
|
||||||
|
('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
|
||||||
|
('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
|
||||||
|
('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
|
||||||
|
('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
|
||||||
|
('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
|
||||||
|
('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
|
||||||
|
('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
|
||||||
|
('iX', 'http://www.heise.de/ix/news/news.rdf'),
|
||||||
|
('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
|
||||||
|
('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
|
||||||
|
('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
|
||||||
|
('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
|
||||||
|
('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
|
||||||
|
('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
|
||||||
|
]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url + '?view=print'
|
||||||
|
|
BIN
recipes/icons/ambito_financiero.png
Normal file
After Width: | Height: | Size: 508 B |
BIN
recipes/icons/athens_news.png
Normal file
After Width: | Height: | Size: 514 B |
BIN
recipes/icons/buenosaireseconomico.png
Normal file
After Width: | Height: | Size: 400 B |
BIN
recipes/icons/elclubdelebook.png
Normal file
After Width: | Height: | Size: 5.3 KiB |
Before Width: | Height: | Size: 770 B After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/financial_times.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
BIN
recipes/icons/financial_times_uk.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
BIN
recipes/icons/observatorul_cultural.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/pecat.png
Normal file
After Width: | Height: | Size: 383 B |
BIN
recipes/icons/stiintasitehnica.png
Normal file
After Width: | Height: | Size: 703 B |
43
recipes/idg_now.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class IDGNow(BasicNewsRecipe):
|
||||||
|
title = 'IDG Now!'
|
||||||
|
__author__ = 'Diniz Bortolotto'
|
||||||
|
description = 'Posts do IDG Now!'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
encoding = 'utf8'
|
||||||
|
publisher = 'Now!Digital Business Ltda.'
|
||||||
|
category = 'technology, telecom, IT, Brazil'
|
||||||
|
language = 'pt_BR'
|
||||||
|
publication_type = 'technology portal'
|
||||||
|
use_embedded_content = False
|
||||||
|
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
link = article.get('link', None)
|
||||||
|
if link is None:
|
||||||
|
return article
|
||||||
|
if link.split('/')[-1]=="story01.htm":
|
||||||
|
link=link.split('/')[-2]
|
||||||
|
a=['0B','0C','0D','0E','0F','0G','0I','0N' ,'0L0S','0A','0J3A']
|
||||||
|
b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'_','.com','www.','0',':']
|
||||||
|
for i in range(0,len(a)):
|
||||||
|
link=link.replace(a[i],b[i])
|
||||||
|
link=link.split('&')[-3]
|
||||||
|
link=link.split('=')[1]
|
||||||
|
link=link + "/IDGNoticiaPrint_view"
|
||||||
|
return link
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Ultimas noticias', u'http://rss.idgnow.com.br/c/32184/f/499640/index.rss'),
|
||||||
|
(u'Computa\xe7\xe3o Corporativa', u'http://rss.idgnow.com.br/c/32184/f/499643/index.rss'),
|
||||||
|
(u'Carreira', u'http://rss.idgnow.com.br/c/32184/f/499644/index.rss'),
|
||||||
|
(u'Computa\xe7\xe3o Pessoal', u'http://rss.idgnow.com.br/c/32184/f/499645/index.rss'),
|
||||||
|
(u'Internet', u'http://rss.idgnow.com.br/c/32184/f/499646/index.rss'),
|
||||||
|
(u'Mercado', u'http://rss.idgnow.com.br/c/32184/f/419982/index.rss'),
|
||||||
|
(u'Seguran\xe7a', u'http://rss.idgnow.com.br/c/32184/f/499647/index.rss'),
|
||||||
|
(u'Telecom e Redes', u'http://rss.idgnow.com.br/c/32184/f/499648/index.rss')
|
||||||
|
]
|
||||||
|
|
||||||
|
reverse_article_order = True
|
@ -6,7 +6,7 @@ class TheIndependent(BasicNewsRecipe):
|
|||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
__author__ = 'Krittika Goyal'
|
__author__ = 'Krittika Goyal'
|
||||||
oldest_article = 1 #days
|
oldest_article = 1 #days
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 30
|
||||||
encoding = 'latin1'
|
encoding = 'latin1'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -25,24 +25,39 @@ class TheIndependent(BasicNewsRecipe):
|
|||||||
'http://www.independent.co.uk/news/uk/rss'),
|
'http://www.independent.co.uk/news/uk/rss'),
|
||||||
('World',
|
('World',
|
||||||
'http://www.independent.co.uk/news/world/rss'),
|
'http://www.independent.co.uk/news/world/rss'),
|
||||||
('Sport',
|
|
||||||
'http://www.independent.co.uk/sport/rss'),
|
|
||||||
('Arts and Entertainment',
|
|
||||||
'http://www.independent.co.uk/arts-entertainment/rss'),
|
|
||||||
('Business',
|
('Business',
|
||||||
'http://www.independent.co.uk/news/business/rss'),
|
'http://www.independent.co.uk/news/business/rss'),
|
||||||
('Life and Style',
|
|
||||||
'http://www.independent.co.uk/life-style/gadgets-and-tech/news/rss'),
|
|
||||||
('Science',
|
|
||||||
'http://www.independent.co.uk/news/science/rss'),
|
|
||||||
('People',
|
('People',
|
||||||
'http://www.independent.co.uk/news/people/rss'),
|
'http://www.independent.co.uk/news/people/rss'),
|
||||||
|
('Science',
|
||||||
|
'http://www.independent.co.uk/news/science/rss'),
|
||||||
('Media',
|
('Media',
|
||||||
'http://www.independent.co.uk/news/media/rss'),
|
'http://www.independent.co.uk/news/media/rss'),
|
||||||
('Health and Families',
|
('Education',
|
||||||
'http://www.independent.co.uk/life-style/health-and-families/rss'),
|
'http://www.independent.co.uk/news/education/rss'),
|
||||||
('Obituaries',
|
('Obituaries',
|
||||||
'http://www.independent.co.uk/news/obituaries/rss'),
|
'http://www.independent.co.uk/news/obituaries/rss'),
|
||||||
|
|
||||||
|
('Opinion',
|
||||||
|
'http://www.independent.co.uk/opinion/rss'),
|
||||||
|
|
||||||
|
('Environment',
|
||||||
|
'http://www.independent.co.uk/environment/rss'),
|
||||||
|
|
||||||
|
('Sport',
|
||||||
|
'http://www.independent.co.uk/sport/rss'),
|
||||||
|
|
||||||
|
('Life and Style',
|
||||||
|
'http://www.independent.co.uk/life-style/rss'),
|
||||||
|
|
||||||
|
('Arts and Entertainment',
|
||||||
|
'http://www.independent.co.uk/arts-entertainment/rss'),
|
||||||
|
|
||||||
|
('Travel',
|
||||||
|
'http://www.independent.co.uk/travel/rss'),
|
||||||
|
|
||||||
|
('Money',
|
||||||
|
'http://www.independent.co.uk/money/rss'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
infobae.com
|
infobae.com
|
||||||
'''
|
'''
|
||||||
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Infobae(BasicNewsRecipe):
|
class Infobae(BasicNewsRecipe):
|
||||||
title = 'Infobae.com'
|
title = 'Infobae.com'
|
||||||
__author__ = 'Darko Miletic and Sujata Raman'
|
__author__ = 'Darko Miletic and Sujata Raman'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Infobae.com es el sitio de noticias con mayor actualizacion de Latinoamérica. Noticias actualizadas las 24 horas, los 365 días del año.'
|
||||||
publisher = 'Infobae.com'
|
publisher = 'Infobae.com'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
@ -17,13 +17,13 @@ class Infobae(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
encoding = 'cp1252'
|
encoding = 'utf8'
|
||||||
masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
|
masthead_url = 'http://www.infobae.com/media/img/static/logo-infobae.gif'
|
||||||
remove_javascript = True
|
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
body{font-family:Arial,Helvetica,sans-serif;}
|
body{font-family: Arial,Helvetica,sans-serif}
|
||||||
.popUpTitulo{color:#0D4261; font-size: xx-large}
|
img{display: block}
|
||||||
|
.categoria{font-size: small; text-transform: uppercase}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
@ -31,26 +31,44 @@ class Infobae(BasicNewsRecipe):
|
|||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : language
|
, 'language' : language
|
||||||
, 'linearize_tables' : True
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
keep_only_tags = [dict(attrs={'class':['titularnota','nota','post-title','post-entry','entry-title','entry-info','entry-content']})]
|
||||||
|
remove_tags_after = dict(attrs={'class':['interior-noticia','nota-desc','tags']})
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['base','meta','link','iframe','object','embed','ins'])
|
||||||
|
,dict(attrs={'class':['barranota','tags']})
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
(u'Saludable' , u'http://www.infobae.com/rss/saludable.xml')
|
||||||
,(u'Salud' , u'http://www.infobae.com/adjuntos/html/RSS/salud.xml' )
|
,(u'Economia' , u'http://www.infobae.com/rss/economia.xml' )
|
||||||
,(u'Tecnologia', u'http://www.infobae.com/adjuntos/html/RSS/tecnologia.xml')
|
,(u'En Numeros', u'http://www.infobae.com/rss/rating.xml' )
|
||||||
,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' )
|
,(u'Finanzas' , u'http://www.infobae.com/rss/finanzas.xml' )
|
||||||
|
,(u'Mundo' , u'http://www.infobae.com/rss/mundo.xml' )
|
||||||
|
,(u'Sociedad' , u'http://www.infobae.com/rss/sociedad.xml' )
|
||||||
|
,(u'Politica' , u'http://www.infobae.com/rss/politica.xml' )
|
||||||
|
,(u'Deportes' , u'http://www.infobae.com/rss/deportes.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def preprocess_html(self, soup):
|
||||||
article_part = url.rpartition('/')[2]
|
for item in soup.findAll(style=True):
|
||||||
article_id= article_part.partition('-')[0]
|
del item['style']
|
||||||
return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
|
for item in soup.findAll('a'):
|
||||||
|
limg = item.find('img')
|
||||||
def postprocess_html(self, soup, first):
|
if item.string is not None:
|
||||||
for tag in soup.findAll(name='strong'):
|
str = item.string
|
||||||
tag.name = 'b'
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
if limg:
|
||||||
|
item.name = 'div'
|
||||||
|
item.attrs = []
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -99,7 +99,7 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':['contenu']})
|
dict(name='div', attrs={'class':['contenu']})
|
||||||
]
|
]
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':['LM_atome']})]
|
||||||
remove_tags_after = [dict(id='appel_temoignage')]
|
remove_tags_after = [dict(id='appel_temoignage')]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
|
@ -14,7 +14,7 @@ class LeTemps(BasicNewsRecipe):
|
|||||||
title = u'Le Temps'
|
title = u'Le Temps'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = 'Sujata Raman'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'French news. Needs a subscription from http://www.letemps.ch'
|
description = 'French news. Needs a subscription from http://www.letemps.ch'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
@ -27,6 +27,7 @@ class LeTemps(BasicNewsRecipe):
|
|||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
br.open('http://www.letemps.ch/login')
|
br.open('http://www.letemps.ch/login')
|
||||||
|
br.select_form(nr=1)
|
||||||
br['username'] = self.username
|
br['username'] = self.username
|
||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
raw = br.submit().read()
|
raw = br.submit().read()
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
mondediplo.com
|
mondediplo.com
|
||||||
'''
|
'''
|
||||||
@ -11,7 +11,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class LeMondeDiplomatiqueEn(BasicNewsRecipe):
|
class LeMondeDiplomatiqueEn(BasicNewsRecipe):
|
||||||
title = 'Le Monde diplomatique - English edition'
|
title = 'Le Monde diplomatique - English edition'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Real journalism making sense of the world around us'
|
description = "Le Monde diplomatique is the place you go when you want to know what's really happening. This is a major international paper that is truly independent, that sees the world in fresh ways, that focuses on places no other publications reach. We offer a clear, considered view of the conflicting interests and complexities of a modern global world. LMD in English is a concise version of the Paris-based parent edition, publishing all the major stories each month, expertly translated, and with some London-based commissions too. We offer a taster of LMD quality on our website where a selection of articles are available each month."
|
||||||
publisher = 'Le Monde diplomatique'
|
publisher = 'Le Monde diplomatique'
|
||||||
category = 'news, politics, world'
|
category = 'news, politics, world'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -26,13 +26,19 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
|
|||||||
INDEX = PREFIX + strftime('%Y/%m/')
|
INDEX = PREFIX + strftime('%Y/%m/')
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
extra_css = ' body{font-family: "Luxi sans","Lucida sans","Lucida Grande",Lucida,"Lucida Sans Unicode",sans-serif} .surtitre{font-size: 1.2em; font-variant: small-caps; margin-bottom: 0.5em} .chapo{font-size: 1.2em; font-weight: bold; margin: 1em 0 0.5em} .texte{font-family: Georgia,"Times New Roman",serif} h1{color: #990000} .notes{border-top: 1px solid #CCCCCC; font-size: 0.9em; line-height: 1.4em} '
|
extra_css = """
|
||||||
|
body{font-family: "Luxi sans","Lucida sans","Lucida Grande",Lucida,"Lucida Sans Unicode",sans-serif}
|
||||||
|
.surtitre{font-size: 1.2em; font-variant: small-caps; margin-bottom: 0.5em}
|
||||||
|
.chapo{font-size: 1.2em; font-weight: bold; margin: 1em 0 0.5em}
|
||||||
|
.texte{font-family: Georgia,"Times New Roman",serif} h1{color: #990000}
|
||||||
|
.notes{border-top: 1px solid #CCCCCC; font-size: 0.9em; line-height: 1.4em}
|
||||||
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : language
|
, 'language' : language
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
@ -46,12 +52,12 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
|
|||||||
br.open(self.LOGIN,data)
|
br.open(self.LOGIN,data)
|
||||||
return br
|
return br
|
||||||
|
|
||||||
keep_only_tags =[
|
keep_only_tags =[
|
||||||
dict(name='div', attrs={'id':'contenu'})
|
dict(name='div', attrs={'id':'contenu'})
|
||||||
, dict(name='div',attrs={'class':'notes surlignable'})
|
, dict(name='div',attrs={'class':'notes surlignable'})
|
||||||
]
|
]
|
||||||
remove_tags = [dict(name=['object','link','script','iframe','base'])]
|
remove_tags = [dict(name=['object','link','script','iframe','base'])]
|
||||||
remove_attributes = ['height','width']
|
remove_attributes = ['height','width','name','lang']
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
articles = []
|
articles = []
|
||||||
@ -75,3 +81,24 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
|
|||||||
})
|
})
|
||||||
return [(self.title, articles)]
|
return [(self.title, articles)]
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
cover_url = None
|
||||||
|
soup = self.index_to_soup(self.INDEX)
|
||||||
|
cover_item = soup.find('div',attrs={'class':'current'})
|
||||||
|
if cover_item:
|
||||||
|
ap = cover_item.find('img',attrs={'class':'spip_logos'})
|
||||||
|
if ap:
|
||||||
|
cover_url = self.INDEX + ap['src']
|
||||||
|
return cover_url
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
return soup
|
||||||
|
@ -1,239 +1,28 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
class AdvancedUserRecipe1308306308(BasicNewsRecipe):
|
||||||
|
|
||||||
'''
|
|
||||||
macleans.ca
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
from datetime import timedelta, date
|
|
||||||
|
|
||||||
class Macleans(BasicNewsRecipe):
|
|
||||||
title = u'Macleans Magazine'
|
title = u'Macleans Magazine'
|
||||||
__author__ = 'Nick Redding'
|
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
description = ('Macleans Magazine')
|
__author__ = 'sexymax15'
|
||||||
|
oldest_article = 30
|
||||||
|
max_articles_per_feed = 12
|
||||||
|
|
||||||
|
use_embedded_content = False
|
||||||
|
|
||||||
|
remove_empty_feeds = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
timefmt = ' [%b %d]'
|
remove_javascript = True
|
||||||
|
remove_tags = [dict(name ='img'),dict (id='header'),{'class':'postmetadata'}]
|
||||||
|
remove_tags_after = {'class':'postmetadata'}
|
||||||
|
|
||||||
# customization notes: delete sections you are not interested in
|
feeds = [(u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/'),
|
||||||
# set oldest_article to the maximum number of days back from today to include articles
|
(u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
|
||||||
sectionlist = [
|
(u'World', u'http://www2.macleans.ca/category/world-from-the-magazine/feed/'),
|
||||||
['http://www2.macleans.ca/','Front Page'],
|
(u'Business', u'http://www2.macleans.ca/category/business/feed/'),
|
||||||
['http://www2.macleans.ca/category/canada/','Canada'],
|
(u'Arts & Culture', u'http://www2.macleans.ca/category/arts-culture/feed/'),
|
||||||
['http://www2.macleans.ca/category/world-from-the-magazine/','World'],
|
(u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'),
|
||||||
['http://www2.macleans.ca/category/business','Business'],
|
(u'Health', u'http://www2.macleans.ca/category/health-from-the-magazine/feed/'),
|
||||||
['http://www2.macleans.ca/category/arts-culture/','Culture'],
|
(u'Environment', u'http://www2.macleans.ca/category/environment-from-the-magazine/feed/')]
|
||||||
['http://www2.macleans.ca/category/opinion','Opinion'],
|
def print_version(self, url):
|
||||||
['http://www2.macleans.ca/category/health-from-the-magazine/','Health'],
|
return url + 'print/'
|
||||||
['http://www2.macleans.ca/category/environment-from-the-magazine/','Environment'],
|
|
||||||
['http://www2.macleans.ca/category/education/','On Campus'],
|
|
||||||
['http://www2.macleans.ca/category/travel-from-the-magazine/','Travel']
|
|
||||||
]
|
|
||||||
oldest_article = 7
|
|
||||||
|
|
||||||
# formatting for print version of articles
|
|
||||||
extra_css = '''h2{font-family:Times,serif; font-size:large;}
|
|
||||||
small {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
# tag handling for print version of articles
|
|
||||||
keep_only_tags = [dict(id='tw-print')]
|
|
||||||
remove_tags = [dict({'class':'postmetadata'})]
|
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self,soup):
|
|
||||||
for img_tag in soup.findAll('img'):
|
|
||||||
parent_tag = img_tag.parent
|
|
||||||
if parent_tag.name == 'a':
|
|
||||||
new_tag = Tag(soup,'p')
|
|
||||||
new_tag.insert(0,img_tag)
|
|
||||||
parent_tag.replaceWith(new_tag)
|
|
||||||
elif parent_tag.name == 'p':
|
|
||||||
if not self.tag_to_string(parent_tag) == '':
|
|
||||||
new_div = Tag(soup,'div')
|
|
||||||
new_tag = Tag(soup,'p')
|
|
||||||
new_tag.insert(0,img_tag)
|
|
||||||
parent_tag.replaceWith(new_div)
|
|
||||||
new_div.insert(0,new_tag)
|
|
||||||
new_div.insert(1,parent_tag)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
articles = {}
|
|
||||||
key = None
|
|
||||||
ans = []
|
|
||||||
|
|
||||||
def parse_index_page(page_url,page_title):
|
|
||||||
|
|
||||||
def decode_date(datestr):
|
|
||||||
dmysplit = datestr.strip().lower().split(',')
|
|
||||||
mdsplit = dmysplit[1].split()
|
|
||||||
m = ['january','february','march','april','may','june','july','august','september','october','november','december'].index(mdsplit[0])+1
|
|
||||||
d = int(mdsplit[1])
|
|
||||||
y = int(dmysplit[2].split()[0])
|
|
||||||
return date(y,m,d)
|
|
||||||
|
|
||||||
def article_title(tag):
|
|
||||||
atag = tag.find('a',href=True)
|
|
||||||
if not atag:
|
|
||||||
return ''
|
|
||||||
return self.tag_to_string(atag)
|
|
||||||
|
|
||||||
def article_url(tag):
|
|
||||||
atag = tag.find('a',href=True)
|
|
||||||
if not atag:
|
|
||||||
return ''
|
|
||||||
return atag['href']+'print/'
|
|
||||||
|
|
||||||
def article_description(tag):
|
|
||||||
for p_tag in tag.findAll('p'):
|
|
||||||
d = self.tag_to_string(p_tag,False)
|
|
||||||
if not d == '':
|
|
||||||
return d
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def compound_h4_h3_title(tag):
|
|
||||||
if tag.h4:
|
|
||||||
if tag.h3:
|
|
||||||
return self.tag_to_string(tag.h4,False)+u'\u2014'+self.tag_to_string(tag.h3,False)
|
|
||||||
else:
|
|
||||||
return self.tag_to_string(tag.h4,False)
|
|
||||||
elif tag.h3:
|
|
||||||
return self.tag_to_string(tag.h3,False)
|
|
||||||
else:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def compound_h2_h4_title(tag):
|
|
||||||
if tag.h2:
|
|
||||||
if tag.h4:
|
|
||||||
return self.tag_to_string(tag.h2,False)+u'\u2014'+self.tag_to_string(tag.h4,False)
|
|
||||||
else:
|
|
||||||
return self.tag_to_string(tag.h2,False)
|
|
||||||
elif tag.h4:
|
|
||||||
return self.tag_to_string(tag.h4,False)
|
|
||||||
else:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
|
|
||||||
def handle_article(header_tag, outer_tag):
|
|
||||||
if header_tag:
|
|
||||||
url = article_url(header_tag)
|
|
||||||
title = article_title(header_tag)
|
|
||||||
author_date_tag = outer_tag.h4
|
|
||||||
if author_date_tag:
|
|
||||||
author_date = self.tag_to_string(author_date_tag,False).split(' - ')
|
|
||||||
author = author_date[0].strip()
|
|
||||||
article_date = decode_date(author_date[1])
|
|
||||||
earliest_date = date.today() - timedelta(days=self.oldest_article)
|
|
||||||
if article_date < earliest_date:
|
|
||||||
self.log("Skipping article dated %s" % author_date[1])
|
|
||||||
else:
|
|
||||||
excerpt_div = outer_tag.find('div','excerpt')
|
|
||||||
if excerpt_div:
|
|
||||||
description = article_description(excerpt_div)
|
|
||||||
else:
|
|
||||||
description = ''
|
|
||||||
if not articles.has_key(page_title):
|
|
||||||
articles[page_title] = []
|
|
||||||
articles[page_title].append(dict(title=title,url=url,date=author_date[1],description=description,author=author,content=''))
|
|
||||||
|
|
||||||
def handle_category_article(cat, header_tag, outer_tag):
|
|
||||||
url = article_url(header_tag)
|
|
||||||
title = article_title(header_tag)
|
|
||||||
if not title == '':
|
|
||||||
title = cat+u'\u2014'+title
|
|
||||||
a_tag = outer_tag.find('span','authorLink')
|
|
||||||
if a_tag:
|
|
||||||
author = self.tag_to_string(a_tag,False)
|
|
||||||
a_tag.parent.extract()
|
|
||||||
else:
|
|
||||||
author = ''
|
|
||||||
description = article_description(outer_tag)
|
|
||||||
if not articles.has_key(page_title):
|
|
||||||
articles[page_title] = []
|
|
||||||
articles[page_title].append(dict(title=title,url=url,date='',description=description,author=author,content=''))
|
|
||||||
|
|
||||||
|
|
||||||
soup = self.index_to_soup(page_url)
|
|
||||||
|
|
||||||
if page_title == 'Front Page':
|
|
||||||
# special processing for the front page
|
|
||||||
top_stories = soup.find('div',{ "id" : "macleansFeatured" })
|
|
||||||
if top_stories:
|
|
||||||
for div_slide in top_stories.findAll('div','slide'):
|
|
||||||
url = article_url(div_slide)
|
|
||||||
div_title = div_slide.find('div','header')
|
|
||||||
if div_title:
|
|
||||||
title = self.tag_to_string(div_title,False)
|
|
||||||
else:
|
|
||||||
title = ''
|
|
||||||
description = article_description(div_slide)
|
|
||||||
if not articles.has_key(page_title):
|
|
||||||
articles[page_title] = []
|
|
||||||
articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
|
|
||||||
|
|
||||||
from_macleans = soup.find('div',{ "id" : "fromMacleans" })
|
|
||||||
if from_macleans:
|
|
||||||
for li_tag in from_macleans.findAll('li','fromMacleansArticle'):
|
|
||||||
title = compound_h4_h3_title(li_tag)
|
|
||||||
url = article_url(li_tag)
|
|
||||||
description = article_description(li_tag)
|
|
||||||
if not articles.has_key(page_title):
|
|
||||||
articles[page_title] = []
|
|
||||||
articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
|
|
||||||
|
|
||||||
blog_central = soup.find('div',{ "id" : "bloglist" })
|
|
||||||
if blog_central:
|
|
||||||
for li_tag in blog_central.findAll('li'):
|
|
||||||
title = compound_h2_h4_title(li_tag)
|
|
||||||
if li_tag.h4:
|
|
||||||
url = article_url(li_tag.h4)
|
|
||||||
if not articles.has_key(page_title):
|
|
||||||
articles[page_title] = []
|
|
||||||
articles[page_title].append(dict(title=title,url=url,date='',description='',author='',content=''))
|
|
||||||
|
|
||||||
# need_to_know = soup.find('div',{ "id" : "needToKnow" })
|
|
||||||
# if need_to_know:
|
|
||||||
# for div_tag in need_to_know('div',attrs={'class' : re.compile("^needToKnowArticle")}):
|
|
||||||
# title = compound_h4_h3_title(div_tag)
|
|
||||||
# url = article_url(div_tag)
|
|
||||||
# description = article_description(div_tag)
|
|
||||||
# if not articles.has_key(page_title):
|
|
||||||
# articles[page_title] = []
|
|
||||||
# articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
|
|
||||||
|
|
||||||
for news_category in soup.findAll('div','newsCategory'):
|
|
||||||
news_cat = self.tag_to_string(news_category.h4,False)
|
|
||||||
handle_category_article(news_cat, news_category.find('h2'), news_category.find('div'))
|
|
||||||
for news_item in news_category.findAll('li'):
|
|
||||||
handle_category_article(news_cat,news_item.h3,news_item)
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
# find the div containing the highlight article
|
|
||||||
div_post = soup.find('div','post')
|
|
||||||
if div_post:
|
|
||||||
h1_tag = div_post.h1
|
|
||||||
handle_article(h1_tag,div_post)
|
|
||||||
|
|
||||||
# find the divs containing the rest of the articles
|
|
||||||
div_other = div_post.find('div', { "id" : "categoryOtherPosts" })
|
|
||||||
if div_other:
|
|
||||||
for div_entry in div_other.findAll('div','entry'):
|
|
||||||
h2_tag = div_entry.h2
|
|
||||||
handle_article(h2_tag,div_entry)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
for page_name,page_title in self.sectionlist:
|
|
||||||
parse_index_page(page_name,page_title)
|
|
||||||
ans.append(page_title)
|
|
||||||
|
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
|
||||||
return ans
|
|
||||||
|
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class AdvancedUserRecipe1295081935(BasicNewsRecipe):
|
class AdvancedUserRecipe1295081935(BasicNewsRecipe):
|
||||||
title = u'Mail & Guardian ZA News'
|
title = u'Mail & Guardian ZA News'
|
||||||
__author__ = '77ja65'
|
__author__ = '77ja65'
|
||||||
language = 'en'
|
language = 'en_ZA'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 30
|
max_articles_per_feed = 30
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
@ -3,9 +3,6 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
|
|
||||||
title = u'Max-Planck-Inst.'
|
title = u'Max-Planck-Inst.'
|
||||||
__author__ = 'schuster'
|
__author__ = 'schuster'
|
||||||
remove_tags = [dict(attrs={'class':['clearfix', 'lens', 'col2_box_list', 'col2_box_teaser group_ext no_print', 'dotted_line', 'col2_box_teaser', 'box_image small', 'bold', 'col2_box_teaser no_print', 'print_kontakt']}),
|
|
||||||
dict(id=['ie_clearing', 'col2', 'col2_content']),
|
|
||||||
dict(name=['script', 'noscript', 'style'])]
|
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -13,6 +10,11 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
|||||||
language = 'de'
|
language = 'de'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
|
remove_tags = [dict(attrs={'class':['box_url', 'print_kontakt']}),
|
||||||
|
dict(id=['skiplinks'])]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
split_url = url.split("/")
|
split_url = url.split("/")
|
||||||
print_url = 'http://www.mpg.de/print/' + split_url[3]
|
print_url = 'http://www.mpg.de/print/' + split_url[3]
|
||||||
|
138
recipes/menorca.recipe
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.web.feeds import Feed
|
||||||
|
|
||||||
|
class Menorca(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = 'Menorca'
|
||||||
|
publisher = 'Editorial Menorca S.A. '
|
||||||
|
__author__ = 'M. Sintes'
|
||||||
|
description = u'Peri\xf3dico con informaci\xf3n de Menorca, Espa\xf1a'
|
||||||
|
category = 'news, politics, economy, culture, Menorca, Spain '
|
||||||
|
language = 'es'
|
||||||
|
enconding = 'cp1252'
|
||||||
|
|
||||||
|
no_stylesheets = True
|
||||||
|
oldest_article = 5
|
||||||
|
max_articles_per_feed = 25
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [ (u'Principal',u'http://www.menorca.info/rss'),
|
||||||
|
(u'Opini\xf3n',u'http://www.menorca.info/rss?seccion=opinion'),
|
||||||
|
(u'Menorca',u'http://www.menorca.info/rss?seccion=menorca'),
|
||||||
|
(u'Alaior',u'http://www.menorca.info/rss?seccion=pueblos/alaior'),
|
||||||
|
(u'Ciutadella', u'http://www.menorca.info/rss?seccion=pueblos/ciutadella'),
|
||||||
|
(u'Es Castell', u'http://www.menorca.info/rss?seccion=pueblos/escastell'),
|
||||||
|
(u'Es Mercadal', u'http://www.menorca.info/rss?seccion=pueblos/esmercadal'),
|
||||||
|
(u'Es Migjorn', u'http://www.menorca.info/rss?seccion=pueblos/esmigjorn'),
|
||||||
|
(u'Ferreries', u'http://www.menorca.info/rss?seccion=pueblos/ferreries'),
|
||||||
|
(u'Fornells', u'http://www.menorca.info/rss?seccion=pueblos/fornells'),
|
||||||
|
(u'Llucma\xe7anes', u'http://www.menorca.info/rss?seccion=pueblos/llucmaanes'),
|
||||||
|
(u'Ma\xf3', u'http://www.menorca.info/rss?seccion=pueblos/mao'),
|
||||||
|
(u'Sant Climent', u'http://www.menorca.info/rss?seccion=pueblos/santcliment'),
|
||||||
|
(u'Sant Llu\xeds', u'http://www.menorca.info/rss?seccion=pueblos/santlluis'),
|
||||||
|
(u'Deportes',u'http://www.menorca.info/rss?seccion=deportes'),
|
||||||
|
(u'Balears', u'http://www.menorca.info/rss?seccion=balears')]
|
||||||
|
|
||||||
|
#Seccions amb link rss erroni. Es recupera directament de la pagina web
|
||||||
|
seccions_web = [(u'Mundo',u'http://www.menorca.info/actualidad/mundo'),
|
||||||
|
(u'Econom\xeda',u'http://www.menorca.info/actualidad/economia'),
|
||||||
|
(u'Espa\xf1a',u'http://www.menorca.info/actualidad/espana')]
|
||||||
|
|
||||||
|
remove_tags_before = dict(name='div', attrs={'class':'bloqueTitulosNoticia'})
|
||||||
|
remove_tags_after = dict(name='div', attrs={'class':'compartir'})
|
||||||
|
remove_tags = [dict(id = 'utilidades'),
|
||||||
|
dict(name='div', attrs={'class': 'totalComentarios'}),
|
||||||
|
dict(name='div', attrs={'class': 'compartir'}),
|
||||||
|
dict(name='div', attrs={'class': re.compile("img_noticia*")})
|
||||||
|
]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
url_imprimir = url + '?d=print'
|
||||||
|
return url.replace(url, url_imprimir)
|
||||||
|
|
||||||
|
def feed_to_index_append(self, feedObject, masterFeed):
|
||||||
|
|
||||||
|
# Loop thru the feed object and build the correct type of article list
|
||||||
|
for feed in feedObject:
|
||||||
|
newArticles = []
|
||||||
|
for article in feed.articles:
|
||||||
|
newArt = {
|
||||||
|
'title' : article.title,
|
||||||
|
'url' : article.url,
|
||||||
|
'date' : article.date,
|
||||||
|
'description' : article.text_summary
|
||||||
|
}
|
||||||
|
|
||||||
|
newArticles.append(newArt)
|
||||||
|
|
||||||
|
# append the newly-built list object to the index object # passed in as masterFeed.
|
||||||
|
masterFeed.append((feed.title,newArticles))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
|
||||||
|
rssFeeds = Feed()
|
||||||
|
rssFeeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
|
||||||
|
articles = []
|
||||||
|
feeds = []
|
||||||
|
|
||||||
|
self.feed_to_index_append(rssFeeds,feeds)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
for (nom_seccio, url_seccio) in self.seccions_web:
|
||||||
|
|
||||||
|
|
||||||
|
articles = []
|
||||||
|
|
||||||
|
soup = self.index_to_soup(url_seccio)
|
||||||
|
for article in soup.findAll('div', attrs={'class':re.compile("articulo noticia|cajaNoticiaPortada")}):
|
||||||
|
h = article.find(['h2','h3'])
|
||||||
|
titol = self.tag_to_string(h)
|
||||||
|
a = article.find('a', href=True)
|
||||||
|
url = 'http://www.menorca.info' + a['href']
|
||||||
|
|
||||||
|
desc = None
|
||||||
|
autor = ''
|
||||||
|
dt = ''
|
||||||
|
|
||||||
|
soup_art = self.index_to_soup(url)
|
||||||
|
aut = soup_art.find('div', attrs={'class':'autor'})
|
||||||
|
tx = self.tag_to_string(aut)
|
||||||
|
ls = re.split('[,;]',tx)
|
||||||
|
|
||||||
|
t = len(ls)
|
||||||
|
if t >= 1:
|
||||||
|
autor = ls[0]
|
||||||
|
|
||||||
|
if t > 1:
|
||||||
|
d = ls[t-1]
|
||||||
|
|
||||||
|
if len(d) >= 10:
|
||||||
|
lt = len(d) - 10
|
||||||
|
dt = d[lt:]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
self.log('\tTrobat article: ', titol, 'a', url, 'Seccio: ', nom_seccio, 'Autor: ', autor, 'Data: ', dt)
|
||||||
|
|
||||||
|
articles.append({'title': titol, 'url': url, 'description': desc, 'date':dt, 'author': autor})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if articles:
|
||||||
|
feeds.append((nom_seccio, articles))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return feeds
|
||||||
|
|
||||||
|
|
||||||
|
|
45
recipes/metro_news_nl.recipe
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||||
|
title = u'Metro Nieuws NL'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
__author__ = u'DrMerry'
|
||||||
|
description = u'Metro Nederland'
|
||||||
|
language = u'nl'
|
||||||
|
simultaneous_downloads = 5
|
||||||
|
delay = 1
|
||||||
|
# timefmt = ' [%A, %d %B, %Y]'
|
||||||
|
timefmt = ''
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
cover_url = 'http://www.readmetro.com/img/en/metroholland/last/1/small.jpg'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
remove_tags_before = dict(name='div', attrs={'id':'date'})
|
||||||
|
remove_tags_after = dict(name='div', attrs={'id':'column-1-3'})
|
||||||
|
encoding = 'utf-8'
|
||||||
|
extra_css = 'body{font-size:12px} #date, .article-image-caption {font-size: 0.583em} h2 {font-size: 0.917em} p.small, span, li, li span span, p, b, i, u, p.small.article-paragraph, p.small.article-paragraph p, p.small.article-paragraph span, p span, span {font-size: 0.833em} h1 {font-size: 1em}'
|
||||||
|
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
|
||||||
|
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
|
||||||
|
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
|
||||||
|
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar']}),
|
||||||
|
dict(name='iframe')]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'),
|
||||||
|
(u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'),
|
||||||
|
(u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'),
|
||||||
|
(u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'),
|
||||||
|
(u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'),
|
||||||
|
(u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
|
||||||
|
(u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
|
||||||
|
(u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),
|
||||||
|
(u'Familie', u'http://www.metronieuws.nl/rss.xml?c=1283166782-9'),
|
||||||
|
(u'Blogs', u'http://www.metronieuws.nl/rss.xml?c=1295586825-6'),
|
||||||
|
(u'Reizen', u'http://www.metronieuws.nl/rss.xml?c=1277377288-13'),
|
||||||
|
(u'Carrière', u'http://www.metronieuws.nl/rss.xml?c=1278070988-1'),
|
||||||
|
(u'Sport', u'http://www.metronieuws.nl/rss.xml?c=1277377288-12')
|
||||||
|
]
|
@ -1,29 +1,34 @@
|
|||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||||
title = u'Metro UK'
|
title = u'Metro UK'
|
||||||
|
description = 'News as provide by The Metro -UK'
|
||||||
no_stylesheets = True
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 200
|
|
||||||
|
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
|
no_stylesheets = True
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 25
|
||||||
|
remove_empty_feeds = True
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
|
||||||
|
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
simultaneous_downloads= 3
|
|
||||||
|
|
||||||
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
|
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
|
||||||
|
|
||||||
|
extra_css = 'h2 {font: sans-serif medium;}'
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
|
dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
|
||||||
dict(attrs={'class':['img-cnt figure']}),
|
dict(attrs={'class':['img-cnt figure']}),
|
||||||
dict(attrs={'class':['art-img']}),
|
dict(attrs={'class':['art-img']}),
|
||||||
dict(name='h1'),
|
|
||||||
dict(name='h2', attrs={'class':'h2'}),
|
|
||||||
dict(name='div', attrs={'class':'art-lft'})
|
dict(name='div', attrs={'class':'art-lft'})
|
||||||
]
|
]
|
||||||
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
|
remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
|
||||||
'commentForm', 'metroCommentInnerWrap',
|
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
|
||||||
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]})]
|
dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
|
||||||
|
]
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
|
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,17 +1,23 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010-2011, Eddie Lau'
|
__copyright__ = '2010-2011, Eddie Lau'
|
||||||
|
|
||||||
|
# Region - Hong Kong, Vancouver, Toronto
|
||||||
|
__Region__ = 'Hong Kong'
|
||||||
# Users of Kindle 3 with limited system-level CJK support
|
# Users of Kindle 3 with limited system-level CJK support
|
||||||
# please replace the following "True" with "False".
|
# please replace the following "True" with "False".
|
||||||
__MakePeriodical__ = True
|
__MakePeriodical__ = True
|
||||||
# Turn below to true if your device supports display of CJK titles
|
# Turn below to true if your device supports display of CJK titles
|
||||||
__UseChineseTitle__ = False
|
__UseChineseTitle__ = False
|
||||||
# Trun below to true if you wish to use life.mingpao.com as the main article source
|
# Set it to False if you want to skip images
|
||||||
|
__KeepImages__ = True
|
||||||
|
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
|
||||||
__UseLife__ = True
|
__UseLife__ = True
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Change Log:
|
Change Log:
|
||||||
|
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||||
|
provide options to remove all images in the file
|
||||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||||
2011/03/06: add new articles for finance section, also a new section "Columns"
|
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||||
2011/02/28: rearrange the sections
|
2011/02/28: rearrange the sections
|
||||||
@ -34,21 +40,96 @@ Change Log:
|
|||||||
import os, datetime, re
|
import os, datetime, re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from contextlib import nested
|
from contextlib import nested
|
||||||
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
class MPHKRecipe(BasicNewsRecipe):
|
# MAIN CLASS
|
||||||
title = 'Ming Pao - Hong Kong'
|
class MPRecipe(BasicNewsRecipe):
|
||||||
|
if __Region__ == 'Hong Kong':
|
||||||
|
title = 'Ming Pao - Hong Kong'
|
||||||
|
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||||
|
category = 'Chinese, News, Hong Kong'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||||
|
keep_only_tags = [dict(name='h1'),
|
||||||
|
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||||
|
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||||
|
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||||
|
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||||
|
dict(attrs={'class':['photo']}),
|
||||||
|
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||||
|
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='style'),
|
||||||
|
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||||
|
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||||
|
#dict(name='table') # for content fetched from life.mingpao.com
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='style'),
|
||||||
|
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||||
|
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||||
|
dict(name='img'),
|
||||||
|
#dict(name='table') # for content fetched from life.mingpao.com
|
||||||
|
]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '<h1>'),
|
||||||
|
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '</h1>'),
|
||||||
|
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
||||||
|
lambda match: ''),
|
||||||
|
# skip <br> after title in life.mingpao.com fetched article
|
||||||
|
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: "<div id='newscontent'>"),
|
||||||
|
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: "</b>")
|
||||||
|
]
|
||||||
|
elif __Region__ == 'Vancouver':
|
||||||
|
title = 'Ming Pao - Vancouver'
|
||||||
|
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||||
|
category = 'Chinese, News, Vancouver'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
|
||||||
|
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||||
|
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||||
|
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='img')]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: ''),
|
||||||
|
]
|
||||||
|
elif __Region__ == 'Toronto':
|
||||||
|
title = 'Ming Pao - Toronto'
|
||||||
|
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||||
|
category = 'Chinese, News, Toronto'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
|
||||||
|
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||||
|
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||||
|
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='img')]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: ''),
|
||||||
|
]
|
||||||
|
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = 'Eddie Lau'
|
__author__ = 'Eddie Lau'
|
||||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
|
||||||
publisher = 'MingPao'
|
publisher = 'MingPao'
|
||||||
category = 'Chinese, News, Hong Kong'
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -57,33 +138,6 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
recursions = 0
|
recursions = 0
|
||||||
conversion_options = {'linearize_tables':True}
|
conversion_options = {'linearize_tables':True}
|
||||||
timefmt = ''
|
timefmt = ''
|
||||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
|
||||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
|
||||||
keep_only_tags = [dict(name='h1'),
|
|
||||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
|
||||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
|
||||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
|
||||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
|
||||||
dict(attrs={'class':['photo']}),
|
|
||||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
|
||||||
]
|
|
||||||
remove_tags = [dict(name='style'),
|
|
||||||
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
|
||||||
dict(name='table')] # for content fetched from life.mingpao.com
|
|
||||||
remove_attributes = ['width']
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
|
||||||
lambda match: '<h1>'),
|
|
||||||
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
|
||||||
lambda match: '</h1>'),
|
|
||||||
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
|
||||||
lambda match: ''),
|
|
||||||
# skip <br> after title in life.mingpao.com fetched article
|
|
||||||
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
|
|
||||||
lambda match: "<div id='newscontent'>"),
|
|
||||||
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
|
|
||||||
lambda match: "</b>")
|
|
||||||
]
|
|
||||||
|
|
||||||
def image_url_processor(cls, baseurl, url):
|
def image_url_processor(cls, baseurl, url):
|
||||||
# trick: break the url at the first occurance of digit, add an additional
|
# trick: break the url at the first occurance of digit, add an additional
|
||||||
@ -124,8 +178,18 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_dtlocal(self):
|
def get_dtlocal(self):
|
||||||
dt_utc = datetime.datetime.utcnow()
|
dt_utc = datetime.datetime.utcnow()
|
||||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
if __Region__ == 'Hong Kong':
|
||||||
dt_local = dt_utc - datetime.timedelta(-2.0/24)
|
# convert UTC to local hk time - at HKT 5.30am, all news are available
|
||||||
|
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
|
||||||
|
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
|
||||||
|
elif __Region__ == 'Vancouver':
|
||||||
|
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
|
||||||
|
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
|
||||||
|
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
|
||||||
|
elif __Region__ == 'Toronto':
|
||||||
|
# convert UTC to local Toronto time - at EST time 8.30am, all news are available
|
||||||
|
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
|
||||||
|
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
|
||||||
return dt_local
|
return dt_local
|
||||||
|
|
||||||
def get_fetchdate(self):
|
def get_fetchdate(self):
|
||||||
@ -135,13 +199,15 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
def get_fetchday(self):
|
def get_fetchday(self):
|
||||||
# dt_utc = datetime.datetime.utcnow()
|
|
||||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
|
||||||
# dt_local = dt_utc - datetime.timedelta(-2.0/24)
|
|
||||||
return self.get_dtlocal().strftime("%d")
|
return self.get_dtlocal().strftime("%d")
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
|
if __Region__ == 'Hong Kong':
|
||||||
|
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
|
||||||
|
elif __Region__ == 'Vancouver':
|
||||||
|
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
|
||||||
|
elif __Region__ == 'Toronto':
|
||||||
|
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
try:
|
try:
|
||||||
br.open(cover)
|
br.open(cover)
|
||||||
@ -153,76 +219,104 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
feeds = []
|
feeds = []
|
||||||
dateStr = self.get_fetchdate()
|
dateStr = self.get_fetchdate()
|
||||||
|
|
||||||
if __UseLife__:
|
if __Region__ == 'Hong Kong':
|
||||||
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
|
if __UseLife__:
|
||||||
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
|
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
|
||||||
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
|
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
|
||||||
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
|
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
|
||||||
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
|
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
|
||||||
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
|
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
|
||||||
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
|
||||||
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
||||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
||||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||||
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
||||||
articles = self.parse_section2(url, keystr)
|
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
||||||
|
articles = self.parse_section2(url, keystr)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
|
||||||
|
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||||
|
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||||
|
articles = self.parse_section(url)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
else:
|
||||||
|
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||||
|
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||||
|
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||||
|
articles = self.parse_section(url)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
|
||||||
|
# special- editorial
|
||||||
|
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||||
|
if ed_articles:
|
||||||
|
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||||
|
|
||||||
|
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||||
|
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||||
|
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
|
||||||
|
articles = self.parse_section(url)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
|
||||||
|
# special - finance
|
||||||
|
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||||
|
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||||
|
if fin_articles:
|
||||||
|
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||||
|
|
||||||
|
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||||
|
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||||
|
articles = self.parse_section(url)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
|
||||||
|
# special - entertainment
|
||||||
|
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||||
|
if ent_articles:
|
||||||
|
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||||
|
|
||||||
|
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||||
|
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||||
|
articles = self.parse_section(url)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
|
||||||
|
|
||||||
|
# special- columns
|
||||||
|
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
||||||
|
if col_articles:
|
||||||
|
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
||||||
|
elif __Region__ == 'Vancouver':
|
||||||
|
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
|
||||||
|
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
|
||||||
|
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
|
||||||
|
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
|
||||||
|
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
|
||||||
|
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
|
||||||
|
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
|
||||||
|
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
|
||||||
|
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
|
||||||
|
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
|
||||||
|
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
|
elif __Region__ == 'Toronto':
|
||||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
|
||||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
|
||||||
articles = self.parse_section(url)
|
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
|
||||||
|
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
|
||||||
|
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
|
||||||
|
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
|
||||||
|
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
|
||||||
|
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
|
||||||
|
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
|
||||||
|
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
|
||||||
|
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
else:
|
|
||||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
|
||||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
|
||||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
|
||||||
articles = self.parse_section(url)
|
|
||||||
if articles:
|
|
||||||
feeds.append((title, articles))
|
|
||||||
|
|
||||||
# special- editorial
|
|
||||||
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
|
||||||
if ed_articles:
|
|
||||||
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
|
||||||
|
|
||||||
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
|
||||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
|
||||||
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
|
|
||||||
articles = self.parse_section(url)
|
|
||||||
if articles:
|
|
||||||
feeds.append((title, articles))
|
|
||||||
|
|
||||||
# special - finance
|
|
||||||
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
|
||||||
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
|
||||||
if fin_articles:
|
|
||||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
|
||||||
|
|
||||||
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
|
||||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
|
||||||
articles = self.parse_section(url)
|
|
||||||
if articles:
|
|
||||||
feeds.append((title, articles))
|
|
||||||
|
|
||||||
# special - entertainment
|
|
||||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
|
||||||
if ent_articles:
|
|
||||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
|
||||||
|
|
||||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
|
||||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
|
||||||
articles = self.parse_section(url)
|
|
||||||
if articles:
|
|
||||||
feeds.append((title, articles))
|
|
||||||
|
|
||||||
|
|
||||||
# special- columns
|
|
||||||
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
|
||||||
if col_articles:
|
|
||||||
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
|
||||||
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
# parse from news.mingpao.com
|
# parse from news.mingpao.com
|
||||||
@ -256,11 +350,30 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
title = self.tag_to_string(i)
|
title = self.tag_to_string(i)
|
||||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||||
|
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||||
included_urls.append(url)
|
included_urls.append(url)
|
||||||
current_articles.reverse()
|
current_articles.reverse()
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
|
# parse from www.mingpaovan.com
|
||||||
|
def parse_section3(self, url, baseUrl):
    """Collect article links from a www.mingpaovan.com style index page.

    url     -- address of the section index page to scan
    baseUrl -- site root that the relative article links are resolved
               against; a trailing '/' is tolerated

    Returns a list of article dicts ('title', 'url', 'description', 'date')
    in page order. When the same article is linked more than once, only the
    last occurrence on the page is kept.
    """
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    # Callers pass the root with a trailing slash; strip it so the joined
    # URL does not contain '//' after the host name.
    site_root = baseUrl.rstrip('/')
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up so the last occurrence of a duplicate wins,
    # then restore page order at the end.
    for link in reversed(links):
        href = link.get('href', False)
        if not href:
            # Matching element without a usable link: nothing to fetch.
            continue
        urlstr = site_root + '/' + href.replace('../../../', '')
        if urlstr in included_urls:
            continue
        current_articles.append({'title': self.tag_to_string(link), 'url': urlstr, 'description': '', 'date': ''})
        included_urls.add(urlstr)
    current_articles.reverse()
    return current_articles
|
||||||
|
|
||||||
def parse_ed_section(self, url):
|
def parse_ed_section(self, url):
|
||||||
self.get_fetchdate()
|
self.get_fetchdate()
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
@ -338,7 +451,12 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
if dir is None:
|
if dir is None:
|
||||||
dir = self.output_dir
|
dir = self.output_dir
|
||||||
if __UseChineseTitle__ == True:
|
if __UseChineseTitle__ == True:
|
||||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
if __Region__ == 'Hong Kong':
|
||||||
|
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||||
|
elif __Region__ == 'Vancouver':
|
||||||
|
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
|
||||||
|
elif __Region__ == 'Toronto':
|
||||||
|
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
|
||||||
else:
|
else:
|
||||||
title = self.short_title()
|
title = self.short_title()
|
||||||
# if not generating a periodical, force date to apply in title
|
# if not generating a periodical, force date to apply in title
|
||||||
|
594
recipes/ming_pao_toronto.recipe
Normal file
@ -0,0 +1,594 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010-2011, Eddie Lau'
|
||||||
|
|
||||||
|
# Region - Hong Kong, Vancouver, Toronto
|
||||||
|
__Region__ = 'Toronto'
|
||||||
|
# Users of Kindle 3 with limited system-level CJK support
|
||||||
|
# please replace the following "True" with "False".
|
||||||
|
__MakePeriodical__ = True
|
||||||
|
# Set the following to True if your device supports display of CJK titles
|
||||||
|
__UseChineseTitle__ = False
|
||||||
|
# Set it to False if you want to skip images
|
||||||
|
__KeepImages__ = True
|
||||||
|
# (HK only) Set the following to True to use life.mingpao.com as the main article source
|
||||||
|
__UseLife__ = True
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
Change Log:
|
||||||
|
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||||
|
provide options to remove all images in the file
|
||||||
|
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||||
|
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||||
|
2011/02/28: rearrange the sections
|
||||||
|
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
|
||||||
|
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
|
||||||
|
folder in Kindle 3
|
||||||
|
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
|
||||||
|
clean up the indentation
|
||||||
|
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
|
||||||
|
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
|
||||||
|
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
|
||||||
|
ordering of articles
|
||||||
|
2010/11/12: add news image and eco-news section
|
||||||
|
2010/11/08: add parsing of finance section
|
||||||
|
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
|
||||||
|
in section/article list.
|
||||||
|
2010/10/31: skip repeated articles in section pages
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os, datetime, re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from contextlib import nested
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
|
# MAIN CLASS
|
||||||
|
class MPRecipe(BasicNewsRecipe):
|
||||||
|
if __Region__ == 'Hong Kong':
|
||||||
|
title = 'Ming Pao - Hong Kong'
|
||||||
|
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||||
|
category = 'Chinese, News, Hong Kong'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||||
|
keep_only_tags = [dict(name='h1'),
|
||||||
|
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||||
|
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||||
|
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||||
|
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||||
|
dict(attrs={'class':['photo']}),
|
||||||
|
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||||
|
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='style'),
|
||||||
|
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||||
|
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||||
|
#dict(name='table') # for content fetched from life.mingpao.com
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='style'),
|
||||||
|
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||||
|
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||||
|
dict(name='img'),
|
||||||
|
#dict(name='table') # for content fetched from life.mingpao.com
|
||||||
|
]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '<h1>'),
|
||||||
|
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '</h1>'),
|
||||||
|
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
||||||
|
lambda match: ''),
|
||||||
|
# skip <br> after title in life.mingpao.com fetched article
|
||||||
|
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: "<div id='newscontent'>"),
|
||||||
|
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: "</b>")
|
||||||
|
]
|
||||||
|
elif __Region__ == 'Vancouver':
|
||||||
|
title = 'Ming Pao - Vancouver'
|
||||||
|
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||||
|
category = 'Chinese, News, Vancouver'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
|
||||||
|
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||||
|
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||||
|
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='img')]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: ''),
|
||||||
|
]
|
||||||
|
elif __Region__ == 'Toronto':
|
||||||
|
title = 'Ming Pao - Toronto'
|
||||||
|
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||||
|
category = 'Chinese, News, Toronto'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
|
||||||
|
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||||
|
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||||
|
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='img')]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: ''),
|
||||||
|
]
|
||||||
|
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
__author__ = 'Eddie Lau'
|
||||||
|
publisher = 'MingPao'
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'zh'
|
||||||
|
encoding = 'Big5-HKSCS'
|
||||||
|
recursions = 0
|
||||||
|
conversion_options = {'linearize_tables':True}
|
||||||
|
timefmt = ''
|
||||||
|
|
||||||
|
def image_url_processor(cls, baseurl, url):
    # Image URLs are currently passed through unchanged.
    #
    # A disabled experiment used to live here: break the URL at the first
    # occurrence of a digit ('0'-'9') and add an additional '_' at the front.
    # It was marked as not working and may need to move to the
    # preprocess_html() method instead.
    return url
|
||||||
|
|
||||||
|
def get_dtlocal(self):
    """Return the current time on the selected edition's "issue clock".

    The current UTC time is shifted by the region's fixed UTC offset and
    then moved back by the local hour at which all of the day's news is
    available, so the calendar date of the result is the date of the newest
    complete issue.

    Raises ValueError when __Region__ is not 'Hong Kong', 'Vancouver' or
    'Toronto' (this used to surface as an UnboundLocalError).
    """
    # region -> (UTC offset in hours, local hour at which all news is available)
    # The offsets are fixed values; no daylight-saving adjustment is made.
    schedule = {
        'Hong Kong': (8.0, 5.5),    # HKT, all news available at 5.30am
        'Vancouver': (-8.0, 5.5),   # PST, all news available at 5.30am
        'Toronto': (-5.0, 8.5),     # EST, all news available at 8.30am
    }
    if __Region__ not in schedule:
        raise ValueError('Unsupported __Region__: %r' % (__Region__,))
    utc_offset, publish_hour = schedule[__Region__]
    dt_utc = datetime.datetime.utcnow()
    return dt_utc + datetime.timedelta(hours=utc_offset) - datetime.timedelta(hours=publish_hour)
|
||||||
|
|
||||||
|
def get_fetchdate(self):
    # Issue date as a compact YYYYMMDD string, taken from get_dtlocal().
    issue_time = self.get_dtlocal()
    return issue_time.strftime("%Y%m%d")
|
||||||
|
|
||||||
|
def get_fetchformatteddate(self):
    # Issue date in dashed YYYY-MM-DD form, taken from get_dtlocal().
    issue_time = self.get_dtlocal()
    return issue_time.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
def get_fetchday(self):
    # Zero-padded day of the month ("01".."31") of the issue date.
    issue_time = self.get_dtlocal()
    return issue_time.strftime("%d")
|
||||||
|
|
||||||
|
def get_cover_url(self):
    """Return the URL of the issue's front-page image, or None.

    The URL is built from the pattern of the edition selected by __Region__
    and then probed by opening it with a browser. If the probe fails, or
    the region is not recognised, None is returned.
    """
    # Read the date once so both parts of the URL refer to the same issue.
    fetch_date = self.get_fetchdate()
    fetch_day = self.get_fetchday()
    if __Region__ == 'Hong Kong':
        cover = 'http://news.mingpao.com/' + fetch_date + '/' + fetch_date + '_' + fetch_day + 'gacov.jpg'
    elif __Region__ == 'Vancouver':
        cover = 'http://www.mingpaovan.com/ftp/News/' + fetch_date + '/' + fetch_day + 'pgva1s.jpg'
    elif __Region__ == 'Toronto':
        cover = 'http://www.mingpaotor.com/ftp/News/' + fetch_date + '/' + fetch_day + 'pgtas.jpg'
    else:
        # No URL pattern for this region; previously this case fell through
        # to the catch-all handler below and also ended up as None.
        return None
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: a cover that cannot be opened is simply not used.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        cover = None
    return cover
|
||||||
|
|
||||||
|
def parse_index(self):
    """Build the section list for the edition selected by __Region__.

    Returns a list of (section title, articles) tuples in reading order.
    A section is left out when its parser returns no articles, and a
    region other than Hong Kong / Vancouver / Toronto yields an empty list.
    """
    feeds = []
    dateStr = self.get_fetchdate()

    def keep(section_title, found):
        # Only sections that actually contain articles are listed.
        if found:
            feeds.append((section_title, found))

    def from_news_site(sections):
        # Sections read from news.mingpao.com index pages.
        for section_title, section_url in sections:
            keep(section_title, self.parse_section(section_url))

    def from_regional_site(sections, site_root):
        # Sections read from the Vancouver / Toronto sites.
        for section_title, section_url in sections:
            keep(section_title, self.parse_section3(section_url, site_root))

    if __Region__ == 'Hong Kong':
        if __UseLife__:
            # life.mingpao.com is the main source; keystr filters the article links.
            life_sections = [
                (u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
                (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
                (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
                (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
                (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
                (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
                (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
                (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
                (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl'),
            ]
            for section_title, section_url, keystr in life_sections:
                keep(section_title, self.parse_section2(section_url, keystr))

            from_news_site([
                (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm'),
            ])
        else:
            from_news_site([
                (u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
            ])

            # special - editorial
            keep(u'\u793e\u8a55/\u7b46\u9663 Editorial',
                 self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr'))

            from_news_site([
                (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
                (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
            ])

            # special - finance (read from life.mingpao.com; the older
            # mpfinance.com index is no longer used)
            keep(u'\u7d93\u6fdf Finance',
                 self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea'))

            from_news_site([
                ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
            ])

            # special - entertainment
            keep(u'\u5f71\u8996 Film/TV',
                 self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm'))

            from_news_site([
                (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm'),
            ])

            # special - columns
            keep(u'\u5c08\u6b04 Columns',
                 self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn'))
    elif __Region__ == 'Vancouver':
        from_regional_site([
            (u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
            (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
            (u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
            (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
            (u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
            (u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
            (u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
            (u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
            (u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
            (u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),
        ], 'http://www.mingpaovan.com/')
    elif __Region__ == 'Toronto':
        from_regional_site([
            (u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
            (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
            (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
            (u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
            (u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
            (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
            (u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
            (u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
            (u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
            (u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),
        ], 'http://www.mingpaotor.com/')
    return feeds
|
||||||
|
|
||||||
|
# parse from news.mingpao.com
|
||||||
|
def parse_section(self, url):
    """Collect article links from one news.mingpao.com section index page.

    The page at ``url`` is fetched with ``self.index_to_soup`` and every
    element whose class is ``bullet`` or ``bullet_grey`` is inspected.  The
    first ``<a href=...>`` inside each one becomes an article whose URL is
    ``http://news.mingpao.com/<fetch date>/<href>``.

    Returns a list of dicts with the keys ``title``, ``url``,
    ``description`` and ``date`` in page order.  URLs containing
    ``Redirect`` are dropped.  When a URL occurs more than once, only the
    occurrence nearest the end of the page is kept.
    """
    dateStr = self.get_fetchdate()
    soup = self.index_to_soup(url)
    divs = soup.findAll(attrs={'class': ['bullet', 'bullet_grey']})
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up so that the last occurrence of a duplicated
    # link is the one that survives; the result is flipped back afterwards.
    for i in reversed(divs):
        a = i.find('a', href=True)
        if a is None:
            # A bullet without a link has nothing to download.
            continue
        title = self.tag_to_string(a)
        link = 'http://news.mingpao.com/' + dateStr + '/' + a.get('href', False)
        if link in included_urls or link.rfind('Redirect') != -1:
            continue
        current_articles.append({'title': title, 'url': link, 'description': '', 'date': ''})
        included_urls.add(link)
    current_articles.reverse()
    return current_articles
|
||||||
|
|
||||||
|
# parse from life.mingpao.com
|
||||||
|
def parse_section2(self, url, keystr):
    """Collect article links from one life.mingpao.com category page.

    Every ``<a href=...>`` on the page at ``url`` is turned into
    ``http://life.mingpao.com/cfm/<href>``.  A link is kept only when it
    contains both ``.txt`` and ``keystr``.  Kept links are switched from
    ``dailynews3.cfm`` to ``dailynews3a.cfm`` (the printed version of the
    article).

    Returns a list of dicts with the keys ``title``, ``url`` and
    ``description`` in page order.  When the same article is linked more
    than once, only the occurrence nearest the end of the page is kept.
    """
    soup = self.index_to_soup(url)
    current_articles = []
    included_urls = set()
    # Bottom-up walk: the last occurrence of a duplicated link wins.
    for i in reversed(soup.findAll('a', href=True)):
        link = 'http://life.mingpao.com/cfm/' + i.get('href', False)
        if link.rfind('.txt') == -1 or link.rfind(keystr) == -1:
            continue
        link = link.replace('dailynews3.cfm', 'dailynews3a.cfm')  # use printed version of the article
        # The duplicate test must run on the rewritten URL, because that is
        # the form remembered in included_urls.
        if link in included_urls:
            continue
        current_articles.append({'title': self.tag_to_string(i), 'url': link, 'description': ''})
        included_urls.add(link)
    current_articles.reverse()
    return current_articles
|
||||||
|
|
||||||
|
# parse from www.mingpaovan.com
|
||||||
|
def parse_section3(self, url, baseUrl):
    """Collect article links from a section index page of a regional site.

    Elements with class ``ListContentLargeLink`` on the page at ``url`` are
    read; each one's ``href`` has its ``../../../`` prefix removed and is
    appended to ``baseUrl``.  ``baseUrl`` may be given with or without a
    trailing slash; exactly one slash separates it from the article path.

    Returns a list of dicts with the keys ``title``, ``url``,
    ``description`` and ``date`` in page order.  Elements without an
    ``href`` are skipped.  When a URL occurs more than once, only the
    occurrence nearest the end of the page is kept.
    """
    soup = self.index_to_soup(url)
    divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    # Callers pass the site root with a trailing '/', so strip it before
    # joining to avoid 'http://host//path'.
    root = baseUrl.rstrip('/')
    current_articles = []
    included_urls = set()
    # Bottom-up walk: the last occurrence of a duplicated link wins.
    for i in reversed(divs):
        href = i.get('href', False)
        if not href:
            # Matching element that is not a link: nothing to download.
            continue
        urlstr = root + '/' + href.replace('../../../', '')
        if urlstr in included_urls:
            continue
        current_articles.append({'title': self.tag_to_string(i), 'url': urlstr, 'description': '', 'date': ''})
        included_urls.add(urlstr)
    current_articles.reverse()
    return current_articles
|
||||||
|
|
||||||
|
def parse_ed_section(self, url):
    """Collect editorial article links from a life.mingpao.com listing.

    Each ``<a href=...>`` on the page becomes
    ``http://life.mingpao.com/cfm/<href>`` and is kept when that URL
    contains both ``.txt`` and ``nal``.  Returns dicts with ``title``,
    ``url`` and ``description`` in page order; for a repeated URL the
    occurrence nearest the end of the page is kept.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    anchors = page.findAll('a', href=True)
    anchors.reverse()
    picked = []
    seen = []
    for anchor in anchors:
        heading = self.tag_to_string(anchor)
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen:
            continue
        if link.rfind('.txt') == -1 or link.rfind('nal') == -1:
            continue
        picked.append({'title': heading, 'url': link, 'description': ''})
        seen.append(link)
    # The anchors were visited bottom-up; restore page order.
    picked.reverse()
    return picked
|
||||||
|
|
||||||
|
def parse_fin_section(self, url):
    """Collect finance article links from a life.mingpao.com listing.

    Each ``<a href=...>`` on the page becomes
    ``http://life.mingpao.com/cfm/<href>`` and is kept when that URL
    contains both ``txt`` and ``nal``.  Returns dicts with ``title``,
    ``url`` and ``description`` in page order; for a repeated URL the first
    occurrence is kept.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    picked = []
    seen = []
    for anchor in page.findAll('a', href=True):
        # An older variant built the link from www.mpfinance.com and
        # filtered on the date string; it is no longer used.
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen:
            continue
        if link.rfind('txt') == -1 or link.rfind('nal') == -1:
            continue
        heading = self.tag_to_string(anchor)
        picked.append({'title': heading, 'url': link, 'description': ''})
        seen.append(link)
    return picked
|
||||||
|
|
||||||
|
def parse_ent_section(self, url):
    """Collect entertainment article links from an ol.mingpao.com listing.

    Each ``<a href=...>`` on the page becomes
    ``http://ol.mingpao.com/cfm/<href>`` and is kept when that URL contains
    both ``.txt`` and ``star``.  Returns dicts with ``title``, ``url`` and
    ``description`` in page order; for a repeated URL the occurrence
    nearest the end of the page is kept.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    anchors = page.findAll('a', href=True)
    anchors.reverse()
    picked = []
    seen = []
    for anchor in anchors:
        heading = self.tag_to_string(anchor)
        link = 'http://ol.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen:
            continue
        if link.rfind('.txt') == -1 or link.rfind('star') == -1:
            continue
        picked.append({'title': heading, 'url': link, 'description': ''})
        seen.append(link)
    # The anchors were visited bottom-up; restore page order.
    picked.reverse()
    return picked
|
||||||
|
|
||||||
|
def parse_col_section(self, url):
    """Collect column article links from a life.mingpao.com listing.

    Each ``<a href=...>`` on the page becomes
    ``http://life.mingpao.com/cfm/<href>`` and is kept when that URL
    contains both ``.txt`` and ``ncl``.  Returns dicts with ``title``,
    ``url`` and ``description`` in page order; for a repeated URL the
    occurrence nearest the end of the page is kept.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    anchors = page.findAll('a', href=True)
    anchors.reverse()
    picked = []
    seen = []
    for anchor in anchors:
        heading = self.tag_to_string(anchor)
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen:
            continue
        if link.rfind('.txt') == -1 or link.rfind('ncl') == -1:
            continue
        picked.append({'title': heading, 'url': link, 'description': ''})
        seen.append(link)
    # The anchors were visited bottom-up; restore page order.
    picked.reverse()
    return picked
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Strip presentational attributes from a downloaded article.

    Removes the ``style`` attribute from every element that has one, then
    the ``width`` attribute from every element that has one, and returns
    the same ``soup`` object.
    """
    for item in soup.findAll(style=True):
        del item['style']
    # Search on 'width' itself: the previous loop has already removed every
    # 'style' attribute, so a style-based search would match nothing.
    for item in soup.findAll(width=True):
        del item['width']
    # NOTE(review): 'stype' is not a standard HTML attribute and 'absmiddle'
    # is normally a value of 'align', so this loop probably never matches.
    # Kept as written because the intended clean-up is unclear -- confirm.
    for item in soup.findAll(stype=True):
        del item['absmiddle']
    return soup
|
||||||
|
|
||||||
|
def create_opf(self, feeds, dir=None):
    """Write ``index.opf`` and ``index.ncx`` for the downloaded issue.

    ``feeds`` is the sequence of downloaded feeds; ``dir`` is the output
    directory and defaults to ``self.output_dir``.  The method builds the
    book metadata (title, publisher, dates), the manifest (feed
    directories, index pages, cover, masthead), the spine and the table of
    contents.  It also appends a navigation bar to the last page of every
    downloaded article.

    Raises ``Exception`` when ``feeds`` is empty.
    """
    if dir is None:
        dir = self.output_dir

    # Title: Chinese name for the configured region when requested,
    # otherwise (or for an unrecognised region) the recipe's short title.
    title = None
    if __UseChineseTitle__ == True:
        title = {
            'Hong Kong': u'\u660e\u5831 (\u9999\u6e2f)',
            'Vancouver': u'\u660e\u5831 (\u6eab\u54e5\u83ef)',
            'Toronto': u'\u660e\u5831 (\u591a\u502b\u591a)',
        }.get(__Region__)
    if title is None:
        title = self.short_title()
    # if not generating a periodical, force date to apply in title
    if __MakePeriodical__ == False:
        title = title + ' ' + self.get_fetchformatteddate()

    mi = MetaInformation(title, [self.publisher])
    mi.publisher = self.publisher
    mi.author_sort = self.publisher
    if __MakePeriodical__ == True:
        mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
    else:
        mi.publication_type = self.publication_type+':'+self.short_title()
    # Timestamps use the issue date from get_dtlocal(), not the current time.
    mi.timestamp = self.get_dtlocal()
    mi.comments = self.description
    if not isinstance(mi.comments, unicode):
        mi.comments = mi.comments.decode('utf-8', 'replace')
    mi.pubdate = self.get_dtlocal()
    opf_path = os.path.join(dir, 'index.opf')
    ncx_path = os.path.join(dir, 'index.ncx')
    opf = OPFCreator(dir, mi)

    # Add mastheadImage entry to <guide> section
    mp = getattr(self, 'masthead_path', None)
    if mp is not None and os.access(mp, os.R_OK):
        from calibre.ebooks.metadata.opf2 import Guide
        ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
        ref.type = 'masthead'
        ref.title = 'Masthead Image'
        opf.guide.append(ref)

    manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
    manifest.append(os.path.join(dir, 'index.html'))
    manifest.append(os.path.join(dir, 'index.ncx'))

    # Get cover
    cpath = getattr(self, 'cover_path', None)
    if cpath is None:
        # Close the handle as soon as the default cover is written, so the
        # file is complete before its path is added to the manifest.
        with open(os.path.join(dir, 'cover.jpg'), 'wb') as pf:
            if self.default_cover(pf):
                cpath = pf.name
    if cpath is not None and os.access(cpath, os.R_OK):
        opf.cover = cpath
        manifest.append(cpath)

    # Get masthead
    mpath = getattr(self, 'masthead_path', None)
    if mpath is not None and os.access(mpath, os.R_OK):
        manifest.append(mpath)

    opf.create_manifest_from_files_in(manifest)
    for mani in opf.manifest:
        if mani.path.endswith('.ncx'):
            mani.id = 'ncx'
        if mani.path.endswith('mastheadImage.jpg'):
            mani.id = 'masthead-image'
    entries = ['index.html']
    toc = TOC(base_path=dir)
    self.play_order_counter = 0
    self.play_order_map = {}

    def feed_index(num, parent):
        # Add every downloaded article of feed `num` to the spine entries
        # and to the TOC node `parent`, then append a navigation bar to the
        # article's last page.
        f = feeds[num]
        for j, a in enumerate(f):
            if getattr(a, 'downloaded', False):
                adir = 'feed_%d/article_%d/'%(num, j)
                auth = a.author
                if not auth:
                    auth = None
                desc = a.text_summary
                if not desc:
                    desc = None
                else:
                    desc = self.description_limiter(desc)
                entries.append('%sindex.html'%adir)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                play_order=po, author=auth, description=desc)
                last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                for sp in a.sub_pages:
                    prefix = os.path.commonprefix([opf_path, sp])
                    relp = sp[len(prefix):]
                    entries.append(relp.replace(os.sep, '/'))
                    last = sp

                if os.path.exists(last):
                    with open(last, 'rb') as fi:
                        src = fi.read().decode('utf-8')
                    soup = BeautifulSoup(src)
                    body = soup.find('body')
                    if body is not None:
                        # Two '..' segments for every 'link<N>' component
                        # found in the page's path.
                        prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                        templ = self.navbar.generate(True, num, j, len(f),
                                        not self.has_single_feed,
                                        a.orig_url, self.publisher, prefix=prefix,
                                        center=self.center_navbar)
                        elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                        body.insert(len(body.contents), elem)
                        with open(last, 'wb') as fi:
                            fi.write(unicode(soup).encode('utf-8'))

    if len(feeds) == 0:
        raise Exception('All feeds are empty, aborting.')

    if len(feeds) > 1:
        for i, f in enumerate(feeds):
            entries.append('feed_%d/index.html'%i)
            po = self.play_order_map.get(entries[-1], None)
            if po is None:
                self.play_order_counter += 1
                po = self.play_order_counter
            auth = getattr(f, 'author', None)
            if not auth:
                auth = None
            desc = getattr(f, 'description', None)
            if not desc:
                desc = None
            feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                f.title, play_order=po, description=desc, author=auth))

    else:
        entries.append('feed_%d/index.html'%0)
        feed_index(0, toc)

    for i, p in enumerate(entries):
        entries[i] = os.path.join(dir, p.replace('/', os.sep))
    opf.create_spine(entries)
    opf.set_toc(toc)

    with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
        opf.render(opf_file, ncx_file)
|
||||||
|
|
594
recipes/ming_pao_vancouver.recipe
Normal file
@ -0,0 +1,594 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010-2011, Eddie Lau'
|
||||||
|
|
||||||
|
# Region - Hong Kong, Vancouver, Toronto
|
||||||
|
__Region__ = 'Vancouver'
|
||||||
|
# Users of Kindle 3 with limited system-level CJK support
|
||||||
|
# please replace the following "True" with "False".
|
||||||
|
__MakePeriodical__ = True
|
||||||
|
# Set to True if your device supports display of CJK titles
|
||||||
|
__UseChineseTitle__ = False
|
||||||
|
# Set it to False if you want to skip images
|
||||||
|
__KeepImages__ = True
|
||||||
|
# (HK only) Set to True if you wish to use life.mingpao.com as the main article source
|
||||||
|
__UseLife__ = True
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
Change Log:
|
||||||
|
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||||
|
provide options to remove all images in the file
|
||||||
|
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||||
|
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||||
|
2011/02/28: rearrange the sections
|
||||||
|
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
|
||||||
|
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
|
||||||
|
folder in Kindle 3
|
||||||
|
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
|
||||||
|
clean up the indentation
|
||||||
|
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
|
||||||
|
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
|
||||||
|
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
|
||||||
|
ordering of articles
|
||||||
|
2010/11/12: add news image and eco-news section
|
||||||
|
2010/11/08: add parsing of finance section
|
||||||
|
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
|
||||||
|
in section/article list.
|
||||||
|
2010/10/31: skip repeated articles in section pages
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os, datetime, re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from contextlib import nested
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
|
# MAIN CLASS
|
||||||
|
class MPRecipe(BasicNewsRecipe):
|
||||||
|
if __Region__ == 'Hong Kong':
|
||||||
|
title = 'Ming Pao - Hong Kong'
|
||||||
|
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||||
|
category = 'Chinese, News, Hong Kong'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||||
|
keep_only_tags = [dict(name='h1'),
|
||||||
|
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||||
|
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||||
|
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||||
|
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||||
|
dict(attrs={'class':['photo']}),
|
||||||
|
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||||
|
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='style'),
|
||||||
|
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||||
|
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||||
|
#dict(name='table') # for content fetched from life.mingpao.com
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='style'),
|
||||||
|
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||||
|
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||||
|
dict(name='img'),
|
||||||
|
#dict(name='table') # for content fetched from life.mingpao.com
|
||||||
|
]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '<h1>'),
|
||||||
|
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '</h1>'),
|
||||||
|
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
||||||
|
lambda match: ''),
|
||||||
|
# skip <br> after title in life.mingpao.com fetched article
|
||||||
|
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: "<div id='newscontent'>"),
|
||||||
|
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: "</b>")
|
||||||
|
]
|
||||||
|
elif __Region__ == 'Vancouver':
|
||||||
|
title = 'Ming Pao - Vancouver'
|
||||||
|
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||||
|
category = 'Chinese, News, Vancouver'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
|
||||||
|
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||||
|
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||||
|
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='img')]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: ''),
|
||||||
|
]
|
||||||
|
elif __Region__ == 'Toronto':
|
||||||
|
title = 'Ming Pao - Toronto'
|
||||||
|
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||||
|
category = 'Chinese, News, Toronto'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||||
|
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
|
||||||
|
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||||
|
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||||
|
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||||
|
]
|
||||||
|
if __KeepImages__:
|
||||||
|
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||||
|
else:
|
||||||
|
remove_tags = [dict(name='img')]
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: ''),
|
||||||
|
]
|
||||||
|
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
__author__ = 'Eddie Lau'
|
||||||
|
publisher = 'MingPao'
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'zh'
|
||||||
|
encoding = 'Big5-HKSCS'
|
||||||
|
recursions = 0
|
||||||
|
conversion_options = {'linearize_tables':True}
|
||||||
|
timefmt = ''
|
||||||
|
|
||||||
|
def image_url_processor(cls, baseurl, url):
    """Return the URL to download for an article image.

    Currently a pass-through: ``url`` is returned unchanged and ``baseurl``
    is not used.
    """
    # A disabled experiment used to live here: break the url at the first
    # occurrence of a digit and add an additional '_' at the front.  It was
    # marked as not working, with a note that it may need to move to
    # preprocess_html() instead.
    return url
|
||||||
|
|
||||||
|
def get_dtlocal(self):
    """Return the date/time that identifies the issue to fetch.

    The current UTC time is shifted by a fixed offset for the configured
    ``__Region__`` and then moved back by the local hour at which the full
    issue is online, so that before that hour the previous day's issue is
    selected.  The offsets are constants (no daylight-saving adjustment).
    """
    dt_utc = datetime.datetime.utcnow()
    if __Region__ == 'Hong Kong':
        # convert UTC to local hk time - at HKT 5.30am, all news are available
        utc_offset, ready_at = 8.0, 5.5
    elif __Region__ == 'Vancouver':
        # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
        utc_offset, ready_at = -8.0, 5.5
    elif __Region__ == 'Toronto':
        # convert UTC to local Toronto time - at EST time 8.30am, all news are available
        utc_offset, ready_at = -5.0, 8.5
    dt_local = dt_utc + datetime.timedelta(hours=utc_offset) - datetime.timedelta(hours=ready_at)
    return dt_local
|
||||||
|
|
||||||
|
def get_fetchdate(self):
    """Return the issue date from ``get_dtlocal()`` as ``YYYYMMDD``."""
    issue_moment = self.get_dtlocal()
    return issue_moment.strftime("%Y%m%d")
|
||||||
|
|
||||||
|
def get_fetchformatteddate(self):
    """Return the issue date from ``get_dtlocal()`` as ``YYYY-MM-DD``."""
    issue_moment = self.get_dtlocal()
    return issue_moment.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
def get_fetchday(self):
    """Return the two-digit day of month of the issue date."""
    issue_moment = self.get_dtlocal()
    return issue_moment.strftime("%d")
|
||||||
|
|
||||||
|
def get_cover_url(self):
    """Return the URL of the front-page image for the issue, or None.

    The URL is built from the issue date for the configured ``__Region__``
    and then opened once with a browser to check that it can be fetched.
    Returns None when the region is not recognised or when opening the URL
    fails.
    """
    # Read the date once so that both parts of the URL agree.
    fetchdate = self.get_fetchdate()
    fetchday = self.get_fetchday()
    if __Region__ == 'Hong Kong':
        cover = 'http://news.mingpao.com/' + fetchdate + '/' + fetchdate + '_' + fetchday + 'gacov.jpg'
    elif __Region__ == 'Vancouver':
        cover = 'http://www.mingpaovan.com/ftp/News/' + fetchdate + '/' + fetchday + 'pgva1s.jpg'
    elif __Region__ == 'Toronto':
        cover = 'http://www.mingpaotor.com/ftp/News/' + fetchdate + '/' + fetchday + 'pgtas.jpg'
    else:
        return None
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: a cover that cannot be fetched means "no cover".
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        cover = None
    return cover
|
||||||
|
|
||||||
|
def parse_index(self):
    """Build the list of sections and their articles for the issue.

    The section index pages to read depend on ``__Region__`` and, for Hong
    Kong, on ``__UseLife__``.  Returns a list of ``(section title,
    articles)`` tuples in reading order; sections for which no article was
    found are left out.
    """
    feeds = []
    dateStr = self.get_fetchdate()

    def keep(section_title, found):
        # Only sections that produced at least one article are published.
        if found:
            feeds.append((section_title, found))

    if __Region__ == 'Hong Kong':
        if __UseLife__:
            life_sections = [
                (u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
                (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
                (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
                (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
                (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
                (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
                (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
                (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
                (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl'),
            ]
            for title, url, keystr in life_sections:
                keep(title, self.parse_section2(url, keystr))

            news_sections = [
                (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm'),
            ]
            for title, url in news_sections:
                keep(title, self.parse_section(url))
        else:
            opening = [
                (u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
            ]
            for title, url in opening:
                keep(title, self.parse_section(url))

            # special- editorial
            keep(u'\u793e\u8a55/\u7b46\u9663 Editorial',
                 self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr'))

            affairs = [
                (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
                (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
            ]
            for title, url in affairs:
                keep(title, self.parse_section(url))

            # special - finance (read from life.mingpao.com)
            keep(u'\u7d93\u6fdf Finance',
                 self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea'))

            tech_and_sport = [
                ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
            ]
            for title, url in tech_and_sport:
                keep(title, self.parse_section(url))

            # special - entertainment
            keep(u'\u5f71\u8996 Film/TV',
                 self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm'))

            closing = [
                (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm'),
            ]
            for title, url in closing:
                keep(title, self.parse_section(url))

            # special- columns
            keep(u'\u5c08\u6b04 Columns',
                 self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn'))
    elif __Region__ == 'Vancouver':
        vancouver_sections = [
            (u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
            (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
            (u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
            (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
            (u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
            (u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
            (u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
            (u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
            (u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
            (u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),
        ]
        for title, url in vancouver_sections:
            keep(title, self.parse_section3(url, 'http://www.mingpaovan.com/'))
    elif __Region__ == 'Toronto':
        toronto_sections = [
            (u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
            (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
            (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
            (u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
            (u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
            (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
            (u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
            (u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
            (u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
            (u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),
        ]
        for title, url in toronto_sections:
            keep(title, self.parse_section3(url, 'http://www.mingpaotor.com/'))
    return feeds
|
||||||
|
|
||||||
|
# parse from news.mingpao.com
|
||||||
|
def parse_section(self, url):
    """Collect article links from one news.mingpao.com section index page.

    The page at ``url`` is fetched with ``self.index_to_soup`` and every
    element whose class is ``bullet`` or ``bullet_grey`` is inspected.  The
    first ``<a href=...>`` inside each one becomes an article whose URL is
    ``http://news.mingpao.com/<fetch date>/<href>``.

    Returns a list of dicts with the keys ``title``, ``url``,
    ``description`` and ``date`` in page order.  URLs containing
    ``Redirect`` are dropped.  When a URL occurs more than once, only the
    occurrence nearest the end of the page is kept.
    """
    dateStr = self.get_fetchdate()
    soup = self.index_to_soup(url)
    divs = soup.findAll(attrs={'class': ['bullet', 'bullet_grey']})
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up so that the last occurrence of a duplicated
    # link is the one that survives; the result is flipped back afterwards.
    for i in reversed(divs):
        a = i.find('a', href=True)
        if a is None:
            # A bullet without a link has nothing to download.
            continue
        title = self.tag_to_string(a)
        link = 'http://news.mingpao.com/' + dateStr + '/' + a.get('href', False)
        if link in included_urls or link.rfind('Redirect') != -1:
            continue
        current_articles.append({'title': title, 'url': link, 'description': '', 'date': ''})
        included_urls.add(link)
    current_articles.reverse()
    return current_articles
|
||||||
|
|
||||||
|
# parse from life.mingpao.com
|
||||||
|
def parse_section2(self, url, keystr):
    """Collect the article links of one life.mingpao.com section index.

    url    -- address of the section index page.
    keystr -- substring an article URL must contain to belong to the section.

    Only links containing both ``.txt`` and ``keystr`` are kept.  Each kept
    URL is switched to the printed version of the article
    (``dailynews3a.cfm``).  Returns a list of article dicts (``title``,
    ``url``, ``description``) in page order; for duplicated links the last
    occurrence on the page is kept.
    """
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    anchors = soup.findAll('a', href=True)
    current_articles = []
    included_urls = set()
    # Bottom-up walk: the last occurrence of a duplicated link wins.
    for anchor in reversed(anchors):
        title = self.tag_to_string(anchor)
        raw_url = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if '.txt' not in raw_url or keystr not in raw_url:
            continue
        # use printed version of the article
        article_url = raw_url.replace('dailynews3.cfm', 'dailynews3a.cfm')
        # Deduplicate on the rewritten URL: that is the value recorded in
        # included_urls, so comparing the raw URL would never match it.
        if article_url in included_urls:
            continue
        current_articles.append({'title': title, 'url': article_url, 'description': ''})
        included_urls.add(article_url)
    current_articles.reverse()
    return current_articles
|
||||||
|
|
||||||
|
# parse from www.mingpaovan.com
|
||||||
|
def parse_section3(self, url, baseUrl):
    """Collect the article links of a section index that marks its links
    with the ``ListContentLargeLink`` class.

    url     -- address of the section index page.
    baseUrl -- site prefix; ``'/'`` and the link target (with any
               ``../../../`` removed) are appended to it.

    Returns a list of article dicts (``title``, ``url``, ``description``,
    ``date``) in page order; for duplicated links the last occurrence on the
    page is kept.  Elements without an ``href`` are skipped.
    """
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    current_articles = []
    included_urls = set()
    # Bottom-up walk: the last occurrence of a duplicated link wins.
    for link in reversed(links):
        title = self.tag_to_string(link)
        href = link.get('href', False)
        if not href:
            # The class filter does not guarantee an href attribute.
            continue
        urlstr = baseUrl + '/' + href.replace('../../../', '')
        if urlstr in included_urls:
            continue
        current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
        included_urls.add(urlstr)
    current_articles.reverse()
    return current_articles
|
||||||
|
|
||||||
|
def parse_ed_section(self, url):
    """Collect article links from a life.mingpao.com index page.

    Keeps links whose absolute URL contains both ``.txt`` and ``nal``.
    Returns article dicts (``title``, ``url``, ``description``) in page
    order; for duplicated links the last occurrence on the page is kept.
    """
    self.get_fetchdate()
    anchors = self.index_to_soup(url).findAll('a', href=True)
    seen = set()
    bottom_up = []
    for anchor in reversed(anchors):
        title = self.tag_to_string(anchor)
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen or '.txt' not in link or 'nal' not in link:
            continue
        bottom_up.append({'title': title, 'url': link, 'description': ''})
        seen.add(link)
    # Restore page order.
    return bottom_up[::-1]
|
||||||
|
|
||||||
|
def parse_fin_section(self, url):
    """Collect article links from a life.mingpao.com index page.

    Keeps links whose absolute URL contains both ``txt`` and ``nal``.
    Returns article dicts (``title``, ``url``, ``description``) in page
    order; for duplicated links the first occurrence on the page is kept.
    """
    self.get_fetchdate()
    seen = set()
    articles = []
    for anchor in self.index_to_soup(url).findAll('a', href= True):
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen or 'txt' not in link or 'nal' not in link:
            continue
        # The title is only extracted for links that are kept.
        articles.append({'title': self.tag_to_string(anchor), 'url': link, 'description':''})
        seen.add(link)
    return articles
|
||||||
|
|
||||||
|
def parse_ent_section(self, url):
    """Collect article links from an ol.mingpao.com index page.

    Keeps links whose absolute URL contains both ``.txt`` and ``star``.
    Returns article dicts (``title``, ``url``, ``description``) in page
    order; for duplicated links the last occurrence on the page is kept.
    """
    self.get_fetchdate()
    anchors = self.index_to_soup(url).findAll('a', href=True)
    seen = set()
    bottom_up = []
    for anchor in reversed(anchors):
        title = self.tag_to_string(anchor)
        link = 'http://ol.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen or '.txt' not in link or 'star' not in link:
            continue
        bottom_up.append({'title': title, 'url': link, 'description': ''})
        seen.add(link)
    # Restore page order.
    return bottom_up[::-1]
|
||||||
|
|
||||||
|
def parse_col_section(self, url):
    """Collect article links from a life.mingpao.com index page.

    Keeps links whose absolute URL contains both ``.txt`` and ``ncl``.
    Returns article dicts (``title``, ``url``, ``description``) in page
    order; for duplicated links the last occurrence on the page is kept.
    """
    self.get_fetchdate()
    anchors = self.index_to_soup(url).findAll('a', href=True)
    seen = set()
    bottom_up = []
    for anchor in reversed(anchors):
        title = self.tag_to_string(anchor)
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen or '.txt' not in link or 'ncl' not in link:
            continue
        bottom_up.append({'title': title, 'url': link, 'description': ''})
        seen.add(link)
    # Restore page order.
    return bottom_up[::-1]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Strip presentational attributes from a downloaded article.

    Removes every ``style`` and ``width`` attribute, and the ``align``
    attribute of elements aligned ``absmiddle``.  The soup is modified in
    place and returned.
    """
    for item in soup.findAll(style=True):
        del item['style']
    # Query on the attribute being removed: once the loop above has run, no
    # element carries ``style`` any more, so filtering on it finds nothing.
    for item in soup.findAll(width=True):
        del item['width']
    # NOTE(review): the original filtered on a non-existent ``stype``
    # attribute and deleted ``absmiddle``, which is an ``align`` value, so it
    # never matched.  Dropping align="absmiddle" is the presumed intent.
    for item in soup.findAll(align='absmiddle'):
        del item['align']
    return soup
|
||||||
|
|
||||||
|
def create_opf(self, feeds, dir=None):
    """Write ``index.opf`` and ``index.ncx`` describing the downloaded feeds.

    feeds -- sequence of downloaded feeds; iterating a feed yields its
             articles.
    dir   -- directory the files are written to; defaults to
             ``self.output_dir``.

    The book title is the Chinese regional title when
    ``__UseChineseTitle__`` is set and ``__Region__`` is a known region,
    otherwise ``self.short_title()``.  When ``__MakePeriodical__`` is off
    the formatted fetch date is appended to the title.

    Raises Exception when ``feeds`` is empty.
    """
    if dir is None:
        dir = self.output_dir

    # Fall back to the short title for an unrecognised region instead of
    # leaving ``title`` unbound.
    title = None
    if __UseChineseTitle__ == True:
        title = {
            'Hong Kong': u'\u660e\u5831 (\u9999\u6e2f)',
            'Vancouver': u'\u660e\u5831 (\u6eab\u54e5\u83ef)',
            'Toronto': u'\u660e\u5831 (\u591a\u502b\u591a)',
        }.get(__Region__)
    if title is None:
        title = self.short_title()
    # if not generating a periodical, force date to apply in title
    if __MakePeriodical__ == False:
        title = title + ' ' + self.get_fetchformatteddate()

    mi = MetaInformation(title, [self.publisher])
    mi.publisher = self.publisher
    mi.author_sort = self.publisher
    if __MakePeriodical__ == True:
        mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
    else:
        mi.publication_type = self.publication_type+':'+self.short_title()
    # Timestamp and publication date come from self.get_dtlocal().
    mi.timestamp = self.get_dtlocal()
    mi.comments = self.description
    if not isinstance(mi.comments, unicode):
        mi.comments = mi.comments.decode('utf-8', 'replace')
    mi.pubdate = self.get_dtlocal()
    opf_path = os.path.join(dir, 'index.opf')
    ncx_path = os.path.join(dir, 'index.ncx')
    opf = OPFCreator(dir, mi)

    # Add mastheadImage entry to <guide> section
    mp = getattr(self, 'masthead_path', None)
    if mp is not None and os.access(mp, os.R_OK):
        from calibre.ebooks.metadata.opf2 import Guide
        ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
        ref.type = 'masthead'
        ref.title = 'Masthead Image'
        opf.guide.append(ref)

    manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
    manifest.append(os.path.join(dir, 'index.html'))
    manifest.append(os.path.join(dir, 'index.ncx'))

    # Get cover
    cpath = getattr(self, 'cover_path', None)
    if cpath is None:
        pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
        try:
            if self.default_cover(pf):
                cpath = pf.name
        finally:
            # Close the handle so the cover is flushed to disk before use.
            pf.close()
    if cpath is not None and os.access(cpath, os.R_OK):
        opf.cover = cpath
        manifest.append(cpath)

    # Get masthead
    mpath = getattr(self, 'masthead_path', None)
    if mpath is not None and os.access(mpath, os.R_OK):
        manifest.append(mpath)

    opf.create_manifest_from_files_in(manifest)
    for mani in opf.manifest:
        if mani.path.endswith('.ncx'):
            mani.id = 'ncx'
        if mani.path.endswith('mastheadImage.jpg'):
            mani.id = 'masthead-image'

    entries = ['index.html']
    toc = TOC(base_path=dir)
    self.play_order_counter = 0
    self.play_order_map = {}

    def feed_index(num, parent):
        # Add every downloaded article of feed ``num`` to the spine entries
        # and to the TOC node ``parent``, then append the navigation bar to
        # the last page of the article.
        f = feeds[num]
        for j, a in enumerate(f):
            if not getattr(a, 'downloaded', False):
                continue
            adir = 'feed_%d/article_%d/'%(num, j)
            auth = a.author
            if not auth:
                auth = None
            desc = a.text_summary
            if not desc:
                desc = None
            else:
                desc = self.description_limiter(desc)
            entries.append('%sindex.html'%adir)
            po = self.play_order_map.get(entries[-1], None)
            if po is None:
                self.play_order_counter += 1
                po = self.play_order_counter
            parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth, description=desc)
            # ``last`` ends up as the final page of the article: its index
            # page, or the last sub page when there are any.
            last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
            for sp in a.sub_pages:
                prefix = os.path.commonprefix([opf_path, sp])
                relp = sp[len(prefix):]
                entries.append(relp.replace(os.sep, '/'))
                last = sp

            if os.path.exists(last):
                with open(last, 'rb') as fi:
                    src = fi.read().decode('utf-8')
                soup = BeautifulSoup(src)
                body = soup.find('body')
                if body is not None:
                    # Two '..' path components per 'link<N>' found in the path.
                    prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                    templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
                                    a.orig_url, self.publisher, prefix=prefix,
                                    center=self.center_navbar)
                    elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                    body.insert(len(body.contents), elem)
                    with open(last, 'wb') as fi:
                        fi.write(unicode(soup).encode('utf-8'))

    if len(feeds) == 0:
        raise Exception('All feeds are empty, aborting.')

    if len(feeds) > 1:
        for i, f in enumerate(feeds):
            entries.append('feed_%d/index.html'%i)
            po = self.play_order_map.get(entries[-1], None)
            if po is None:
                self.play_order_counter += 1
                po = self.play_order_counter
            auth = getattr(f, 'author', None)
            if not auth:
                auth = None
            desc = getattr(f, 'description', None)
            if not desc:
                desc = None
            feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                f.title, play_order=po, description=desc, author=auth))
    else:
        entries.append('feed_%d/index.html'%0)
        feed_index(0, toc)

    for i, p in enumerate(entries):
        entries[i] = os.path.join(dir, p.replace('/', os.sep))
    opf.create_spine(entries)
    opf.set_toc(toc)

    with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
        opf.render(opf_file, ncx_file)
|
||||||
|
|
@ -69,7 +69,11 @@ class Newsweek(BasicNewsRecipe):
|
|||||||
for section, shref in self.newsweek_sections():
|
for section, shref in self.newsweek_sections():
|
||||||
self.log('Processing section', section, shref)
|
self.log('Processing section', section, shref)
|
||||||
articles = []
|
articles = []
|
||||||
soups = [self.index_to_soup(shref)]
|
try:
|
||||||
|
soups = [self.index_to_soup(shref)]
|
||||||
|
except:
|
||||||
|
self.log.warn('Section %s not found, skipping'%section)
|
||||||
|
continue
|
||||||
na = soups[0].find('a', rel='next')
|
na = soups[0].find('a', rel='next')
|
||||||
if na:
|
if na:
|
||||||
soups.append(self.index_to_soup(self.BASE_URL+na['href']))
|
soups.append(self.index_to_soup(self.BASE_URL+na['href']))
|
||||||
|
42
recipes/nme.recipe
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    # Recipe for the New Musical Express news, reviews and blog feeds.

    # Identification
    title = u'New Musical Express Magazine'
    __author__ = "scissors"
    language = 'en'
    cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'

    # Download limits
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # Clean-up switches
    remove_javascript = True
    no_stylesheets = True

    # Elements stripped from every article.
    remove_tags = [
        {'attrs': {'class': 'clear_icons'}},
        {'attrs': {'class': 'share_links'}},
        {'attrs': {'id': 'right_panel'}},
        {'attrs': {'class': 'today box'}},
    ]

    # Elements that make up the article; h3 headings are deliberately not kept.
    keep_only_tags = [
        {'name': 'h1'},
        {'attrs': {'class': 'BText'}},
        {'attrs': {'class': 'Bmore'}},
        {'attrs': {'class': 'bPosts'}},
        {'attrs': {'class': 'text'}},
        {'attrs': {'id': 'article_gallery'}},
        {'attrs': {'class': 'article_text'}},
    ]

    feeds = [
        (u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'),
        (u'Reviews', u'http://feeds2.feedburner.com/nme/SdML'),
        (u'Blogs', u'http://www.nme.com/blog/index.php?blog=140&tempskin=_rss2'),
    ]
|
40
recipes/noticias_r7.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class PortalR7(BasicNewsRecipe):
    # Recipe for the Brazilian news portal R7 (Rede Record).

    # Identification and metadata
    title = 'Noticias R7'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias Portal R7'
    publisher = 'Rede Record'
    category = 'news, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    encoding = 'utf8'

    # Download limits and ordering
    oldest_article = 2
    max_articles_per_feed = 20
    reverse_article_order = True
    use_embedded_content = False

    # Clean-up switches
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    feeds = [
        (u'Brasil', u'http://www.r7.com/data/rss/brasil.xml'),
        (u'Economia', u'http://www.r7.com/data/rss/economia.xml'),
        (u'Internacional', u'http://www.r7.com/data/rss/internacional.xml'),
        (u'Tecnologia e Ci\xeancia', u'http://www.r7.com/data/rss/tecnologiaCiencia.xml'),
    ]

    # Only the article container is kept ...
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'materia'}}]
    # ... minus these blocks.
    remove_tags = [
        {'id': ['espalhe', 'report-erro']},
        {'name': 'ul', 'attrs': {'class': 'controles'}},
        {'name': 'ul', 'attrs': {'class': 'relacionados'}},
        {'name': 'div', 'attrs': {'class': 'materia_banner'}},
        {'name': 'div', 'attrs': {'class': 'materia_controles'}},
    ]

    # Drop everything between the opening of the article container and the
    # article header.
    preprocess_regexps = [
        (
            re.compile(r'<div class="materia">.*<div class="materia_cabecalho">',re.DOTALL|re.IGNORECASE),
            lambda match: '<div class="materia"><div class="materia_cabecalho">',
        ),
    ]
|
24
recipes/noticias_unb.recipe
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class NoticiasUnB(BasicNewsRecipe):
    # Recipe for the news feed of the Universidade de Brasilia (UnB).
    title = 'Noticias UnB'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias da UnB'
    oldest_article = 5
    max_articles_per_feed = 20
    category = 'news, educational, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'UnB Agência', u'http://www.unb.br/noticias/rss/noticias.rss')]

    reverse_article_order = True

    def print_version(self, url):
        """Return the address of the printer-friendly page for ``url``.

        The article URL is passed to the site's print script through its
        ``u`` query parameter.
        """
        # Rewrite only the first 'http://' (the scheme prefix); a second
        # occurrence inside the URL must reach the print script untouched.
        return url.replace('http://', 'http://www.unb.br/noticias/print_email/imprimir.php?u=http://', 1)
|
||||||
|
|
72
recipes/pecat.recipe
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.pecat.co.rs
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Pecat_rs(BasicNewsRecipe):
    # Recipe for www.pecat.co.rs; articles come embedded in the feed.
    title = 'Pecat'
    __author__ = 'Darko Miletic'
    description = 'Internet portal slobodne Srbije'
    publication_type = 'magazine'
    language = 'sr'
    encoding = 'utf-8'
    masthead_url = 'http://www.pecat.co.rs/wp-content/themes/zenko-v1/images/logo.jpg'

    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True

    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Arial,Helvetica,sans1,sans-serif}
        img{display: block; margin-bottom: 1em; margin-top: 1em}
        p{display: block; margin-bottom: 1em; margin-top: 1em}
        """

    conversion_options = {
        'comment': description,
        'tags': 'politika, Srbija',
        'publisher': 'Pecat',
        'language': language,
    }

    # Replace U+0110 with U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Clanci', u'http://www.pecat.co.rs/feed/')]

    def preprocess_html(self, soup):
        """Flatten links and lift images and bold text out of paragraphs.

        * every ``style`` attribute is removed;
        * each ``<a>`` is replaced by its text, or by its image when it has
          no single string child but contains an ``<img>``;
        * each ``<img>`` inside a ``<p>`` is moved in front of that paragraph;
        * each ``<strong>`` inside a ``<p>`` is turned into an ``<h4>`` and
          moved in front of that paragraph.

        The soup is modified in place and returned.
        """
        for styled in soup.findAll(style=True):
            del styled['style']

        for anchor in soup.findAll('a'):
            if anchor.string is not None:
                anchor.replaceWith(anchor.string)
                continue
            image = anchor.find('img')
            if image:
                image.extract()
                anchor.replaceWith(image)
            else:
                anchor.replaceWith(self.tag_to_string(anchor))

        for image in soup.findAll('img'):
            paragraph = image.findParent('p')
            if not paragraph:
                continue
            container = paragraph.parent
            position = container.contents.index(paragraph)
            image.extract()
            container.insert(position, image)

        for bold in soup.findAll('strong'):
            paragraph = bold.findParent('p')
            if not paragraph:
                continue
            container = paragraph.parent
            position = container.contents.index(paragraph)
            bold.extract()
            bold.name = 'h4'
            container.insert(position, bold)

        return soup
|
@ -26,6 +26,7 @@ class Perfil(BasicNewsRecipe):
|
|||||||
.foto1 h1{font-size: x-small}
|
.foto1 h1{font-size: x-small}
|
||||||
h1{font-family: Georgia,"Times New Roman",serif}
|
h1{font-family: Georgia,"Times New Roman",serif}
|
||||||
img{margin-bottom: 0.4em}
|
img{margin-bottom: 0.4em}
|
||||||
|
.hora{font-size: x-small; color: red}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
@ -60,7 +61,26 @@ class Perfil(BasicNewsRecipe):
|
|||||||
,(u'Tecnologia' , u'http://www.perfil.com/rss/tecnologia.xml' )
|
,(u'Tecnologia' , u'http://www.perfil.com/rss/tecnologia.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
return article.get('guid', None)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
limg = item.find('img')
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
if limg:
|
||||||
|
item.name = 'div'
|
||||||
|
item.attrs = []
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
return soup
|
return soup
|
||||||
|
|
@ -1,85 +1,45 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
__license__ = 'GPL v3'
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
'''
|
|
||||||
philly.com/inquirer/
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
class Philly(BasicNewsRecipe):
|
class AdvancedUserRecipe1308312288(BasicNewsRecipe):
|
||||||
|
title = u'Philadelphia Inquirer'
|
||||||
title = 'Philadelphia Inquirer'
|
__author__ = 'sexymax15'
|
||||||
__author__ = 'RadikalDissent and Sujata Raman'
|
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = 'Daily news from the Philadelphia Inquirer'
|
description = 'Daily news from the Philadelphia Inquirer'
|
||||||
no_stylesheets = True
|
oldest_article = 15
|
||||||
use_embedded_content = False
|
max_articles_per_feed = 20
|
||||||
oldest_article = 1
|
use_embedded_content = False
|
||||||
max_articles_per_feed = 25
|
remove_empty_feeds = True
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
extra_css = '''
|
# remove_tags_before = {'class':'article_timestamp'}
|
||||||
h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
|
#remove_tags_after = {'class':'graylabel'}
|
||||||
h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
|
keep_only_tags= [dict(name=['h1','p'])]
|
||||||
.body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
|
remove_tags = [dict(name=['hr','dl','dt','img','meta','iframe','link','script','form','input','label']),
|
||||||
.byline {font-size: small; color: #666666; font-style:italic; }
|
dict(id=['toggleConfirmEmailDiv','toggleTOS','toggleUsernameMsgDiv','toggleConfirmYear','navT1_philly','secondaryNav','navPlacement','globalPrimaryNav'
|
||||||
.lastline {font-size: small; color: #666666; font-style:italic;}
|
,'ugc-footer-philly','bv_footer_include','footer','header',
|
||||||
.contact {font-size: small; color: #666666;}
|
'container_rag_bottom','section_rectangle','contentrightside'])
|
||||||
.contact p {font-size: small; color: #666666;}
|
,{'class':['megamenu3 megamenu','container misc','container_inner misc_inner'
|
||||||
#photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
|
,'misccontainer_left_32','headlineonly','misccontainer_middle_32'
|
||||||
.photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
|
,'misccontainer_right_32','headline formBegin',
|
||||||
#photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
|
'post_balloon','relatedlist','linkssubhead','b_sq','dotted-rule-above'
|
||||||
.photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
|
,'container','headlines-digest','graylabel','container_inner'
|
||||||
.article_timestamp{font-size:x-small; color:#666666;}
|
,'rlinks_colorbar1','rlinks_colorbar2','supercontainer','container_5col_left','container_image_left',
|
||||||
a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
|
'digest-headline2','digest-lead','container_5col_leftmiddle',
|
||||||
'''
|
'container_5col_middlemiddle','container_5col_rightmiddle'
|
||||||
|
,'container_5col_right','divclear','supercontainer_outer force-width',
|
||||||
|
'supercontainer','containertitle kicker-title',
|
||||||
|
'pollquestion','pollchoice','photomore','pollbutton','container rssbox','containertitle video ',
|
||||||
|
'containertitle_image ','container_tabtwo','selected'
|
||||||
|
,'shadetabs','selected','tabcontentstyle','tabcontent','inner_container'
|
||||||
|
,'arrow','container_ad','containertitlespacer','adUnit','tracking','sitemsg_911 clearfix']}]
|
||||||
|
|
||||||
keep_only_tags = [
|
extra_css = """
|
||||||
dict(name='div', attrs={'class':'story-content'}),
|
h1{font-family: Georgia,serif; font-size: xx-large}
|
||||||
dict(name='div', attrs={'id': 'contentinside'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
"""
|
||||||
dict(name='div', attrs={'class':['linkssubhead','post_balloon','relatedlist','pollquestion','b_sq']}),
|
|
||||||
dict(name='dl', attrs={'class':'relatedlist'}),
|
|
||||||
dict(name='div', attrs={'id':['photoNav','sidebar_adholder']}),
|
|
||||||
dict(name='a', attrs={'class': ['headlineonly','bl']}),
|
|
||||||
dict(name='img', attrs={'class':'img_noborder'})
|
|
||||||
]
|
|
||||||
# def print_version(self, url):
|
|
||||||
# return url + '?viewAll=y'
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [(u'News', u'http://www.philly.com/philly_news.rss')]
|
||||||
('Front Page', 'http://www.philly.com/inquirer_front_page.rss'),
|
|
||||||
('Business', 'http://www.philly.com/inq_business.rss'),
|
|
||||||
#('News', 'http://www.philly.com/inquirer/news/index.rss'),
|
|
||||||
('Nation', 'http://www.philly.com/inq_news_world_us.rss'),
|
|
||||||
('Local', 'http://www.philly.com/inquirer_local.rss'),
|
|
||||||
('Health', 'http://www.philly.com/inquirer_health_science.rss'),
|
|
||||||
('Education', 'http://www.philly.com/inquirer_education.rss'),
|
|
||||||
('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'),
|
|
||||||
('Sports', 'http://www.philly.com/inquirer_sports.rss')
|
|
||||||
]
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
ans = article.link
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.log('Looking for full story link in', ans)
|
|
||||||
soup = self.index_to_soup(ans)
|
|
||||||
x = soup.find(text="View All")
|
|
||||||
|
|
||||||
if x is not None:
|
|
||||||
ans = ans + '?viewAll=y'
|
|
||||||
self.log('Found full story link', ans)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return ans
|
|
||||||
|
|
||||||
def postprocess_html(self, soup,first):
|
|
||||||
|
|
||||||
for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}):
|
|
||||||
tag.extract()
|
|
||||||
for tag in soup.findAll(name='br'):
|
|
||||||
tag.extract()
|
|
||||||
|
|
||||||
return soup
|
|
||||||
|
35
recipes/polizeipress_de.recipe
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe(BasicNewsRecipe):
    # Recipe for police press releases published on presseportal.de.

    title = u'Polizeipresse - Deutschland'
    __author__ = 'schuster'
    # The sentences are separate literals joined by the parser, so each one
    # ends with a space to keep the resulting text readable.
    description = ('Tagesaktuelle "Polizeiberichte" aus ganz Deutschland (bis auf Ortsebene). '
                   'Um deinen Ort/Stadt/Kreis usw. einzubinden, gehe auf "http://www.presseportal.de/polizeipresse/" und suche im oberen "Suchfeld" nach dem Namen. '
                   'Oberhalb der Suchergebnisse (Folgen:) auf den üblichen link zu den RSS-Feeds klicken und den RSS-link im Rezept unter "feeds" eintragen wie üblich. '
                   'Die Auswahl von Orten kann vereinfacht werden wenn man den Suchbegriff wie folgt eingibt: '
                   '"Stadt-Ort".')
    oldest_article = 21
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    masthead_url = 'http://www.alt-heliservice.de/images/34_BPOL_Logo_4C_g_schutzbereich.jpg'
    cover_url = 'http://berlinstadtservice.de/buerger/Bundespolizei-Logo.png'

    remove_tags = [
        dict(name='div', attrs={'id':'logo'}),
        dict(name='div', attrs={'id':'origin'}),
        dict(name='pre', attrs={'class':'xml_contact'})]

    def print_version(self,url):
        """Return the print-view address for the article at ``url``.

        The sixth ``/``-separated component of ``url`` is passed to the print
        script as ``nr``; it is empty when the URL has fewer components.
        """
        segments = url.split('/')
        printURL = 'http://www.presseportal.de/print.htx?nr=' + '/'.join(segments[5:6]) + '&type=polizei'
        return printURL

    # One feed per search term; the feed title matches the place queried.
    feeds = [(u'Frimmersdorf', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-frimmersdorf&w=public_service'),
             (u'Neurath', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-neurath&w=public_service'),
             (u'Gustorf', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-gustorf&w=public_service'),
             (u'Neuenhausen', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-neuenhausen&w=public_service'),
             (u'Wevelinghoven', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-Wevelinghoven&w=public_service'),
             (u'Grevenbroich ges.', u'http://www.presseportal.de/rss/rss2_vts.htx?q=grevenbroich&w=public_service'),
             (u'Kreis Neuss ges.', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Rhein-Kreis+Neuss&w=public_service'),
            ]
|
||||||
|
|
@ -1,52 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'Mori'
|
|
||||||
__version__ = 'v. 0.1'
|
|
||||||
'''
|
|
||||||
www.runa.pl/blog
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
import re
|
|
||||||
|
|
||||||
class FantazmatyRecipe(BasicNewsRecipe):
|
|
||||||
__author__ = 'Mori'
|
|
||||||
language = 'pl'
|
|
||||||
|
|
||||||
title = u'Fantazmaty'
|
|
||||||
publisher = u'Agencja Wydawnicza Runa'
|
|
||||||
description = u'Blog Agencji Wydawniczej Runa'
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
|
|
||||||
oldest_article = 100
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
img{float: left; padding-right: 10px; padding-bottom: 5px;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Fantazmaty', u'http://www.runa.pl/blog/rss.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name = 'div', attrs = {'class' : 'path'}),
|
|
||||||
dict(name = 'div', attrs = {'class' : 'drdot'}),
|
|
||||||
dict(name = 'div', attrs = {'class' : 'picture'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = [
|
|
||||||
dict(name = 'div', attrs = {'class' : 'content'})
|
|
||||||
]
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
|
||||||
[
|
|
||||||
(r'<body>.*?<div id="primary"', lambda match: '<body><div id="primary"'),
|
|
||||||
(r'<!--.*?-->', lambda match: '')
|
|
||||||
]
|
|
||||||
]
|
|
80
recipes/scmp.recipe
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
scmp.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class SCMP(BasicNewsRecipe):
    # Recipe for the South China Morning Post (scmp.com); requires a login.
    title = 'South China Morning Post'
    __author__ = 'llam'
    description = "SCMP.com, Hong Kong's premier online English daily provides exclusive up-to-date news, audio video news, podcasts, RSS Feeds, Blogs, breaking news, top stories, award winning news and analysis on Hong Kong and China."
    publisher = 'South China Morning Post Publishers Ltd.'
    category = 'SCMP, Online news, Hong Kong News, China news, Business news, English newspaper, daily newspaper, Lifestyle news, Sport news, Audio Video news, Asia news, World news, economy news, investor relations news, RSS Feeds'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en_CN'
    remove_empty_feeds = True
    needs_subscription = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.scmp.com/images/logo_scmp_home.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } '

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    def get_browser(self):
        """Return a browser, logged in to scmp.com when credentials are set."""
        # NOTE(review): called on the class without an instance, exactly as
        # in the original -- presumably a classmethod in this calibre
        # version; confirm before changing the call.
        br = BasicNewsRecipe.get_browser()
        #br.set_debug_http(True)
        #br.set_debug_responses(True)
        #br.set_debug_redirects(True)
        if self.username is not None and self.password is not None:
            br.open('http://www.scmp.com/portal/site/SCMP/')
            br.select_form(name='loginForm')
            br['Login'   ] = self.username
            br['Password'] = self.password
            br.submit()
        return br

    remove_attributes=['width','height','border']

    keep_only_tags = [
        dict(attrs={'id':['ART','photoBox']}),
        dict(attrs={'class':['article_label','article_byline','article_body']}),
    ]

    # Remove tables of class "embscreen" together with their opening <P>.
    preprocess_regexps = [
        (re.compile(r'<P><table((?!<table).)*class="embscreen"((?!</table>).)*</table>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    feeds = [
        (u'Business', u'http://www.scmp.com/rss/business.xml'),
        (u'Hong Kong', u'http://www.scmp.com/rss/hong_kong.xml'),
        (u'China', u'http://www.scmp.com/rss/china.xml'),
        (u'Asia & World', u'http://www.scmp.com/rss/news_asia_world.xml'),
        (u'Opinion', u'http://www.scmp.com/rss/opinion.xml'),
        (u'LifeSTYLE', u'http://www.scmp.com/rss/lifestyle.xml'),
        (u'Sport', u'http://www.scmp.com/rss/sport.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` without its last ``&``-separated parameter.

        A URL that contains no ``&`` is returned unchanged.
        """
        rpart, sep, rest = url.rpartition('&')
        # Without a separator rpartition() puts the whole URL in ``rest`` and
        # leaves ``rpart`` empty; returning that would discard the address.
        if not sep:
            return url
        return rpart

    def preprocess_html(self, soup):
        """Drop inline styles and label icons, then apply ``adeify_images``."""
        for item in soup.findAll(style=True):
            del item['style']
        for icon in soup.findAll(src="/images/label_icon.gif"):
            icon.extract()
        return self.adeify_images(soup)
|
40
recipes/sizinti_derigisi.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TodaysZaman_en(BasicNewsRecipe):
    # Feed-based recipe for the Turkish-language magazine titled
    # "Sızıntı Dergisi" (sizinti.com.tr).
    # NOTE(review): the class name and the description text are the same as
    # in todays_zaman.recipe and describe a daily newspaper - presumably
    # copy-paste leftovers; confirm before changing them.

    # -- identity / metadata ------------------------------------------------
    title            = u'Sızıntı Dergisi'
    __author__       = u'thomass'
    description      = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
    category         = 'dergi, ilim, kültür, bilim,Türkçe'
    language         = 'tr'
    publication_type = 'magazine'
    encoding         = 'utf-8'

    # -- download limits ----------------------------------------------------
    oldest_article        = 30
    max_articles_per_feed = 80

    # -- clean-up -----------------------------------------------------------
    no_stylesheets     = True
    remove_tags_before = dict(id='content-right')

    # -- artwork ------------------------------------------------------------
    # NOTE(review): other recipes in this file set `cover_url`; whether
    # calibre reads `cover_img_url` at all should be confirmed.
    cover_img_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg'
    masthead_url  = 'http://www.sizinti.com.tr/images/sizintiprint.jpg'

    # -- feeds --------------------------------------------------------------
    feeds = [
        (u'Sızıntı', u'http://www.sizinti.com.tr/rss'),
    ]

    # -- options the recipe author left disabled ----------------------------
    #delay = 1
    #use_embedded_content = False
    #publisher = ' '
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
    #remove_attributes = ['aria-describedby']
    #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
    #remove_empty_feeds= True
    #remove_attributes = ['width','height']

    #def preprocess_html(self, soup):
    #    return self.adeify_images(soup)
    #def print_version(self, url): #there is a problem caused by table format
    #    return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?')
|
||||||
|
|
@ -1,94 +1,67 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
spiegel.de
|
spiegel.de
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Spiegel_int(BasicNewsRecipe):
|
class Spiegel_int(BasicNewsRecipe):
|
||||||
title = 'Spiegel Online International'
|
title = 'Spiegel Online International'
|
||||||
__author__ = 'Darko Miletic and Sujata Raman'
|
__author__ = 'Darko Miletic and Sujata Raman'
|
||||||
description = "News and POV from Europe's largest newsmagazine"
|
description = "Daily news, analysis and opinion from Europe's leading newsmagazine and Germany's top news Web site"
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'en'
|
language = 'en_DE'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
encoding = 'cp1252'
|
||||||
publisher = 'SPIEGEL ONLINE GmbH'
|
publisher = 'SPIEGEL ONLINE GmbH'
|
||||||
category = 'news, politics, Germany'
|
category = 'news, politics, Germany'
|
||||||
lang = 'en'
|
masthead_url = 'http://www.spiegel.de/static/sys/v9/spiegelonline_logo.png'
|
||||||
recursions = 1
|
publication_type = 'magazine'
|
||||||
match_regexps = [r'http://www.spiegel.de/.*-[1-9],00.html']
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
,'tags' : category
|
,'tags' : category
|
||||||
,'language' : lang
|
,'language' : language
|
||||||
,'publisher' : publisher
|
,'publisher': publisher
|
||||||
,'pretty_print': True
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
#spArticleColumn{font-family:verdana,arial,helvetica,geneva,sans-serif ; }
|
#spArticleContent{font-family: Verdana,Arial,Helvetica,Geneva,sans-serif}
|
||||||
h1{color:#666666; font-weight:bold;}
|
h1{color:#666666; font-weight:bold;}
|
||||||
h2{color:#990000;}
|
h2{color:#990000;}
|
||||||
h3{color:#990000;}
|
h3{color:#990000;}
|
||||||
h4 {color:#990000;}
|
h4 {color:#990000;}
|
||||||
a{color:#990000;}
|
a{color:#990000;}
|
||||||
.spAuthor{font-style:italic;}
|
.spAuthor{font-style:italic;}
|
||||||
#spIntroTeaser{font-weight:bold;}
|
#spIntroTeaser{font-weight:bold}
|
||||||
.spCredit{color:#666666; font-size:x-small;}
|
.spCredit{color:#666666; font-size:x-small;}
|
||||||
.spShortDate{font-size:x-small;}
|
.spShortDate{font-size:x-small;}
|
||||||
.spArticleImageBox {font-size:x-small;}
|
.spArticleImageBox {font-size:x-small;}
|
||||||
.spPhotoGallery{font-size:x-small; color:#990000 ;}
|
.spPhotoGallery{font-size:x-small; color:#990000 ;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
|
||||||
dict(name ='div', attrs={'id': ['spArticleImageBox spAssetAlignleft','spArticleColumn']}),
|
remove_tags_after = dict(attrs={'id':'spArticleBody'})
|
||||||
]
|
remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
|
||||||
|
remove_attributes = ['clear']
|
||||||
|
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]
|
||||||
|
|
||||||
remove_tags = [
|
def print_version(self, url):
|
||||||
dict(name='div', attrs={'id':['spSocialBookmark','spArticleFunctions','spMultiPagerHeadlines',]}),
|
main, sep, rest = url.rpartition(',')
|
||||||
dict(name='div', attrs={'class':['spCommercial spM520','spArticleCredit','spPicZoom']}),
|
rmain, rsep, rrest = main.rpartition(',')
|
||||||
]
|
return rmain + ',druck-' + rrest + ',' + rest
|
||||||
|
|
||||||
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/rss/0,5291,676,00.xml')]
|
|
||||||
|
|
||||||
def postprocess_html(self, soup,first):
|
|
||||||
|
|
||||||
for tag in soup.findAll(name='div',attrs={'id':"spMultiPagerControl"}):
|
|
||||||
tag.extract()
|
|
||||||
|
|
||||||
p = soup.find(name = 'p', attrs={'id':'spIntroTeaser'})
|
|
||||||
|
|
||||||
if p.string is not None:
|
|
||||||
t = p.string.rpartition(':')[0]
|
|
||||||
|
|
||||||
if 'Part'in t:
|
|
||||||
if soup.h1 is not None:
|
|
||||||
soup.h1.extract()
|
|
||||||
if soup.h2 is not None:
|
|
||||||
soup.h2.extract()
|
|
||||||
functag = soup.find(name= 'div', attrs={'id':"spArticleFunctions"})
|
|
||||||
if functag is not None:
|
|
||||||
functag.extract()
|
|
||||||
auttag = soup.find(name= 'p', attrs={'class':"spAuthor"})
|
|
||||||
if auttag is not None:
|
|
||||||
auttag.extract()
|
|
||||||
|
|
||||||
pictag = soup.find(name= 'div', attrs={'id':"spArticleTopAsset"})
|
|
||||||
if pictag is not None:
|
|
||||||
pictag.extract()
|
|
||||||
|
|
||||||
|
|
||||||
return soup
|
|
||||||
|
|
||||||
# def print_version(self, url):
|
|
||||||
# main, sep, rest = url.rpartition(',')
|
|
||||||
# rmain, rsep, rrest = main.rpartition(',')
|
|
||||||
# return rmain + ',druck-' + rrest + ',' + rest
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
return soup
|
||||||
|
56
recipes/stiintasitehnica.recipe
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
stiintasitehnica.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Stiintasitehnica(BasicNewsRecipe):
    # calibre news recipe for stiintasitehnica.com (Romanian, single RSS feed).
    # Article pages are reduced to the 'mainColumn2' div and cut off after the
    # dotted <hr>.

    title                 = u'\u0218tiin\u021b\u0103 \u015fi Tehnic\u0103'
    __author__            = u'Silviu Cotoar\u0103'
    description           = u'\u0218tiin\u021b\u0103 \u015fi Tehnic\u0103'
    publisher             = u'\u0218tiin\u021b\u0103 \u015fi Tehnic\u0103'
    oldest_article        = 50
    language              = 'ro'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    category              = u'Ziare,Reviste,Stiinta,Tehnica'
    encoding              = 'utf-8'
    cover_url             = 'http://www.stiintasitehnica.com/images/logo.jpg'

    # Metadata handed to the conversion pipeline.
    conversion_options = {
          'comments'  : description
        , 'tags'      : category
        , 'language'  : language
        , 'publisher' : publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'id':'mainColumn2'})
    ]

    # The 'connect_widget_interactive_area' table spec used to be listed
    # twice; one entry is enough to remove every matching table.
    remove_tags = [
          dict(name='span' , attrs={'class':['redEar']})
        , dict(name='table', attrs={'class':['connect_widget_interactive_area']})
        , dict(name='div'  , attrs={'class':['panel-overlay']})
        , dict(name='div'  , attrs={'id':['pointer']})
        , dict(name='img'  , attrs={'class':['nav-next', 'nav-prev']})
        , dict(name='hr'   , attrs={'class':['dotted']})
    ]

    remove_tags_after = [
        dict(name='hr', attrs={'class':['dotted']})
    ]

    feeds = [
        (u'Feeds', u'http://www.stiintasitehnica.com/rss/stiri.xml')
    ]

    def preprocess_html(self, soup):
        # Only delegates to calibre's adeify_images() helper.
        return self.adeify_images(soup)
|
@ -49,12 +49,14 @@ class TelegraphUK(BasicNewsRecipe):
|
|||||||
(u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
|
(u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
|
||||||
,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
|
,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
|
||||||
,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
|
,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
|
||||||
|
,(u'Finance' , u'http://www.telegraph.co.uk/finance/rss' )
|
||||||
,(u'Technology News', u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologynews/rss' )
|
,(u'Technology News', u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologynews/rss' )
|
||||||
,(u'UK News' , u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologyreviews/rss')
|
,(u'UK News' , u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologyreviews/rss')
|
||||||
,(u'Science News' , u'http://www.telegraph.co.uk/scienceandtechnology/science/sciencenews/rss' )
|
,(u'Science News' , u'http://www.telegraph.co.uk/scienceandtechnology/science/sciencenews/rss' )
|
||||||
,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' )
|
,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' )
|
||||||
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
|
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
|
||||||
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
|
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
|
||||||
|
,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' )
|
||||||
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
|
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -10,8 +10,8 @@ import re
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Time(BasicNewsRecipe):
|
class Time(BasicNewsRecipe):
|
||||||
recipe_disabled = ('This recipe has been disabled as TIME no longer'
|
#recipe_disabled = ('This recipe has been disabled as TIME no longer'
|
||||||
' publish complete articles on the web.')
|
# ' publish complete articles on the web.')
|
||||||
title = u'Time'
|
title = u'Time'
|
||||||
__author__ = 'Kovid Goyal and Sujata Raman'
|
__author__ = 'Kovid Goyal and Sujata Raman'
|
||||||
description = 'Weekly magazine'
|
description = 'Weekly magazine'
|
||||||
|
53
recipes/todays_zaman.recipe
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TodaysZaman_en(BasicNewsRecipe):
    # Feed-based recipe for the English-language daily "Todays Zaman"
    # (todayszaman.com). Articles are reduced to headline, date/category
    # spans and the newsSpot/newsText table cells.

    # -- identity / metadata ------------------------------------------------
    title            = u'Todays Zaman'
    __author__       = u'thomass'
    description      = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
    category         = 'news, haberler,TR,gazete'
    language         = 'en_TR'
    publication_type = 'newspaper'
    encoding         = 'utf-8'

    # -- download limits ----------------------------------------------------
    oldest_article        = 2
    max_articles_per_feed = 100
    remove_empty_feeds    = True

    # -- clean-up -----------------------------------------------------------
    no_stylesheets = True

    # To include images as well, also add:
    #   dict(name='div', attrs={'id':['gallery','detailDate',]})
    keep_only_tags = [
        dict(name='h1', attrs={'class':['georgia_30']}),
        dict(name='span', attrs={'class':['left-date','detailDate','detailCName']}),
        dict(name='td', attrs={'id':['newsSpot','newsText']}),
    ]

    remove_attributes = ['aria-describedby']

    remove_tags = [
        dict(name='img', attrs={'src':[
            '/images/icon_print.gif',
            'http://gmodules.com/ig/images/plus_google.gif',
            '/images/template/jazz/agenda/i1.jpg',
            'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp',
        ]}),
        dict(name='hr', attrs={'class':['interactive-hr']}),
        dict(name='div', attrs={'class':['empty_height_18','empty_height_9']}),
        dict(name='td', attrs={'id':['superTitle']}),
        dict(name='span', attrs={'class':['t-count enabled t-count-focus']}),
        dict(name='a', attrs={'id':['count']}),
        dict(name='td', attrs={'class':['left-date']}),
    ]

    # -- artwork ------------------------------------------------------------
    # NOTE(review): other recipes in this file set `cover_url`; whether
    # calibre reads `cover_img_url` at all should be confirmed.
    cover_img_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
    masthead_url  = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'

    # -- feeds: one RSS endpoint per site section ---------------------------
    feeds = [
        (u'Home', u'http://www.todayszaman.com/rss?sectionId=0'),
        (u'News', u'http://www.todayszaman.com/rss?sectionId=100'),
        (u'Business', u'http://www.todayszaman.com/rss?sectionId=105'),
        (u'Interviews', u'http://www.todayszaman.com/rss?sectionId=8'),
        (u'Columnists', u'http://www.todayszaman.com/rss?sectionId=6'),
        (u'Op-Ed', u'http://www.todayszaman.com/rss?sectionId=109'),
        (u'Arts & Culture', u'http://www.todayszaman.com/rss?sectionId=110'),
        (u'Expat Zone', u'http://www.todayszaman.com/rss?sectionId=132'),
        (u'Sports', u'http://www.todayszaman.com/rss?sectionId=5'),
        (u'Features', u'http://www.todayszaman.com/rss?sectionId=116'),
        (u'Travel', u'http://www.todayszaman.com/rss?sectionId=117'),
        (u'Leisure', u'http://www.todayszaman.com/rss?sectionId=118'),
        (u'Weird But True', u'http://www.todayszaman.com/rss?sectionId=134'),
        (u'Life', u'http://www.todayszaman.com/rss?sectionId=133'),
        (u'Health', u'http://www.todayszaman.com/rss?sectionId=126'),
        (u'Press Review', u'http://www.todayszaman.com/rss?sectionId=130'),
        (u'Todays think tanks', u'http://www.todayszaman.com/rss?sectionId=159'),
    ]

    # -- options the recipe author left disabled ----------------------------
    #delay = 1
    #use_embedded_content = False
    #publisher = ' '
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
    # remove_attributes = ['width','height']

    #def preprocess_html(self, soup):
    #    return self.adeify_images(soup)
    #def print_version(self, url): #there is a problem caused by table format
    #    return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?')
|
||||||
|
|
25
recipes/words_without_borders.recipe
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#recipe created by sexymax15.....sexymax15@gmail.com
|
||||||
|
#Words without Borders recipe
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1308302002(BasicNewsRecipe):
    # calibre news recipe for Words Without Borders, fed from a single
    # FeedBurner RSS feed.

    title                 = u'Words Without Borders'
    language              = 'en'
    __author__            = 'sexymax15'
    oldest_article        = 90
    max_articles_per_feed = 30
    use_embedded_content  = False

    remove_empty_feeds = True
    no_stylesheets     = True
    remove_javascript  = True

    # Tag specs are given as a list of attribute dicts, the same shape used
    # by remove_tags_after and remove_tags below. keep_only_tags used to be a
    # bare dict; it is now wrapped in a list so that iterating over it yields
    # the spec itself rather than its keys.
    keep_only_tags    = [{'class':'span-14 article'}]
    remove_tags_after = [{'class':'addthis_toolbox addthis_default_style no_print'}]
    remove_tags       = [{'class':['posterous_quote_citation','button']}]

    extra_css = """
h1{font-family: Georgia,serif; font-size: large}h2{font-family: Georgia,serif; font-size: large} """

    feeds = [(u'wwb', u'http://feeds.feedburner.com/wwborders?format=xml')]
|
@ -2,90 +2,92 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, matek09, matek09@gmail.com'
|
__copyright__ = '2010, matek09, matek09@gmail.com'
|
||||||
|
__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
|
|
||||||
class Wprost(BasicNewsRecipe):
|
class Wprost(BasicNewsRecipe):
|
||||||
EDITION = 0
|
EDITION = 0
|
||||||
FIND_LAST_FULL_ISSUE = True
|
FIND_LAST_FULL_ISSUE = True
|
||||||
EXCLUDE_LOCKED = True
|
EXCLUDE_LOCKED = True
|
||||||
ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
|
ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
|
||||||
|
|
||||||
title = u'Wprost'
|
title = u'Wprost'
|
||||||
__author__ = 'matek09'
|
__author__ = 'matek09'
|
||||||
description = 'Weekly magazine'
|
description = 'Weekly magazine'
|
||||||
encoding = 'ISO-8859-2'
|
encoding = 'ISO-8859-2'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
|
remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
|
||||||
remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
|
remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
|
||||||
|
|
||||||
'''keep_only_tags =[]
|
'''keep_only_tags =[]
|
||||||
keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
|
keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
|
||||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
|
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
|
||||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
|
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
|
||||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''
|
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
|
preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
|
||||||
(re.compile(r'display: block;'), lambda match: '')]
|
(re.compile(r'display: block;'), lambda match: ''),
|
||||||
|
(re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
|
||||||
|
(re.compile(r'\<table .*?\>'), lambda match: ''),
|
||||||
|
(re.compile(r'\<tr>'), lambda match: ''),
|
||||||
|
(re.compile(r'\<td .*?\>'), lambda match: '')]
|
||||||
|
|
||||||
|
remove_tags =[]
|
||||||
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
|
||||||
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
|
||||||
|
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
|
||||||
|
|
||||||
remove_tags =[]
|
extra_css = '''
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
|
.div-header {font-size: x-small; font-weight: bold}
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
|
'''
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
|
|
||||||
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
.div-header {font-size: x-small; font-weight: bold}
|
|
||||||
'''
|
|
||||||
#h2 {font-size: x-large; font-weight: bold}
|
#h2 {font-size: x-large; font-weight: bold}
|
||||||
def is_blocked(self, a):
|
def is_blocked(self, a):
|
||||||
if a.findNextSibling('img') is None:
|
if a.findNextSibling('img') is None:
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_last_issue(self):
|
def find_last_issue(self):
|
||||||
soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
|
soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
|
||||||
a = 0
|
a = 0
|
||||||
if self.FIND_LAST_FULL_ISSUE:
|
if self.FIND_LAST_FULL_ISSUE:
|
||||||
ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
|
ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
|
||||||
a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
|
a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
|
||||||
else:
|
else:
|
||||||
a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
|
a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
|
||||||
self.EDITION = a['href'].replace('/tygodnik/?I=', '')
|
self.EDITION = a['href'].replace('/tygodnik/?I=', '')
|
||||||
self.cover_url = a.img['src']
|
self.cover_url = a.img['src']
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
self.find_last_issue()
|
self.find_last_issue()
|
||||||
soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
|
soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
|
||||||
feeds = []
|
feeds = []
|
||||||
for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
|
for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
|
||||||
articles = list(self.find_articles(main_block))
|
articles = list(self.find_articles(main_block))
|
||||||
if len(articles) > 0:
|
if len(articles) > 0:
|
||||||
section = self.tag_to_string(main_block)
|
section = self.tag_to_string(main_block)
|
||||||
feeds.append((section, articles))
|
feeds.append((section, articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def find_articles(self, main_block):
|
|
||||||
for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
|
|
||||||
if a.name in "td":
|
|
||||||
break
|
|
||||||
if self.EXCLUDE_LOCKED & self.is_blocked(a):
|
|
||||||
continue
|
|
||||||
yield {
|
|
||||||
'title' : self.tag_to_string(a),
|
|
||||||
'url' : 'http://www.wprost.pl' + a['href'],
|
|
||||||
'date' : '',
|
|
||||||
'description' : ''
|
|
||||||
}
|
|
||||||
|
|
||||||
|
def find_articles(self, main_block):
|
||||||
|
for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
|
||||||
|
if a.name in "td":
|
||||||
|
break
|
||||||
|
if self.EXCLUDE_LOCKED & self.is_blocked(a):
|
||||||
|
continue
|
||||||
|
yield {
|
||||||
|
'title' : self.tag_to_string(a),
|
||||||
|
'url' : 'http://www.wprost.pl' + a['href'],
|
||||||
|
'date' : '',
|
||||||
|
'description' : ''
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
res = br.submit()
|
res = br.submit()
|
||||||
raw = res.read()
|
raw = res.read()
|
||||||
if 'Welcome,' not in raw:
|
if 'Welcome,' not in raw and '>Logout<' not in raw:
|
||||||
raise ValueError('Failed to log in to wsj.com, check your '
|
raise ValueError('Failed to log in to wsj.com, check your '
|
||||||
'username and password')
|
'username and password')
|
||||||
return br
|
return br
|
||||||
|
@ -1,20 +1,55 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ZamanRecipe(BasicNewsRecipe):
|
class Zaman (BasicNewsRecipe):
|
||||||
title = u'Zaman'
|
|
||||||
__author__ = u'Deniz Og\xfcz'
|
|
||||||
language = 'tr'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 10
|
|
||||||
|
|
||||||
cover_url = 'http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif'
|
title = u'ZAMAN Gazetesi'
|
||||||
feeds = [(u'Gundem', u'http://www.zaman.com.tr/gundem.rss'),
|
__author__ = u'thomass'
|
||||||
(u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
|
oldest_article = 2
|
||||||
(u'Spor', u'http://www.zaman.com.tr/spor.rss'),
|
max_articles_per_feed =100
|
||||||
(u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
|
# no_stylesheets = True
|
||||||
(u'Politika', u'http://www.zaman.com.tr/politika.rss'),
|
#delay = 1
|
||||||
(u'D\u0131\u015f Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
|
#use_embedded_content = False
|
||||||
(u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),]
|
encoding = 'ISO 8859-9'
|
||||||
|
publisher = 'Zaman'
|
||||||
|
category = 'news, haberler,TR,gazete'
|
||||||
|
language = 'tr'
|
||||||
|
publication_type = 'newspaper '
|
||||||
|
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||||
|
conversion_options = {
|
||||||
|
'tags' : category
|
||||||
|
,'language' : language
|
||||||
|
,'publisher' : publisher
|
||||||
|
,'linearize_tables': False
|
||||||
|
}
|
||||||
|
cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
|
||||||
|
masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
return url.replace('www.zaman.com.tr/haber.do?', 'www.zaman.com.tr/yazdir.do?')
|
keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
|
||||||
|
remove_tags = [ dict(name='div', attrs={'id':['news-detail-news-text-font-size','news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})]
|
||||||
|
|
||||||
|
|
||||||
|
#remove_attributes = ['width','height']
|
||||||
|
remove_empty_feeds= True
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
( u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
|
||||||
|
( u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
|
||||||
|
( u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
|
||||||
|
( u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
|
||||||
|
( u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
|
||||||
|
( u'Politika', u'http://www.zaman.com.tr/politika.rss'),
|
||||||
|
( u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
|
||||||
|
( u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
|
||||||
|
( u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
|
||||||
|
( u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
|
||||||
|
( u'Spor', u'http://www.zaman.com.tr/spor.rss'),
|
||||||
|
( u'Kürsü', u'http://www.zaman.com.tr/kursu.rss'),
|
||||||
|
( u'Kültür Sanat', u'http://www.zaman.com.tr/kultursanat.rss'),
|
||||||
|
( u'Televizyon', u'http://www.zaman.com.tr/televizyon.rss'),
|
||||||
|
( u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
|
||||||
|
|
||||||
|
|
||||||
|
]
|
||||||
|
@ -82,7 +82,7 @@ class ZAOBAO(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def parse_feeds(self):
|
def parse_feeds(self):
|
||||||
self.log_debug(_('ZAOBAO overrided parse_feeds()'))
|
self.log(_('ZAOBAO overrided parse_feeds()'))
|
||||||
parsed_feeds = BasicNewsRecipe.parse_feeds(self)
|
parsed_feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
|
||||||
for id, obj in enumerate(self.INDEXES):
|
for id, obj in enumerate(self.INDEXES):
|
||||||
@ -99,7 +99,7 @@ class ZAOBAO(BasicNewsRecipe):
|
|||||||
a_title = self.tag_to_string(a)
|
a_title = self.tag_to_string(a)
|
||||||
date = ''
|
date = ''
|
||||||
description = ''
|
description = ''
|
||||||
self.log_debug(_('adding %s at %s')%(a_title,a_url))
|
self.log(_('adding %s at %s')%(a_title,a_url))
|
||||||
articles.append({
|
articles.append({
|
||||||
'title':a_title,
|
'title':a_title,
|
||||||
'date':date,
|
'date':date,
|
||||||
@ -110,23 +110,23 @@ class ZAOBAO(BasicNewsRecipe):
|
|||||||
pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
|
pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
|
||||||
max_articles_per_feed=self.max_articles_per_feed)
|
max_articles_per_feed=self.max_articles_per_feed)
|
||||||
|
|
||||||
self.log_debug(_('adding %s to feed')%(title))
|
self.log(_('adding %s to feed')%(title))
|
||||||
for feed in pfeeds:
|
for feed in pfeeds:
|
||||||
self.log_debug(_('adding feed: %s')%(feed.title))
|
self.log(_('adding feed: %s')%(feed.title))
|
||||||
feed.description = self.DESC_SENSE
|
feed.description = self.DESC_SENSE
|
||||||
parsed_feeds.append(feed)
|
parsed_feeds.append(feed)
|
||||||
for a, article in enumerate(feed):
|
for a, article in enumerate(feed):
|
||||||
self.log_debug(_('added article %s from %s')%(article.title, article.url))
|
self.log(_('added article %s from %s')%(article.title, article.url))
|
||||||
self.log_debug(_('added feed %s')%(feed.title))
|
self.log(_('added feed %s')%(feed.title))
|
||||||
|
|
||||||
for i, feed in enumerate(parsed_feeds):
|
for i, feed in enumerate(parsed_feeds):
|
||||||
# workaorund a strange problem: Somethimes the xml encoding is not apllied correctly by parse()
|
# workaorund a strange problem: Somethimes the xml encoding is not apllied correctly by parse()
|
||||||
weired_encoding_detected = False
|
weired_encoding_detected = False
|
||||||
if not isinstance(feed.description, unicode) and self.encoding and feed.description:
|
if not isinstance(feed.description, unicode) and self.encoding and feed.description:
|
||||||
self.log_debug(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
|
self.log(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
|
||||||
feed.description = feed.description.decode(self.encoding, 'replace')
|
feed.description = feed.description.decode(self.encoding, 'replace')
|
||||||
elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
|
elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
|
||||||
self.log_debug(_('Feed %s is weired encoded, manually redo all')%(feed.title))
|
self.log(_('Feed %s is weired encoded, manually redo all')%(feed.title))
|
||||||
feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
|
feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
|
||||||
weired_encoding_detected = True
|
weired_encoding_detected = True
|
||||||
|
|
||||||
@ -148,7 +148,7 @@ class ZAOBAO(BasicNewsRecipe):
|
|||||||
article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')
|
article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')
|
||||||
|
|
||||||
if article.title == "Untitled article":
|
if article.title == "Untitled article":
|
||||||
self.log_debug(_('Removing empty article %s from %s')%(article.title, article.url))
|
self.log(_('Removing empty article %s from %s')%(article.title, article.url))
|
||||||
# remove the article
|
# remove the article
|
||||||
feed.articles[a:a+1] = []
|
feed.articles[a:a+1] = []
|
||||||
return parsed_feeds
|
return parsed_feeds
|
||||||
|
@ -20,8 +20,8 @@
|
|||||||
<script type="text/javascript"
|
<script type="text/javascript"
|
||||||
src="{prefix}/static/jquery.multiselect.min.js"></script>
|
src="{prefix}/static/jquery.multiselect.min.js"></script>
|
||||||
|
|
||||||
|
<script type="text/javascript" src="{prefix}/static/stacktrace.js"></script>
|
||||||
<script type="text/javascript" src="{prefix}/static/browse/browse.js"></script>
|
<script type="text/javascript" src="{prefix}/static/browse/browse.js"></script>
|
||||||
|
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
var sort_cookie_name = "{sort_cookie_name}";
|
var sort_cookie_name = "{sort_cookie_name}";
|
||||||
|
@ -129,7 +129,13 @@ function toplevel() {
|
|||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
function render_error(msg) {
|
function render_error(msg) {
|
||||||
return '<div class="ui-widget"><div class="ui-state-error ui-corner-all" style="padding: 0pt 0.7em"><p><span class="ui-icon ui-icon-alert" style="float: left; margin-right: 0.3em"> </span><strong>Error: </strong>'+msg+"</p></div></div>"
|
var st = "";
|
||||||
|
try {
|
||||||
|
var st = printStackTrace();
|
||||||
|
st = st.join('\n\n');
|
||||||
|
} catch(e) {
|
||||||
|
}
|
||||||
|
return '<div class="ui-widget"><div class="ui-state-error ui-corner-all" style="padding: 0pt 0.7em"><p><span class="ui-icon ui-icon-alert" style="float: left; margin-right: 0.3em"> </span><strong>Error: </strong>'+msg+"<pre>"+st+"</pre></p></div></div>"
|
||||||
}
|
}
|
||||||
|
|
||||||
// Category feed {{{
|
// Category feed {{{
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
Monocle = {
|
Monocle = {
|
||||||
VERSION: "1.0.0"
|
VERSION: "2.0.0"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -170,7 +170,8 @@ Monocle.Browser.has.iframeTouchBug = Monocle.Browser.iOSVersionBelow("4.2");
|
|||||||
Monocle.Browser.has.selectThruBug = Monocle.Browser.iOSVersionBelow("4.2");
|
Monocle.Browser.has.selectThruBug = Monocle.Browser.iOSVersionBelow("4.2");
|
||||||
|
|
||||||
Monocle.Browser.has.mustScrollSheaf = Monocle.Browser.is.MobileSafari;
|
Monocle.Browser.has.mustScrollSheaf = Monocle.Browser.is.MobileSafari;
|
||||||
Monocle.Browser.has.iframeDoubleWidthBug = Monocle.Browser.has.mustScrollSheaf;
|
Monocle.Browser.has.iframeDoubleWidthBug =
|
||||||
|
Monocle.Browser.has.mustScrollSheaf || Monocle.Browser.on.Kindle3;
|
||||||
|
|
||||||
Monocle.Browser.has.floatColumnBug = Monocle.Browser.is.WebKit;
|
Monocle.Browser.has.floatColumnBug = Monocle.Browser.is.WebKit;
|
||||||
|
|
||||||
@ -181,6 +182,11 @@ Monocle.Browser.has.jumpFlickerBug =
|
|||||||
Monocle.Browser.on.MacOSX && Monocle.Browser.is.WebKit;
|
Monocle.Browser.on.MacOSX && Monocle.Browser.is.WebKit;
|
||||||
|
|
||||||
|
|
||||||
|
Monocle.Browser.has.columnOverflowPaintBug = Monocle.Browser.is.WebKit &&
|
||||||
|
!Monocle.Browser.is.MobileSafari &&
|
||||||
|
navigator.userAgent.indexOf("AppleWebKit/534") > 0;
|
||||||
|
|
||||||
|
|
||||||
if (typeof window.console == "undefined") {
|
if (typeof window.console == "undefined") {
|
||||||
window.console = {
|
window.console = {
|
||||||
messages: [],
|
messages: [],
|
||||||
@ -241,6 +247,7 @@ Monocle.Factory = function (element, label, index, reader) {
|
|||||||
|
|
||||||
|
|
||||||
function initialize() {
|
function initialize() {
|
||||||
|
if (!p.label) { return; }
|
||||||
var node = p.reader.properties.graph;
|
var node = p.reader.properties.graph;
|
||||||
node[p.label] = node[p.label] || [];
|
node[p.label] = node[p.label] || [];
|
||||||
if (typeof p.index == 'undefined' && node[p.label][p.index]) {
|
if (typeof p.index == 'undefined' && node[p.label][p.index]) {
|
||||||
@ -274,7 +281,11 @@ Monocle.Factory = function (element, label, index, reader) {
|
|||||||
|
|
||||||
function make(tagName, oLabel, index_or_options, or_options) {
|
function make(tagName, oLabel, index_or_options, or_options) {
|
||||||
var oIndex, options;
|
var oIndex, options;
|
||||||
if (arguments.length == 2) {
|
if (arguments.length == 1) {
|
||||||
|
oLabel = null,
|
||||||
|
oIndex = 0;
|
||||||
|
options = {};
|
||||||
|
} else if (arguments.length == 2) {
|
||||||
oIndex = 0;
|
oIndex = 0;
|
||||||
options = {};
|
options = {};
|
||||||
} else if (arguments.length == 4) {
|
} else if (arguments.length == 4) {
|
||||||
@ -376,6 +387,22 @@ Monocle.pieceLoaded('factory');
|
|||||||
Monocle.Events = {}
|
Monocle.Events = {}
|
||||||
|
|
||||||
|
|
||||||
|
Monocle.Events.dispatch = function (elem, evtType, data, cancelable) {
|
||||||
|
if (!document.createEvent) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
var evt = document.createEvent("Events");
|
||||||
|
evt.initEvent(evtType, false, cancelable || false);
|
||||||
|
evt.m = data;
|
||||||
|
try {
|
||||||
|
return elem.dispatchEvent(evt);
|
||||||
|
} catch(e) {
|
||||||
|
console.warn("Failed to dispatch event: "+evtType);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Monocle.Events.listen = function (elem, evtType, fn, useCapture) {
|
Monocle.Events.listen = function (elem, evtType, fn, useCapture) {
|
||||||
if (elem.addEventListener) {
|
if (elem.addEventListener) {
|
||||||
return elem.addEventListener(evtType, fn, useCapture || false);
|
return elem.addEventListener(evtType, fn, useCapture || false);
|
||||||
@ -405,7 +432,7 @@ Monocle.Events.listenForContact = function (elem, fns, options) {
|
|||||||
pageY: ci.pageY
|
pageY: ci.pageY
|
||||||
};
|
};
|
||||||
|
|
||||||
var target = evt.target || window.srcElement;
|
var target = evt.target || evt.srcElement;
|
||||||
while (target.nodeType != 1 && target.parentNode) {
|
while (target.nodeType != 1 && target.parentNode) {
|
||||||
target = target.parentNode;
|
target = target.parentNode;
|
||||||
}
|
}
|
||||||
@ -527,13 +554,18 @@ Monocle.Events.deafenForContact = function (elem, listeners) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Monocle.Events.listenForTap = function (elem, fn) {
|
Monocle.Events.listenForTap = function (elem, fn, activeClass) {
|
||||||
var startPos;
|
var startPos;
|
||||||
|
|
||||||
if (Monocle.Browser.on.Kindle3) {
|
if (Monocle.Browser.on.Kindle3) {
|
||||||
Monocle.Events.listen(elem, 'click', function () {});
|
Monocle.Events.listen(elem, 'click', function () {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var annul = function () {
|
||||||
|
startPos = null;
|
||||||
|
if (activeClass && elem.dom) { elem.dom.removeClass(activeClass); }
|
||||||
|
}
|
||||||
|
|
||||||
var annulIfOutOfBounds = function (evt) {
|
var annulIfOutOfBounds = function (evt) {
|
||||||
if (evt.type.match(/^mouse/)) {
|
if (evt.type.match(/^mouse/)) {
|
||||||
return;
|
return;
|
||||||
@ -545,7 +577,7 @@ Monocle.Events.listenForTap = function (elem, fn) {
|
|||||||
evt.m.registrantX < 0 || evt.m.registrantX > elem.offsetWidth ||
|
evt.m.registrantX < 0 || evt.m.registrantX > elem.offsetWidth ||
|
||||||
evt.m.registrantY < 0 || evt.m.registrantY > elem.offsetHeight
|
evt.m.registrantY < 0 || evt.m.registrantY > elem.offsetHeight
|
||||||
) {
|
) {
|
||||||
startPos = null;
|
annul();
|
||||||
} else {
|
} else {
|
||||||
evt.preventDefault();
|
evt.preventDefault();
|
||||||
}
|
}
|
||||||
@ -557,6 +589,7 @@ Monocle.Events.listenForTap = function (elem, fn) {
|
|||||||
start: function (evt) {
|
start: function (evt) {
|
||||||
startPos = [evt.m.pageX, evt.m.pageY];
|
startPos = [evt.m.pageX, evt.m.pageY];
|
||||||
evt.preventDefault();
|
evt.preventDefault();
|
||||||
|
if (activeClass && elem.dom) { elem.dom.addClass(activeClass); }
|
||||||
},
|
},
|
||||||
move: annulIfOutOfBounds,
|
move: annulIfOutOfBounds,
|
||||||
end: function (evt) {
|
end: function (evt) {
|
||||||
@ -565,10 +598,9 @@ Monocle.Events.listenForTap = function (elem, fn) {
|
|||||||
evt.m.startOffset = startPos;
|
evt.m.startOffset = startPos;
|
||||||
fn(evt);
|
fn(evt);
|
||||||
}
|
}
|
||||||
|
annul();
|
||||||
},
|
},
|
||||||
cancel: function (evt) {
|
cancel: annul
|
||||||
startPos = null;
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
useCapture: false
|
useCapture: false
|
||||||
@ -997,6 +1029,9 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
|
|||||||
createReaderElements();
|
createReaderElements();
|
||||||
|
|
||||||
p.defaultStyles = addPageStyles(k.DEFAULT_STYLE_RULES, false);
|
p.defaultStyles = addPageStyles(k.DEFAULT_STYLE_RULES, false);
|
||||||
|
if (options.stylesheet) {
|
||||||
|
p.initialStyles = addPageStyles(options.stylesheet, false);
|
||||||
|
}
|
||||||
|
|
||||||
primeFrames(options.primeURL, function () {
|
primeFrames(options.primeURL, function () {
|
||||||
applyStyles();
|
applyStyles();
|
||||||
@ -1077,6 +1112,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
|
|||||||
if (Monocle.Browser.is.WebKit) {
|
if (Monocle.Browser.is.WebKit) {
|
||||||
frame.contentDocument.documentElement.style.overflow = "hidden";
|
frame.contentDocument.documentElement.style.overflow = "hidden";
|
||||||
}
|
}
|
||||||
|
dispatchEvent('monocle:frameprimed', { frame: frame, pageIndex: pageCount });
|
||||||
if ((pageCount += 1) == pageMax) {
|
if ((pageCount += 1) == pageMax) {
|
||||||
Monocle.defer(callback);
|
Monocle.defer(callback);
|
||||||
}
|
}
|
||||||
@ -1131,6 +1167,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
|
|||||||
var pageCount = 0;
|
var pageCount = 0;
|
||||||
if (typeof callback == 'function') {
|
if (typeof callback == 'function') {
|
||||||
var watcher = function (evt) {
|
var watcher = function (evt) {
|
||||||
|
dispatchEvent('monocle:firstcomponentchange', evt.m);
|
||||||
if ((pageCount += 1) == p.flipper.pageCount) {
|
if ((pageCount += 1) == p.flipper.pageCount) {
|
||||||
deafen('monocle:componentchange', watcher);
|
deafen('monocle:componentchange', watcher);
|
||||||
callback();
|
callback();
|
||||||
@ -1239,7 +1276,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
|
|||||||
page.appendChild(runner);
|
page.appendChild(runner);
|
||||||
ctrlData.elements.push(runner);
|
ctrlData.elements.push(runner);
|
||||||
}
|
}
|
||||||
} else if (cType == "modal" || cType == "popover") {
|
} else if (cType == "modal" || cType == "popover" || cType == "hud") {
|
||||||
ctrlElem = ctrl.createControlElements(overlay);
|
ctrlElem = ctrl.createControlElements(overlay);
|
||||||
overlay.appendChild(ctrlElem);
|
overlay.appendChild(ctrlElem);
|
||||||
ctrlData.elements.push(ctrlElem);
|
ctrlData.elements.push(ctrlElem);
|
||||||
@ -1312,24 +1349,33 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
|
|||||||
var controlData = dataForControl(ctrl);
|
var controlData = dataForControl(ctrl);
|
||||||
if (!controlData) {
|
if (!controlData) {
|
||||||
console.warn("No data for control: " + ctrl);
|
console.warn("No data for control: " + ctrl);
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
if (controlData.hidden == false) {
|
|
||||||
return;
|
if (showingControl(ctrl)) {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var overlay = dom.find('overlay');
|
||||||
|
if (controlData.usesOverlay && controlData.controlType != "hud") {
|
||||||
|
for (var i = 0, ii = p.controls.length; i < ii; ++i) {
|
||||||
|
if (p.controls[i].usesOverlay && !p.controls[i].hidden) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
overlay.style.display = "block";
|
||||||
|
}
|
||||||
|
|
||||||
for (var i = 0; i < controlData.elements.length; ++i) {
|
for (var i = 0; i < controlData.elements.length; ++i) {
|
||||||
controlData.elements[i].style.display = "block";
|
controlData.elements[i].style.display = "block";
|
||||||
}
|
}
|
||||||
var overlay = dom.find('overlay');
|
|
||||||
if (controlData.usesOverlay) {
|
|
||||||
overlay.style.display = "block";
|
|
||||||
}
|
|
||||||
if (controlData.controlType == "popover") {
|
if (controlData.controlType == "popover") {
|
||||||
overlay.listeners = Monocle.Events.listenForContact(
|
overlay.listeners = Monocle.Events.listenForContact(
|
||||||
overlay,
|
overlay,
|
||||||
{
|
{
|
||||||
start: function (evt) {
|
start: function (evt) {
|
||||||
obj = evt.target || window.event.srcElement;
|
var obj = evt.target || window.event.srcElement;
|
||||||
do {
|
do {
|
||||||
if (obj == controlData.elements[0]) { return true; }
|
if (obj == controlData.elements[0]) { return true; }
|
||||||
} while (obj && (obj = obj.parentNode));
|
} while (obj && (obj = obj.parentNode));
|
||||||
@ -1346,22 +1392,18 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
|
|||||||
ctrl.properties.hidden = false;
|
ctrl.properties.hidden = false;
|
||||||
}
|
}
|
||||||
dispatchEvent('controlshow', ctrl, false);
|
dispatchEvent('controlshow', ctrl, false);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function showingControl(ctrl) {
|
||||||
|
var controlData = dataForControl(ctrl);
|
||||||
|
return controlData.hidden == false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function dispatchEvent(evtType, data, cancelable) {
|
function dispatchEvent(evtType, data, cancelable) {
|
||||||
if (!document.createEvent) {
|
return Monocle.Events.dispatch(dom.find('box'), evtType, data, cancelable);
|
||||||
return true;
|
|
||||||
}
|
|
||||||
var evt = document.createEvent("Events");
|
|
||||||
evt.initEvent(evtType, false, cancelable || false);
|
|
||||||
evt.m = data;
|
|
||||||
try {
|
|
||||||
return dom.find('box').dispatchEvent(evt);
|
|
||||||
} catch(e) {
|
|
||||||
console.warn("Failed to dispatch event: " + evtType);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1502,6 +1544,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
|
|||||||
API.addControl = addControl;
|
API.addControl = addControl;
|
||||||
API.hideControl = hideControl;
|
API.hideControl = hideControl;
|
||||||
API.showControl = showControl;
|
API.showControl = showControl;
|
||||||
|
API.showingControl = showingControl;
|
||||||
API.dispatchEvent = dispatchEvent;
|
API.dispatchEvent = dispatchEvent;
|
||||||
API.listen = listen;
|
API.listen = listen;
|
||||||
API.deafen = deafen;
|
API.deafen = deafen;
|
||||||
@ -1527,22 +1570,32 @@ Monocle.Reader.DEFAULT_CLASS_PREFIX = 'monelem_'
|
|||||||
Monocle.Reader.FLIPPER_DEFAULT_CLASS = "Slider";
|
Monocle.Reader.FLIPPER_DEFAULT_CLASS = "Slider";
|
||||||
Monocle.Reader.FLIPPER_LEGACY_CLASS = "Legacy";
|
Monocle.Reader.FLIPPER_LEGACY_CLASS = "Legacy";
|
||||||
Monocle.Reader.DEFAULT_STYLE_RULES = [
|
Monocle.Reader.DEFAULT_STYLE_RULES = [
|
||||||
"html * {" +
|
"html#RS\\:monocle * {" +
|
||||||
|
"-webkit-font-smoothing: subpixel-antialiased;" +
|
||||||
"text-rendering: auto !important;" +
|
"text-rendering: auto !important;" +
|
||||||
"word-wrap: break-word !important;" +
|
"word-wrap: break-word !important;" +
|
||||||
|
"overflow: visible !important;" +
|
||||||
(Monocle.Browser.has.floatColumnBug ? "float: none !important;" : "") +
|
(Monocle.Browser.has.floatColumnBug ? "float: none !important;" : "") +
|
||||||
"}" +
|
"}",
|
||||||
"body {" +
|
"html#RS\\:monocle body {" +
|
||||||
"margin: 0 !important;" +
|
"margin: 0 !important;" +
|
||||||
"padding: 0 !important;" +
|
"padding: 0 !important;" +
|
||||||
"-webkit-text-size-adjust: none;" +
|
"-webkit-text-size-adjust: none;" +
|
||||||
"}" +
|
"}",
|
||||||
"table, img {" +
|
"html#RS\\:monocle body * {" +
|
||||||
"max-width: 100% !important;" +
|
"max-width: 100% !important;" +
|
||||||
"max-height: 90% !important;" +
|
"}",
|
||||||
|
"html#RS\\:monocle img, html#RS\\:monocle video, html#RS\\:monocle object {" +
|
||||||
|
"max-height: 95% !important;" +
|
||||||
"}"
|
"}"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if (Monocle.Browser.has.columnOverflowPaintBug) {
|
||||||
|
Monocle.Reader.DEFAULT_STYLE_RULES.push(
|
||||||
|
"::-webkit-scrollbar { width: 0; height: 0; }"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Monocle.pieceLoaded('reader');
|
Monocle.pieceLoaded('reader');
|
||||||
/* BOOK */
|
/* BOOK */
|
||||||
@ -1586,6 +1639,16 @@ Monocle.Book = function (dataSource) {
|
|||||||
locus.load = true;
|
locus.load = true;
|
||||||
locus.componentId = p.componentIds[0];
|
locus.componentId = p.componentIds[0];
|
||||||
return locus;
|
return locus;
|
||||||
|
} else if (
|
||||||
|
cIndex < 0 &&
|
||||||
|
locus.componentId &&
|
||||||
|
currComponent.properties.id != locus.componentId
|
||||||
|
) {
|
||||||
|
pageDiv.m.reader.dispatchEvent(
|
||||||
|
"monocle:notfound",
|
||||||
|
{ href: locus.componentId }
|
||||||
|
);
|
||||||
|
return null;
|
||||||
} else if (cIndex < 0) {
|
} else if (cIndex < 0) {
|
||||||
component = currComponent;
|
component = currComponent;
|
||||||
locus.componentId = pageDiv.m.activeFrame.m.component.properties.id;
|
locus.componentId = pageDiv.m.activeFrame.m.component.properties.id;
|
||||||
@ -1619,6 +1682,8 @@ Monocle.Book = function (dataSource) {
|
|||||||
result.page += locus.direction;
|
result.page += locus.direction;
|
||||||
} else if (typeof(locus.anchor) == "string") {
|
} else if (typeof(locus.anchor) == "string") {
|
||||||
result.page = component.pageForChapter(locus.anchor, pageDiv);
|
result.page = component.pageForChapter(locus.anchor, pageDiv);
|
||||||
|
} else if (typeof(locus.xpath) == "string") {
|
||||||
|
result.page = component.pageForXPath(locus.xpath, pageDiv);
|
||||||
} else if (typeof(locus.position) == "string") {
|
} else if (typeof(locus.position) == "string") {
|
||||||
if (locus.position == "start") {
|
if (locus.position == "start") {
|
||||||
result.page = 1;
|
result.page = 1;
|
||||||
@ -1638,6 +1703,7 @@ Monocle.Book = function (dataSource) {
|
|||||||
if (result.page < 1) {
|
if (result.page < 1) {
|
||||||
if (cIndex == 0) {
|
if (cIndex == 0) {
|
||||||
result.page = 1;
|
result.page = 1;
|
||||||
|
result.boundarystart = true;
|
||||||
} else {
|
} else {
|
||||||
result.load = true;
|
result.load = true;
|
||||||
result.componentId = p.componentIds[cIndex - 1];
|
result.componentId = p.componentIds[cIndex - 1];
|
||||||
@ -1647,6 +1713,7 @@ Monocle.Book = function (dataSource) {
|
|||||||
} else if (result.page > lastPageNum['new']) {
|
} else if (result.page > lastPageNum['new']) {
|
||||||
if (cIndex == p.lastCIndex) {
|
if (cIndex == p.lastCIndex) {
|
||||||
result.page = lastPageNum['new'];
|
result.page = lastPageNum['new'];
|
||||||
|
result.boundaryend = true;
|
||||||
} else {
|
} else {
|
||||||
result.load = true;
|
result.load = true;
|
||||||
result.componentId = p.componentIds[cIndex + 1];
|
result.componentId = p.componentIds[cIndex + 1];
|
||||||
@ -1660,18 +1727,25 @@ Monocle.Book = function (dataSource) {
|
|||||||
|
|
||||||
function setPageAt(pageDiv, locus) {
|
function setPageAt(pageDiv, locus) {
|
||||||
locus = pageNumberAt(pageDiv, locus);
|
locus = pageNumberAt(pageDiv, locus);
|
||||||
if (!locus.load) {
|
if (locus && !locus.load) {
|
||||||
var component = p.components[p.componentIds.indexOf(locus.componentId)];
|
var evtData = { locus: locus, page: pageDiv }
|
||||||
pageDiv.m.place = pageDiv.m.place || new Monocle.Place();
|
if (locus.boundarystart) {
|
||||||
pageDiv.m.place.setPlace(component, locus.page);
|
pageDiv.m.reader.dispatchEvent('monocle:boundarystart', evtData);
|
||||||
|
} else if (locus.boundaryend) {
|
||||||
|
pageDiv.m.reader.dispatchEvent('monocle:boundaryend', evtData);
|
||||||
|
} else {
|
||||||
|
var component = p.components[p.componentIds.indexOf(locus.componentId)];
|
||||||
|
pageDiv.m.place = pageDiv.m.place || new Monocle.Place();
|
||||||
|
pageDiv.m.place.setPlace(component, locus.page);
|
||||||
|
|
||||||
var evtData = {
|
var evtData = {
|
||||||
page: pageDiv,
|
page: pageDiv,
|
||||||
locus: locus,
|
locus: locus,
|
||||||
pageNumber: pageDiv.m.place.pageNumber(),
|
pageNumber: pageDiv.m.place.pageNumber(),
|
||||||
componentId: locus.componentId
|
componentId: locus.componentId
|
||||||
|
}
|
||||||
|
pageDiv.m.reader.dispatchEvent("monocle:pagechange", evtData);
|
||||||
}
|
}
|
||||||
pageDiv.m.reader.dispatchEvent("monocle:pagechange", evtData);
|
|
||||||
}
|
}
|
||||||
return locus;
|
return locus;
|
||||||
}
|
}
|
||||||
@ -1683,6 +1757,10 @@ Monocle.Book = function (dataSource) {
|
|||||||
locus = pageNumberAt(pageDiv, locus);
|
locus = pageNumberAt(pageDiv, locus);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!locus) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!locus.load) {
|
if (!locus.load) {
|
||||||
callback(locus);
|
callback(locus);
|
||||||
return;
|
return;
|
||||||
@ -1690,7 +1768,9 @@ Monocle.Book = function (dataSource) {
|
|||||||
|
|
||||||
var findPageNumber = function () {
|
var findPageNumber = function () {
|
||||||
locus = setPageAt(pageDiv, locus);
|
locus = setPageAt(pageDiv, locus);
|
||||||
if (locus.load) {
|
if (!locus) {
|
||||||
|
return;
|
||||||
|
} else if (locus.load) {
|
||||||
loadPageAt(pageDiv, locus, callback, progressCallback)
|
loadPageAt(pageDiv, locus, callback, progressCallback)
|
||||||
} else {
|
} else {
|
||||||
callback(locus);
|
callback(locus);
|
||||||
@ -1715,10 +1795,12 @@ Monocle.Book = function (dataSource) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function setOrLoadPageAt(pageDiv, locus, callback, progressCallback) {
|
function setOrLoadPageAt(pageDiv, locus, callback, onProgress, onFail) {
|
||||||
locus = setPageAt(pageDiv, locus);
|
locus = setPageAt(pageDiv, locus);
|
||||||
if (locus.load) {
|
if (!locus) {
|
||||||
loadPageAt(pageDiv, locus, callback, progressCallback);
|
if (onFail) { onFail(); }
|
||||||
|
} else if (locus.load) {
|
||||||
|
loadPageAt(pageDiv, locus, callback, onProgress);
|
||||||
} else {
|
} else {
|
||||||
callback(locus);
|
callback(locus);
|
||||||
}
|
}
|
||||||
@ -1864,13 +1946,18 @@ Monocle.Place = function () {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function percentageThrough() {
|
function percentAtTopOfPage() {
|
||||||
|
return p.percent - 1.0 / p.component.lastPageNumber();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function percentAtBottomOfPage() {
|
||||||
return p.percent;
|
return p.percent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function pageAtPercentageThrough(pc) {
|
function pageAtPercentageThrough(percent) {
|
||||||
return Math.max(Math.round(p.component.lastPageNumber() * pc), 1);
|
return Math.max(Math.round(p.component.lastPageNumber() * percent), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1911,6 +1998,8 @@ Monocle.Place = function () {
|
|||||||
}
|
}
|
||||||
if (options.direction) {
|
if (options.direction) {
|
||||||
locus.page += options.direction;
|
locus.page += options.direction;
|
||||||
|
} else {
|
||||||
|
locus.percent = percentAtBottomOfPage();
|
||||||
}
|
}
|
||||||
return locus;
|
return locus;
|
||||||
}
|
}
|
||||||
@ -1942,7 +2031,9 @@ Monocle.Place = function () {
|
|||||||
API.setPlace = setPlace;
|
API.setPlace = setPlace;
|
||||||
API.setPercentageThrough = setPercentageThrough;
|
API.setPercentageThrough = setPercentageThrough;
|
||||||
API.componentId = componentId;
|
API.componentId = componentId;
|
||||||
API.percentageThrough = percentageThrough;
|
API.percentAtTopOfPage = percentAtTopOfPage;
|
||||||
|
API.percentAtBottomOfPage = percentAtBottomOfPage;
|
||||||
|
API.percentageThrough = percentAtBottomOfPage;
|
||||||
API.pageAtPercentageThrough = pageAtPercentageThrough;
|
API.pageAtPercentageThrough = pageAtPercentageThrough;
|
||||||
API.pageNumber = pageNumber;
|
API.pageNumber = pageNumber;
|
||||||
API.chapterInfo = chapterInfo;
|
API.chapterInfo = chapterInfo;
|
||||||
@ -2158,11 +2249,13 @@ Monocle.Component = function (book, id, index, chapters, source) {
|
|||||||
if (p.chapters[0] && typeof p.chapters[0].percent == "number") {
|
if (p.chapters[0] && typeof p.chapters[0].percent == "number") {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
var doc = pageDiv.m.activeFrame.contentDocument;
|
||||||
for (var i = 0; i < p.chapters.length; ++i) {
|
for (var i = 0; i < p.chapters.length; ++i) {
|
||||||
var chp = p.chapters[i];
|
var chp = p.chapters[i];
|
||||||
chp.percent = 0;
|
chp.percent = 0;
|
||||||
if (chp.fragment) {
|
if (chp.fragment) {
|
||||||
chp.percent = pageDiv.m.dimensions.percentageThroughOfId(chp.fragment);
|
var node = doc.getElementById(chp.fragment);
|
||||||
|
chp.percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return p.chapters;
|
return p.chapters;
|
||||||
@ -2187,14 +2280,37 @@ Monocle.Component = function (book, id, index, chapters, source) {
|
|||||||
if (!fragment) {
|
if (!fragment) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
var pc2pn = function (pc) { return Math.floor(pc * p.pageLength) + 1 }
|
|
||||||
for (var i = 0; i < p.chapters.length; ++i) {
|
for (var i = 0; i < p.chapters.length; ++i) {
|
||||||
if (p.chapters[i].fragment == fragment) {
|
if (p.chapters[i].fragment == fragment) {
|
||||||
return pc2pn(p.chapters[i].percent);
|
return percentToPageNumber(p.chapters[i].percent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var percent = pageDiv.m.dimensions.percentageThroughOfId(fragment);
|
var doc = pageDiv.m.activeFrame.contentDocument;
|
||||||
return pc2pn(percent);
|
var node = doc.getElementById(fragment);
|
||||||
|
var percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
|
||||||
|
return percentToPageNumber(percent);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function pageForXPath(xpath, pageDiv) {
|
||||||
|
var doc = pageDiv.m.activeFrame.contentDocument;
|
||||||
|
var percent = 0;
|
||||||
|
if (typeof doc.evaluate == "function") {
|
||||||
|
var node = doc.evaluate(
|
||||||
|
xpath,
|
||||||
|
doc,
|
||||||
|
null,
|
||||||
|
9,
|
||||||
|
null
|
||||||
|
).singleNodeValue;
|
||||||
|
var percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
|
||||||
|
}
|
||||||
|
return percentToPageNumber(percent);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function percentToPageNumber(pc) {
|
||||||
|
return Math.floor(pc * p.pageLength) + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2207,6 +2323,7 @@ Monocle.Component = function (book, id, index, chapters, source) {
|
|||||||
API.updateDimensions = updateDimensions;
|
API.updateDimensions = updateDimensions;
|
||||||
API.chapterForPage = chapterForPage;
|
API.chapterForPage = chapterForPage;
|
||||||
API.pageForChapter = pageForChapter;
|
API.pageForChapter = pageForChapter;
|
||||||
|
API.pageForXPath = pageForXPath;
|
||||||
API.lastPageNumber = lastPageNumber;
|
API.lastPageNumber = lastPageNumber;
|
||||||
|
|
||||||
return API;
|
return API;
|
||||||
@ -2415,9 +2532,11 @@ Monocle.Dimensions.Vert = function (pageDiv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function percentageThroughOfId(id) {
|
function percentageThroughOfNode(target) {
|
||||||
|
if (!target) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
var doc = p.page.m.activeFrame.contentDocument;
|
var doc = p.page.m.activeFrame.contentDocument;
|
||||||
var target = doc.getElementById(id);
|
|
||||||
var offset = 0;
|
var offset = 0;
|
||||||
if (target.getBoundingClientRect) {
|
if (target.getBoundingClientRect) {
|
||||||
offset = target.getBoundingClientRect().top;
|
offset = target.getBoundingClientRect().top;
|
||||||
@ -2456,7 +2575,7 @@ Monocle.Dimensions.Vert = function (pageDiv) {
|
|||||||
API.hasChanged = hasChanged;
|
API.hasChanged = hasChanged;
|
||||||
API.measure = measure;
|
API.measure = measure;
|
||||||
API.pages = pages;
|
API.pages = pages;
|
||||||
API.percentageThroughOfId = percentageThroughOfId;
|
API.percentageThroughOfNode = percentageThroughOfNode;
|
||||||
API.locusToOffset = locusToOffset;
|
API.locusToOffset = locusToOffset;
|
||||||
|
|
||||||
initialize();
|
initialize();
|
||||||
@ -2713,8 +2832,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
(!p.measurements) ||
|
(!p.measurements) ||
|
||||||
(p.measurements.width != newMeasurements.width) ||
|
(p.measurements.width != newMeasurements.width) ||
|
||||||
(p.measurements.height != newMeasurements.height) ||
|
(p.measurements.height != newMeasurements.height) ||
|
||||||
(p.measurements.scrollWidth != newMeasurements.scrollWidth) ||
|
(p.measurements.scrollWidth != newMeasurements.scrollWidth)
|
||||||
(p.measurements.fontSize != newMeasurements.fontSize)
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2736,10 +2854,16 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
if (!lc || !lc.getBoundingClientRect) {
|
if (!lc || !lc.getBoundingClientRect) {
|
||||||
console.warn('Empty document for page['+p.page.m.pageIndex+']');
|
console.warn('Empty document for page['+p.page.m.pageIndex+']');
|
||||||
p.measurements.scrollWidth = p.measurements.width;
|
p.measurements.scrollWidth = p.measurements.width;
|
||||||
} else if (lc.getBoundingClientRect().bottom > p.measurements.height) {
|
|
||||||
p.measurements.scrollWidth = p.measurements.width * 2;
|
|
||||||
} else {
|
} else {
|
||||||
p.measurements.scrollWidth = p.measurements.width;
|
var bcr = lc.getBoundingClientRect();
|
||||||
|
if (
|
||||||
|
bcr.right > p.measurements.width ||
|
||||||
|
bcr.bottom > p.measurements.height
|
||||||
|
) {
|
||||||
|
p.measurements.scrollWidth = p.measurements.width * 2;
|
||||||
|
} else {
|
||||||
|
p.measurements.scrollWidth = p.measurements.width;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2758,12 +2882,11 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function percentageThroughOfId(id) {
|
function percentageThroughOfNode(target) {
|
||||||
var doc = p.page.m.activeFrame.contentDocument;
|
|
||||||
var target = doc.getElementById(id);
|
|
||||||
if (!target) {
|
if (!target) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
var doc = p.page.m.activeFrame.contentDocument;
|
||||||
var offset = 0;
|
var offset = 0;
|
||||||
if (target.getBoundingClientRect) {
|
if (target.getBoundingClientRect) {
|
||||||
offset = target.getBoundingClientRect().left;
|
offset = target.getBoundingClientRect().left;
|
||||||
@ -2785,20 +2908,30 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
function componentChanged(evt) {
|
function componentChanged(evt) {
|
||||||
if (evt.m['page'] != p.page) { return; }
|
if (evt.m['page'] != p.page) { return; }
|
||||||
var doc = evt.m['document'];
|
var doc = evt.m['document'];
|
||||||
Monocle.Styles.applyRules(doc.body, k.BODY_STYLES);
|
if (Monocle.Browser.has.columnOverflowPaintBug) {
|
||||||
|
var div = doc.createElement('div');
|
||||||
|
Monocle.Styles.applyRules(div, k.BODY_STYLES);
|
||||||
|
div.style.cssText += "overflow: scroll !important;";
|
||||||
|
while (doc.body.childNodes.length) {
|
||||||
|
div.appendChild(doc.body.firstChild);
|
||||||
|
}
|
||||||
|
doc.body.appendChild(div);
|
||||||
|
} else {
|
||||||
|
Monocle.Styles.applyRules(doc.body, k.BODY_STYLES);
|
||||||
|
|
||||||
if (Monocle.Browser.is.WebKit) {
|
if (Monocle.Browser.is.WebKit) {
|
||||||
doc.documentElement.style.overflow = 'hidden';
|
doc.documentElement.style.overflow = 'hidden';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
p.dirty = true;
|
p.dirty = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function setColumnWidth() {
|
function setColumnWidth() {
|
||||||
var cw = p.page.m.sheafDiv.clientWidth;
|
var cw = p.page.m.sheafDiv.clientWidth;
|
||||||
var doc = p.page.m.activeFrame.contentDocument;
|
|
||||||
if (currBodyStyleValue('column-width') != cw+"px") {
|
if (currBodyStyleValue('column-width') != cw+"px") {
|
||||||
Monocle.Styles.affix(doc.body, 'column-width', cw+"px");
|
Monocle.Styles.affix(columnedElement(), 'column-width', cw+"px");
|
||||||
p.dirty = true;
|
p.dirty = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2809,8 +2942,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
return {
|
return {
|
||||||
width: sheaf.clientWidth,
|
width: sheaf.clientWidth,
|
||||||
height: sheaf.clientHeight,
|
height: sheaf.clientHeight,
|
||||||
scrollWidth: scrollerWidth(),
|
scrollWidth: scrollerWidth()
|
||||||
fontSize: currBodyStyleValue('font-size')
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2819,16 +2951,24 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
if (Monocle.Browser.has.mustScrollSheaf) {
|
if (Monocle.Browser.has.mustScrollSheaf) {
|
||||||
return p.page.m.sheafDiv;
|
return p.page.m.sheafDiv;
|
||||||
} else {
|
} else {
|
||||||
return p.page.m.activeFrame.contentDocument.body;
|
return columnedElement();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function columnedElement() {
|
||||||
|
var elem = p.page.m.activeFrame.contentDocument.body;
|
||||||
|
return Monocle.Browser.has.columnOverflowPaintBug ? elem.firstChild : elem;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
function scrollerWidth() {
|
function scrollerWidth() {
|
||||||
var bdy = p.page.m.activeFrame.contentDocument.body;
|
var bdy = p.page.m.activeFrame.contentDocument.body;
|
||||||
if (Monocle.Browser.has.iframeDoubleWidthBug) {
|
if (Monocle.Browser.has.iframeDoubleWidthBug) {
|
||||||
if (Monocle.Browser.on.Android) {
|
if (Monocle.Browser.on.Kindle3) {
|
||||||
return bdy.scrollWidth * 1.5; // I actually have no idea why 1.5.
|
return scrollerElement().scrollWidth;
|
||||||
|
} else if (Monocle.Browser.on.Android) {
|
||||||
|
return bdy.scrollWidth;
|
||||||
} else if (Monocle.Browser.iOSVersion < "4.1") {
|
} else if (Monocle.Browser.iOSVersion < "4.1") {
|
||||||
var hbw = bdy.scrollWidth / 2;
|
var hbw = bdy.scrollWidth / 2;
|
||||||
var sew = scrollerElement().scrollWidth;
|
var sew = scrollerElement().scrollWidth;
|
||||||
@ -2838,15 +2978,18 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
var hbw = bdy.scrollWidth / 2;
|
var hbw = bdy.scrollWidth / 2;
|
||||||
return hbw;
|
return hbw;
|
||||||
}
|
}
|
||||||
} else if (Monocle.Browser.is.Gecko) {
|
} else if (bdy.getBoundingClientRect) {
|
||||||
var lc = bdy.lastChild;
|
var elems = bdy.getElementsByTagName('*');
|
||||||
while (lc && lc.nodeType != 1) {
|
var bdyRect = bdy.getBoundingClientRect();
|
||||||
lc = lc.previousSibling;
|
var l = bdyRect.left, r = bdyRect.right;
|
||||||
}
|
for (var i = elems.length - 1; i >= 0; --i) {
|
||||||
if (lc && lc.getBoundingClientRect) {
|
var rect = elems[i].getBoundingClientRect();
|
||||||
return lc.getBoundingClientRect().right;
|
l = Math.min(l, rect.left);
|
||||||
|
r = Math.max(r, rect.right);
|
||||||
}
|
}
|
||||||
|
return Math.abs(l) + Math.abs(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
return scrollerElement().scrollWidth;
|
return scrollerElement().scrollWidth;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2867,8 +3010,14 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
|
|
||||||
function translateToLocus(locus) {
|
function translateToLocus(locus) {
|
||||||
var offset = locusToOffset(locus);
|
var offset = locusToOffset(locus);
|
||||||
var bdy = p.page.m.activeFrame.contentDocument.body;
|
p.page.m.offset = 0 - offset;
|
||||||
Monocle.Styles.affix(bdy, "transform", "translateX("+offset+"px)");
|
if (k.SETX && !Monocle.Browser.has.columnOverflowPaintBug) {
|
||||||
|
var bdy = p.page.m.activeFrame.contentDocument.body;
|
||||||
|
Monocle.Styles.affix(bdy, "transform", "translateX("+offset+"px)");
|
||||||
|
} else {
|
||||||
|
var scrElem = scrollerElement();
|
||||||
|
scrElem.scrollLeft = 0 - offset;
|
||||||
|
}
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2876,7 +3025,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
|||||||
API.hasChanged = hasChanged;
|
API.hasChanged = hasChanged;
|
||||||
API.measure = measure;
|
API.measure = measure;
|
||||||
API.pages = pages;
|
API.pages = pages;
|
||||||
API.percentageThroughOfId = percentageThroughOfId;
|
API.percentageThroughOfNode = percentageThroughOfNode;
|
||||||
|
|
||||||
API.locusToOffset = locusToOffset;
|
API.locusToOffset = locusToOffset;
|
||||||
API.translateToLocus = translateToLocus;
|
API.translateToLocus = translateToLocus;
|
||||||
@ -2898,6 +3047,8 @@ Monocle.Dimensions.Columns.BODY_STYLES = {
|
|||||||
"column-fill": "auto"
|
"column-fill": "auto"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Monocle.Dimensions.Columns.SETX = true; // Set to false for scrollLeft.
|
||||||
|
|
||||||
if (Monocle.Browser.has.iframeDoubleWidthBug) {
|
if (Monocle.Browser.has.iframeDoubleWidthBug) {
|
||||||
Monocle.Dimensions.Columns.BODY_STYLES["min-width"] = "200%";
|
Monocle.Dimensions.Columns.BODY_STYLES["min-width"] = "200%";
|
||||||
} else {
|
} else {
|
||||||
@ -2924,6 +3075,8 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
|
|
||||||
function addPage(pageDiv) {
|
function addPage(pageDiv) {
|
||||||
pageDiv.m.dimensions = new Monocle.Dimensions.Columns(pageDiv);
|
pageDiv.m.dimensions = new Monocle.Dimensions.Columns(pageDiv);
|
||||||
|
|
||||||
|
Monocle.Styles.setX(pageDiv, "0px");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2963,6 +3116,7 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
|
|
||||||
|
|
||||||
function interactiveMode(bState) {
|
function interactiveMode(bState) {
|
||||||
|
p.reader.dispatchEvent('monocle:interactive:'+(bState ? 'on' : 'off'));
|
||||||
if (!Monocle.Browser.has.selectThruBug) {
|
if (!Monocle.Browser.has.selectThruBug) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -2994,10 +3148,10 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
|
|
||||||
function moveTo(locus, callback) {
|
function moveTo(locus, callback) {
|
||||||
var fn = function () {
|
var fn = function () {
|
||||||
prepareNextPage(announceTurn);
|
prepareNextPage(function () {
|
||||||
if (typeof callback == "function") {
|
if (typeof callback == "function") { callback(); }
|
||||||
callback();
|
announceTurn();
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
setPage(upperPage(), locus, fn);
|
setPage(upperPage(), locus, fn);
|
||||||
}
|
}
|
||||||
@ -3045,12 +3199,26 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
|
|
||||||
if (dir == k.FORWARDS) {
|
if (dir == k.FORWARDS) {
|
||||||
if (getPlace().onLastPageOfBook()) {
|
if (getPlace().onLastPageOfBook()) {
|
||||||
|
p.reader.dispatchEvent(
|
||||||
|
'monocle:boundaryend',
|
||||||
|
{
|
||||||
|
locus: getPlace().getLocus({ direction : dir }),
|
||||||
|
page: upperPage()
|
||||||
|
}
|
||||||
|
);
|
||||||
resetTurnData();
|
resetTurnData();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
onGoingForward(boxPointX);
|
onGoingForward(boxPointX);
|
||||||
} else if (dir == k.BACKWARDS) {
|
} else if (dir == k.BACKWARDS) {
|
||||||
if (getPlace().onFirstPageOfBook()) {
|
if (getPlace().onFirstPageOfBook()) {
|
||||||
|
p.reader.dispatchEvent(
|
||||||
|
'monocle:boundarystart',
|
||||||
|
{
|
||||||
|
locus: getPlace().getLocus({ direction : dir }),
|
||||||
|
page: upperPage()
|
||||||
|
}
|
||||||
|
);
|
||||||
resetTurnData();
|
resetTurnData();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -3215,14 +3383,14 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
|
|
||||||
|
|
||||||
function announceTurn() {
|
function announceTurn() {
|
||||||
hideWaitControl(upperPage());
|
|
||||||
hideWaitControl(lowerPage());
|
|
||||||
p.reader.dispatchEvent('monocle:turn');
|
p.reader.dispatchEvent('monocle:turn');
|
||||||
resetTurnData();
|
resetTurnData();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function resetTurnData() {
|
function resetTurnData() {
|
||||||
|
hideWaitControl(upperPage());
|
||||||
|
hideWaitControl(lowerPage());
|
||||||
p.turnData = {};
|
p.turnData = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3268,7 +3436,7 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
(new Date()).getTime() - stamp > duration ||
|
(new Date()).getTime() - stamp > duration ||
|
||||||
Math.abs(currX - finalX) <= Math.abs((currX + step) - finalX)
|
Math.abs(currX - finalX) <= Math.abs((currX + step) - finalX)
|
||||||
) {
|
) {
|
||||||
clearTimeout(elem.setXTransitionInterval)
|
clearTimeout(elem.setXTransitionInterval);
|
||||||
Monocle.Styles.setX(elem, finalX);
|
Monocle.Styles.setX(elem, finalX);
|
||||||
if (elem.setXTCB) {
|
if (elem.setXTCB) {
|
||||||
elem.setXTCB();
|
elem.setXTCB();
|
||||||
@ -3366,13 +3534,17 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
|
|
||||||
function jumpIn(pageDiv, callback) {
|
function jumpIn(pageDiv, callback) {
|
||||||
var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
|
var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
|
||||||
setX(pageDiv, 0, { duration: dur }, callback);
|
Monocle.defer(function () {
|
||||||
|
setX(pageDiv, 0, { duration: dur }, callback);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function jumpOut(pageDiv, callback) {
|
function jumpOut(pageDiv, callback) {
|
||||||
var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
|
var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
|
||||||
setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: dur }, callback);
|
Monocle.defer(function () {
|
||||||
|
setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: dur }, callback);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -3382,7 +3554,9 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
duration: k.durations.SLIDE,
|
duration: k.durations.SLIDE,
|
||||||
timing: 'ease-in'
|
timing: 'ease-in'
|
||||||
};
|
};
|
||||||
setX(upperPage(), 0, slideOpts, callback);
|
Monocle.defer(function () {
|
||||||
|
setX(upperPage(), 0, slideOpts, callback);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -3391,7 +3565,9 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
duration: k.durations.SLIDE,
|
duration: k.durations.SLIDE,
|
||||||
timing: 'ease-in'
|
timing: 'ease-in'
|
||||||
};
|
};
|
||||||
setX(upperPage(), 0 - upperPage().offsetWidth, slideOpts, callback);
|
Monocle.defer(function () {
|
||||||
|
setX(upperPage(), 0 - upperPage().offsetWidth, slideOpts, callback);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -3418,13 +3594,13 @@ Monocle.Flippers.Slider = function (reader) {
|
|||||||
|
|
||||||
function showWaitControl(page) {
|
function showWaitControl(page) {
|
||||||
var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
|
var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
|
||||||
ctrl.style.opacity = 0.5;
|
ctrl.style.visibility = "visible";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function hideWaitControl(page) {
|
function hideWaitControl(page) {
|
||||||
var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
|
var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
|
||||||
ctrl.style.opacity = 0;
|
ctrl.style.visibility = "hidden";
|
||||||
}
|
}
|
||||||
|
|
||||||
API.pageCount = p.pageCount;
|
API.pageCount = p.pageCount;
|
||||||
|
371
resources/content_server/stacktrace.js
Normal file
@ -0,0 +1,371 @@
|
|||||||
|
// Domain Public by Eric Wendelin http://eriwen.com/ (2008)
|
||||||
|
// Luke Smith http://lucassmith.name/ (2008)
|
||||||
|
// Loic Dachary <loic@dachary.org> (2008)
|
||||||
|
// Johan Euphrosine <proppy@aminche.com> (2008)
|
||||||
|
// Oyvind Sean Kinsey http://kinsey.no/blog (2010)
|
||||||
|
// Victor Homyakov <victor-homyakov@users.sourceforge.net> (2010)
|
||||||
|
//
|
||||||
|
// Information and discussions
|
||||||
|
// http://jspoker.pokersource.info/skin/test-printstacktrace.html
|
||||||
|
// http://eriwen.com/javascript/js-stack-trace/
|
||||||
|
// http://eriwen.com/javascript/stacktrace-update/
|
||||||
|
// http://pastie.org/253058
|
||||||
|
//
|
||||||
|
// guessFunctionNameFromLines comes from firebug
|
||||||
|
//
|
||||||
|
// Software License Agreement (BSD License)
|
||||||
|
//
|
||||||
|
// Copyright (c) 2007, Parakey Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
//
|
||||||
|
// Redistribution and use of this software in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above
|
||||||
|
// copyright notice, this list of conditions and the
|
||||||
|
// following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistributions in binary form must reproduce the above
|
||||||
|
// copyright notice, this list of conditions and the
|
||||||
|
// following disclaimer in the documentation and/or other
|
||||||
|
// materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * Neither the name of Parakey Inc. nor the names of its
|
||||||
|
// contributors may be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior
|
||||||
|
// written permission of Parakey Inc.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||||
|
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||||
|
// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main function giving a function stack trace with a forced or passed in Error
|
||||||
|
*
|
||||||
|
* @cfg {Error} e The error to create a stacktrace from (optional)
|
||||||
|
* @cfg {Boolean} guess If we should try to resolve the names of anonymous functions
|
||||||
|
* @return {Array} of Strings with functions, lines, files, and arguments where possible
|
||||||
|
*/
|
||||||
|
function printStackTrace(options) {
|
||||||
|
options = options || {guess: true};
|
||||||
|
var ex = options.e || null, guess = !!options.guess;
|
||||||
|
var p = new printStackTrace.implementation(), result = p.run(ex);
|
||||||
|
return (guess) ? p.guessAnonymousFunctions(result) : result;
|
||||||
|
}
|
||||||
|
|
||||||
|
printStackTrace.implementation = function() {
|
||||||
|
};
|
||||||
|
|
||||||
|
printStackTrace.implementation.prototype = {
|
||||||
|
run: function(ex) {
|
||||||
|
ex = ex || this.createException();
|
||||||
|
// Do not use the stored mode: different exceptions in Chrome
|
||||||
|
// may or may not have arguments or stack
|
||||||
|
var mode = this.mode(ex);
|
||||||
|
// Use either the stored mode, or resolve it
|
||||||
|
//var mode = this._mode || this.mode(ex);
|
||||||
|
if (mode === 'other') {
|
||||||
|
return this.other(arguments.callee);
|
||||||
|
} else {
|
||||||
|
return this[mode](ex);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
createException: function() {
|
||||||
|
try {
|
||||||
|
this.undef();
|
||||||
|
return null;
|
||||||
|
} catch (e) {
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return {String} mode of operation for the environment in question.
|
||||||
|
*/
|
||||||
|
mode: function(e) {
|
||||||
|
if (e['arguments'] && e.stack) {
|
||||||
|
return (this._mode = 'chrome');
|
||||||
|
} else if (e.message && typeof window !== 'undefined' && window.opera) {
|
||||||
|
return (this._mode = e.stacktrace ? 'opera10' : 'opera');
|
||||||
|
} else if (e.stack) {
|
||||||
|
return (this._mode = 'firefox');
|
||||||
|
}
|
||||||
|
return (this._mode = 'other');
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a context, function name, and callback function, overwrite it so that it calls
|
||||||
|
* printStackTrace() first with a callback and then runs the rest of the body.
|
||||||
|
*
|
||||||
|
* @param {Object} context of execution (e.g. window)
|
||||||
|
* @param {String} functionName to instrument
|
||||||
|
* @param {Function} function to call with a stack trace on invocation
|
||||||
|
*/
|
||||||
|
instrumentFunction: function(context, functionName, callback) {
|
||||||
|
context = context || window;
|
||||||
|
var original = context[functionName];
|
||||||
|
context[functionName] = function instrumented() {
|
||||||
|
callback.call(this, printStackTrace().slice(4));
|
||||||
|
return context[functionName]._instrumented.apply(this, arguments);
|
||||||
|
};
|
||||||
|
context[functionName]._instrumented = original;
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a context and function name of a function that has been
|
||||||
|
* instrumented, revert the function to its original (non-instrumented)
|
||||||
|
* state.
|
||||||
|
*
|
||||||
|
* @param {Object} context of execution (e.g. window)
|
||||||
|
* @param {String} functionName to de-instrument
|
||||||
|
*/
|
||||||
|
deinstrumentFunction: function(context, functionName) {
|
||||||
|
if (context[functionName].constructor === Function &&
|
||||||
|
context[functionName]._instrumented &&
|
||||||
|
context[functionName]._instrumented.constructor === Function) {
|
||||||
|
context[functionName] = context[functionName]._instrumented;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given an Error object, return a formatted Array based on Chrome's stack string.
|
||||||
|
*
|
||||||
|
* @param e - Error object to inspect
|
||||||
|
* @return Array<String> of function calls, files and line numbers
|
||||||
|
*/
|
||||||
|
chrome: function(e) {
|
||||||
|
//return e.stack.replace(/^[^\(]+?[\n$]/gm, '').replace(/^\s+at\s+/gm, '').replace(/^Object.<anonymous>\s*\(/gm, '{anonymous}()@').split('\n');
|
||||||
|
return e.stack.replace(/^\S[^\(]+?[\n$]/gm, '').
|
||||||
|
replace(/^\s+at\s+/gm, '').
|
||||||
|
replace(/^([^\(]+?)([\n$])/gm, '{anonymous}()@$1$2').
|
||||||
|
replace(/^Object.<anonymous>\s*\(([^\)]+)\)/gm, '{anonymous}()@$1').split('\n');
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given an Error object, return a formatted Array based on Firefox's stack string.
|
||||||
|
*
|
||||||
|
* @param e - Error object to inspect
|
||||||
|
* @return Array<String> of function calls, files and line numbers
|
||||||
|
*/
|
||||||
|
firefox: function(e) {
|
||||||
|
return e.stack.replace(/(?:\n@:0)?\s+$/m, '').replace(/^\(/gm, '{anonymous}(').split('\n');
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given an Error object, return a formatted Array based on Opera 10's stacktrace string.
|
||||||
|
*
|
||||||
|
* @param e - Error object to inspect
|
||||||
|
* @return Array<String> of function calls, files and line numbers
|
||||||
|
*/
|
||||||
|
opera10: function(e) {
|
||||||
|
var stack = e.stacktrace;
|
||||||
|
var lines = stack.split('\n'), ANON = '{anonymous}', lineRE = /.*line (\d+), column (\d+) in ((<anonymous function\:?\s*(\S+))|([^\(]+)\([^\)]*\))(?: in )?(.*)\s*$/i, i, j, len;
|
||||||
|
for (i = 2, j = 0, len = lines.length; i < len - 2; i++) {
|
||||||
|
if (lineRE.test(lines[i])) {
|
||||||
|
var location = RegExp.$6 + ':' + RegExp.$1 + ':' + RegExp.$2;
|
||||||
|
var fnName = RegExp.$3;
|
||||||
|
fnName = fnName.replace(/<anonymous function\:?\s?(\S+)?>/g, ANON);
|
||||||
|
lines[j++] = fnName + '@' + location;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lines.splice(j, lines.length - j);
|
||||||
|
return lines;
|
||||||
|
},
|
||||||
|
|
||||||
|
// Opera 7.x-9.x only!
|
||||||
|
opera: function(e) {
|
||||||
|
var lines = e.message.split('\n'), ANON = '{anonymous}', lineRE = /Line\s+(\d+).*script\s+(http\S+)(?:.*in\s+function\s+(\S+))?/i, i, j, len;
|
||||||
|
|
||||||
|
for (i = 4, j = 0, len = lines.length; i < len; i += 2) {
|
||||||
|
//TODO: RegExp.exec() would probably be cleaner here
|
||||||
|
if (lineRE.test(lines[i])) {
|
||||||
|
lines[j++] = (RegExp.$3 ? RegExp.$3 + '()@' + RegExp.$2 + RegExp.$1 : ANON + '()@' + RegExp.$2 + ':' + RegExp.$1) + ' -- ' + lines[i + 1].replace(/^\s+/, '');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lines.splice(j, lines.length - j);
|
||||||
|
return lines;
|
||||||
|
},
|
||||||
|
|
||||||
|
// Safari, IE, and others
|
||||||
|
other: function(curr) {
|
||||||
|
var ANON = '{anonymous}', fnRE = /function\s*([\w\-$]+)?\s*\(/i, stack = [], fn, args, maxStackSize = 10;
|
||||||
|
while (curr && stack.length < maxStackSize) {
|
||||||
|
fn = fnRE.test(curr.toString()) ? RegExp.$1 || ANON : ANON;
|
||||||
|
args = Array.prototype.slice.call(curr['arguments'] || []);
|
||||||
|
stack[stack.length] = fn + '(' + this.stringifyArguments(args) + ')';
|
||||||
|
curr = curr.caller;
|
||||||
|
}
|
||||||
|
return stack;
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given arguments array as a String, substituting type names for non-string types.
|
||||||
|
*
|
||||||
|
* @param {Arguments} object
|
||||||
|
* @return {Array} of Strings with stringified arguments
|
||||||
|
*/
|
||||||
|
stringifyArguments: function(args) {
|
||||||
|
var slice = Array.prototype.slice;
|
||||||
|
for (var i = 0; i < args.length; ++i) {
|
||||||
|
var arg = args[i];
|
||||||
|
if (arg === undefined) {
|
||||||
|
args[i] = 'undefined';
|
||||||
|
} else if (arg === null) {
|
||||||
|
args[i] = 'null';
|
||||||
|
} else if (arg.constructor) {
|
||||||
|
if (arg.constructor === Array) {
|
||||||
|
if (arg.length < 3) {
|
||||||
|
args[i] = '[' + this.stringifyArguments(arg) + ']';
|
||||||
|
} else {
|
||||||
|
args[i] = '[' + this.stringifyArguments(slice.call(arg, 0, 1)) + '...' + this.stringifyArguments(slice.call(arg, -1)) + ']';
|
||||||
|
}
|
||||||
|
} else if (arg.constructor === Object) {
|
||||||
|
args[i] = '#object';
|
||||||
|
} else if (arg.constructor === Function) {
|
||||||
|
args[i] = '#function';
|
||||||
|
} else if (arg.constructor === String) {
|
||||||
|
args[i] = '"' + arg + '"';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return args.join(',');
|
||||||
|
},
|
||||||
|
|
||||||
|
sourceCache: {},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the text from a given URL.
|
||||||
|
*/
|
||||||
|
ajax: function(url) {
|
||||||
|
var req = this.createXMLHTTPObject();
|
||||||
|
if (!req) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
req.open('GET', url, false);
|
||||||
|
req.setRequestHeader('User-Agent', 'XMLHTTP/1.0');
|
||||||
|
req.send('');
|
||||||
|
return req.responseText;
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try XHR methods in order and store XHR factory.
|
||||||
|
*
|
||||||
|
* @return <Function> XHR function or equivalent
|
||||||
|
*/
|
||||||
|
createXMLHTTPObject: function() {
|
||||||
|
var xmlhttp, XMLHttpFactories = [
|
||||||
|
function() {
|
||||||
|
return new XMLHttpRequest();
|
||||||
|
}, function() {
|
||||||
|
return new ActiveXObject('Msxml2.XMLHTTP');
|
||||||
|
}, function() {
|
||||||
|
return new ActiveXObject('Msxml3.XMLHTTP');
|
||||||
|
}, function() {
|
||||||
|
return new ActiveXObject('Microsoft.XMLHTTP');
|
||||||
|
}
|
||||||
|
];
|
||||||
|
for (var i = 0; i < XMLHttpFactories.length; i++) {
|
||||||
|
try {
|
||||||
|
xmlhttp = XMLHttpFactories[i]();
|
||||||
|
// Use memoization to cache the factory
|
||||||
|
this.createXMLHTTPObject = XMLHttpFactories[i];
|
||||||
|
return xmlhttp;
|
||||||
|
} catch (e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a URL, check if it is in the same domain (so we can get the source
|
||||||
|
* via Ajax).
|
||||||
|
*
|
||||||
|
* @param url <String> source url
|
||||||
|
* @return False if we need a cross-domain request
|
||||||
|
*/
|
||||||
|
isSameDomain: function(url) {
|
||||||
|
return url.indexOf(location.hostname) !== -1;
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get source code from given URL if in the same domain.
|
||||||
|
*
|
||||||
|
* @param url <String> JS source URL
|
||||||
|
* @return <Array> Array of source code lines
|
||||||
|
*/
|
||||||
|
getSource: function(url) {
|
||||||
|
if (!(url in this.sourceCache)) {
|
||||||
|
this.sourceCache[url] = this.ajax(url).split('\n');
|
||||||
|
}
|
||||||
|
return this.sourceCache[url];
|
||||||
|
},
|
||||||
|
|
||||||
|
guessAnonymousFunctions: function(stack) {
|
||||||
|
for (var i = 0; i < stack.length; ++i) {
|
||||||
|
var reStack = /\{anonymous\}\(.*\)@(\w+:\/\/([\-\w\.]+)+(:\d+)?[^:]+):(\d+):?(\d+)?/;
|
||||||
|
var frame = stack[i], m = reStack.exec(frame);
|
||||||
|
if (m) {
|
||||||
|
var file = m[1], lineno = m[4], charno = m[7] || 0; //m[7] is character position in Chrome
|
||||||
|
if (file && this.isSameDomain(file) && lineno) {
|
||||||
|
var functionName = this.guessAnonymousFunction(file, lineno, charno);
|
||||||
|
stack[i] = frame.replace('{anonymous}', functionName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return stack;
|
||||||
|
},
|
||||||
|
|
||||||
|
guessAnonymousFunction: function(url, lineNo, charNo) {
|
||||||
|
var ret;
|
||||||
|
try {
|
||||||
|
ret = this.findFunctionName(this.getSource(url), lineNo);
|
||||||
|
} catch (e) {
|
||||||
|
ret = 'getSource failed with url: ' + url + ', exception: ' + e.toString();
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
},
|
||||||
|
|
||||||
|
findFunctionName: function(source, lineNo) {
|
||||||
|
// FIXME findFunctionName fails for compressed source
|
||||||
|
// (more than one function on the same line)
|
||||||
|
// TODO use captured args
|
||||||
|
// function {name}({args}) m[1]=name m[2]=args
|
||||||
|
var reFunctionDeclaration = /function\s+([^(]*?)\s*\(([^)]*)\)/;
|
||||||
|
// {name} = function ({args}) TODO args capture
|
||||||
|
// /['"]?([0-9A-Za-z_]+)['"]?\s*[:=]\s*function(?:[^(]*)/
|
||||||
|
var reFunctionExpression = /['"]?([0-9A-Za-z_]+)['"]?\s*[:=]\s*function\b/;
|
||||||
|
// {name} = eval()
|
||||||
|
var reFunctionEvaluation = /['"]?([0-9A-Za-z_]+)['"]?\s*[:=]\s*(?:eval|new Function)\b/;
|
||||||
|
// Walk backwards in the source lines until we find
|
||||||
|
// the line which matches one of the patterns above
|
||||||
|
var code = "", line, maxLines = 10, m;
|
||||||
|
for (var i = 0; i < maxLines; ++i) {
|
||||||
|
// FIXME lineNo is 1-based, source[] is 0-based
|
||||||
|
line = source[lineNo - i];
|
||||||
|
if (line) {
|
||||||
|
code = line + code;
|
||||||
|
m = reFunctionExpression.exec(code);
|
||||||
|
if (m && m[1]) {
|
||||||
|
return m[1];
|
||||||
|
}
|
||||||
|
m = reFunctionDeclaration.exec(code);
|
||||||
|
if (m && m[1]) {
|
||||||
|
//return m[1] + "(" + (m[2] || "") + ")";
|
||||||
|
return m[1];
|
||||||
|
}
|
||||||
|
m = reFunctionEvaluation.exec(code);
|
||||||
|
if (m && m[1]) {
|
||||||
|
return m[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return '(?)';
|
||||||
|
}
|
||||||
|
};
|
@ -37,7 +37,6 @@ series_index_auto_increment = 'next'
|
|||||||
# Can be either True or False
|
# Can be either True or False
|
||||||
authors_completer_append_separator = False
|
authors_completer_append_separator = False
|
||||||
|
|
||||||
|
|
||||||
#: Author sort name algorithm
|
#: Author sort name algorithm
|
||||||
# The algorithm used to copy author to author_sort
|
# The algorithm used to copy author to author_sort
|
||||||
# Possible values are:
|
# Possible values are:
|
||||||
@ -71,6 +70,15 @@ author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
|
|||||||
# categories_use_field_for_author_name = 'author_sort'
|
# categories_use_field_for_author_name = 'author_sort'
|
||||||
categories_use_field_for_author_name = 'author'
|
categories_use_field_for_author_name = 'author'
|
||||||
|
|
||||||
|
#: Completion sort order: choose when to change from lexicographic to ASCII-like
|
||||||
|
# Calibre normally uses locale-dependent lexicographic ordering when showing
|
||||||
|
# completion values. This means that the sort order is correct for the user's
|
||||||
|
# language. However, this can be slow. Performance is improved by switching to
|
||||||
|
# ascii ordering. This tweak controls when that switch happens. Set it to zero
|
||||||
|
# to always use ascii ordering. Set it to something larger than zero to switch
|
||||||
|
# to ascii ordering for performance reasons.
|
||||||
|
completion_change_to_ascii_sorting = 2500
|
||||||
|
|
||||||
#: Control partitioning of Tag Browser
|
#: Control partitioning of Tag Browser
|
||||||
# When partitioning the tags browser, the format of the subcategory label is
|
# When partitioning the tags browser, the format of the subcategory label is
|
||||||
# controlled by a template: categories_collapsed_name_template if sorting by
|
# controlled by a template: categories_collapsed_name_template if sorting by
|
||||||
@ -93,7 +101,6 @@ categories_collapsed_name_template = r'{first.sort:shorten(4,,0)} - {last.sort:s
|
|||||||
categories_collapsed_rating_template = r'{first.avg_rating:4.2f:ifempty(0)} - {last.avg_rating:4.2f:ifempty(0)}'
|
categories_collapsed_rating_template = r'{first.avg_rating:4.2f:ifempty(0)} - {last.avg_rating:4.2f:ifempty(0)}'
|
||||||
categories_collapsed_popularity_template = r'{first.count:d} - {last.count:d}'
|
categories_collapsed_popularity_template = r'{first.count:d} - {last.count:d}'
|
||||||
|
|
||||||
|
|
||||||
#: Specify columns to sort the booklist by on startup
|
#: Specify columns to sort the booklist by on startup
|
||||||
# Provide a set of columns to be sorted on when calibre starts
|
# Provide a set of columns to be sorted on when calibre starts
|
||||||
# The argument is None if saved sort history is to be used
|
# The argument is None if saved sort history is to be used
|
||||||
@ -244,17 +251,14 @@ sony_collection_name_template='{value}{category:| (|)}'
|
|||||||
# Default: empty (no rules), so no collection attributes are named.
|
# Default: empty (no rules), so no collection attributes are named.
|
||||||
sony_collection_sorting_rules = []
|
sony_collection_sorting_rules = []
|
||||||
|
|
||||||
|
|
||||||
#: Control how tags are applied when copying books to another library
|
#: Control how tags are applied when copying books to another library
|
||||||
# Set this to True to ensure that tags in 'Tags to add when adding
|
# Set this to True to ensure that tags in 'Tags to add when adding
|
||||||
# a book' are added when copying books to another library
|
# a book' are added when copying books to another library
|
||||||
add_new_book_tags_when_importing_books = False
|
add_new_book_tags_when_importing_books = False
|
||||||
|
|
||||||
|
|
||||||
#: Set the maximum number of tags to show per book in the content server
|
#: Set the maximum number of tags to show per book in the content server
|
||||||
max_content_server_tags_shown=5
|
max_content_server_tags_shown=5
|
||||||
|
|
||||||
|
|
||||||
#: Set custom metadata fields that the content server will or will not display.
|
#: Set custom metadata fields that the content server will or will not display.
|
||||||
# content_server_will_display is a list of custom fields to be displayed.
|
# content_server_will_display is a list of custom fields to be displayed.
|
||||||
# content_server_wont_display is a list of custom fields not to be displayed.
|
# content_server_wont_display is a list of custom fields not to be displayed.
|
||||||
@ -288,14 +292,17 @@ maximum_resort_levels = 5
|
|||||||
generate_cover_title_font = None
|
generate_cover_title_font = None
|
||||||
generate_cover_foot_font = None
|
generate_cover_foot_font = None
|
||||||
|
|
||||||
#: Control behavior of double clicks on the book list
|
#: Control behavior of the book list
|
||||||
# Behavior of doubleclick on the books list. Choices: open_viewer, do_nothing,
|
# You can control the behavior of double clicks on the book list.
|
||||||
|
# Choices: open_viewer, do_nothing,
|
||||||
# edit_cell, edit_metadata. Selecting edit_metadata has the side effect of
|
# edit_cell, edit_metadata. Selecting edit_metadata has the side effect of
|
||||||
# disabling editing a field using a single click.
|
# disabling editing a field using a single click.
|
||||||
# Default: open_viewer.
|
# Default: open_viewer.
|
||||||
# Example: doubleclick_on_library_view = 'do_nothing'
|
# Example: doubleclick_on_library_view = 'do_nothing'
|
||||||
|
# You can also control whether the book list scrolls horizontally per column or
|
||||||
|
# per pixel. Default is per column.
|
||||||
doubleclick_on_library_view = 'open_viewer'
|
doubleclick_on_library_view = 'open_viewer'
|
||||||
|
horizontal_scrolling_per_column = True
|
||||||
|
|
||||||
#: Language to use when sorting.
|
#: Language to use when sorting.
|
||||||
# Setting this tweak will force sorting to use the
|
# Setting this tweak will force sorting to use the
|
||||||
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
BIN
resources/images/plugins/mobileread.png
Normal file
After Width: | Height: | Size: 641 B |
BIN
resources/images/plugins/plugin_deprecated.png
Normal file
After Width: | Height: | Size: 9.7 KiB |
BIN
resources/images/plugins/plugin_disabled_invalid.png
Normal file
After Width: | Height: | Size: 12 KiB |
BIN
resources/images/plugins/plugin_disabled_ok.png
Normal file
After Width: | Height: | Size: 7.6 KiB |
BIN
resources/images/plugins/plugin_disabled_valid.png
Normal file
After Width: | Height: | Size: 10 KiB |
BIN
resources/images/plugins/plugin_new.png
Normal file
After Width: | Height: | Size: 8.7 KiB |
BIN
resources/images/plugins/plugin_new_invalid.png
Normal file
After Width: | Height: | Size: 13 KiB |
BIN
resources/images/plugins/plugin_new_valid.png
Normal file
After Width: | Height: | Size: 11 KiB |
BIN
resources/images/plugins/plugin_updater.png
Normal file
After Width: | Height: | Size: 18 KiB |
BIN
resources/images/plugins/plugin_updater_updates.png
Normal file
After Width: | Height: | Size: 15 KiB |
BIN
resources/images/plugins/plugin_upgrade_invalid.png
Normal file
After Width: | Height: | Size: 15 KiB |
BIN
resources/images/plugins/plugin_upgrade_ok.png
Normal file
After Width: | Height: | Size: 12 KiB |