merge from trunk
Changelog.yaml
@@ -4,6 +4,453 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.

- version: 0.7.28
  date: 2010-11-12

  new features:
    - title: "Update the version of the graphical toolkit (Qt 4.7.1) used in the calibre binary builds on windows and linux. This should result in a significant speed up for the calibre ebook viewer"

    - title: "Driver for Nook Color, Eken M001"

    - title: "Add a tweak to turn off double clicking to open viewer"

    - title: "Catalog generation: Add indication when a book has no formats"
      tickets: [7376]

    - title: "Advanced search dialog: Add a tab to allow searching particular metadata fields easily"

    - title: "Conversion pipeline: When using the Level x Table of Contents expressions, if a tag is empty but has a non-empty title attribute, use that instead of ignoring the tag"
  bug fixes:
    - title: "Comic metadata reader: Sort filenames alphabetically when choosing an image for the cover"
      tickets: [7488]

    - title: "Bulk convert dialog: Hide useless restore defaults button."
      tickets: [7471]

    - title: "Conversion pipeline: Handle input documents that encode null bytes as HTML entities correctly"
      tickets: [7355]

    - title: "Fix some SONY readers not being detected on windows"
      tickets: [7413]

    - title: "MOBI Input: Fix images missing when converting MOBI news downloads created with Mobipocket reader"
      tickets: [7455]

    - title: "ODT Input: Handle hyperlinks to headings that have truncated destination specifiers correctly"
      tickets: [7506]

    - title: "Sony driver: Ignore invalid strings when updating XML database"

    - title: "Content Server: Add day to displayed date in /mobile book listing"

    - title: "MOBI Input: Do not generate filenames with only extensions if the MOBI file has no internal name"
      tickets: [7481]

    - title: "MOBI Input: Handle files that have the record sizes set incorrectly to a long integer"
      tickets: [7472]

    - title: "Fix not enough vertical space for text in the preferences dialog category listing"

    - title: "Remove 'sort' from Search and replace destination fields and add it to source fields. S&R is no longer marked experimental"

    - title: "Edit metadata dialog: Save dialog geometry on reject as well as on accept"

    - title: "E-book viewer: Fix clicking entries in TOC that point to the currently loaded flow not scrolling view to the top of the document"

    - title: "Fix bug in regex used to extract charset from <meta> tags"

    - title: "MOBI Output: Add support for the <q> tag"

  improved recipes:
    - Zeit Online
    - Gamespot Review
    - Politika
    - Pagina12
    - Irish Times
    - elektrolese

  new recipes:
    - title: "Handelsblatt and European Voice"
      author: "malfi"

    - title: "Polityka and Newsweek"
      author: "Mateusz Kielar"

    - title: "MarcTV"
      author: "Marc Toensings"

    - title: "Rolling Stone"
      author: "Darko Miletic"

    - title: "Vedomosti"
      author: "Nikolai Kotchetkov"

    - title: "Hola.com"
      author: "bmsleight"

    - title: "Dnevnik, Siol.net, MMC-RTV and Avto-magazin"
      author: "BlonG"

    - title: "SC Print Magazine"
      author: "Tony Maro"

    - title: "Diario Sport"
      author: "Jefferson Frantz"
- version: 0.7.27
  date: 2010-11-05

  new features:
    - title: "The book list behavior has changed"
      type: major
      description: >
        "Now double clicking on an entry in the book list will open it in the viewer. To edit metadata, single click a previously selected entry instead. This is consistent with
        the usage in most operating systems, so it should be intuitive for new users. Also, typing any key no longer starts an edit; instead press F2 (Enter on OS X) to start editing
        the current cell. Also you now have to double click instead of single clicking the book details panel to open the detailed info dialog for the book."

    - title: "Added a new HTML output format plugin, which converts the input document to a ZIP file. The zip file contains HTML pages suitable for display in a website"

    - title: "Support for iRiver Cover Story and Digma Q600"

    - title: "Add a search button (labelled Go!) to explicitly run a search with the text currently in the quick search box"

    - title: "Add author to the calibre generated book jacket"
      tickets: [7402]

    - title: "Add the title of the destination book to the merge dialog warning message"

    - title: "calibre-server: Make auto reload control separate from --develop with a new command line option --auto-reload"
  bug fixes:
    - title: "Fix book details panel not being updated after a delete-merge"
      tickets: [7426]

    - title: "Fix clicking in the search box launching a search if you have search as you type enabled"
      tickets: [7425]

    - title: "Use a browser widget to display book details for more robustness and better performance when viewing large HTML comments"

    - title: "Fix cover browser not updated after copy to library and delete"
      tickets: [7416]

    - title: "Fix regression that broke sending non calibre EPUB files to the iPad. Also handle failure to set cover in iTunes gracefully"
      tickets: [7356]

    - title: "News download: Work around lack of thread safety in python mechanize, causing corrupted network packets (degrading network performance) on Ubuntu Maverick 64bit kernels"
      tickets: [7321]

    - title: "Convert comments to HTML for the book details panel in a separate thread to make scrolling through the book list faster when large comments are present"

    - title: "calibre-server: Fix regression that broke --daemonize"

    - title: "EPUB Input: Handle ncx files that have <navpoint> elements with no content correctly."
      tickets: [7396]

    - title: "SNB Output: Fixed a bug in handling of the pre tag"

    - title: "MOBI Output: Don't ignore hidden anchors."
      tickets: [7384]

    - title: "Fix a bug where switching libraries while generating a catalog could generate a catalog for the wrong library"

    - title: "MOBI Output: Fix regression that broke conversion of anchors inside superscripts/subscripts."
      tickets: [7368]

    - title: "Content server: Fix various minor bugs"
      tickets: [7379, 6768, 7354]

    - title: "Amazon metadata download plugin: Make it more robust and add option to auto convert HTML to text"

    - title: "Re-arrange send to device menu to make it harder to accidentally trigger the send and delete actions"

  improved recipes:
    - Danas
    - Fudzilla
    - Zeit Online
    - New York Times
    - Mediapart

  new recipes:
    - title: "Ynet and Calcalist"
      author: "marbs"

    - title: "El Faro de Vigo"
      author: "Jefferson Frantz"

    - title: "Clic_RBS"
      author: "arvoredo"

    - title: "Correio da Manha"
      author: "jmst"

    - title: "Rue89"
      author: "Louis Gesbert"
- version: 0.7.26
  date: 2010-10-30

  new features:
    - title: "Check library: Allow wildcards in ignore names field"

  bug fixes:
    - title: "Fix regression in 0.7.25 that broke reading metadata from filenames."

    - title: "Fix regression in 0.7.25 that caused original files to be mistakenly removed when adding books recursively"

    - title: "Fix long series/publisher causing the edit metadata in bulk dialog to become very large"
      tickets: [7332]

    - title: "Only add SONY periodical code to downloaded news if output profile is set to one of the SONY reader profiles. This is needed because the ever delightful Stanza crashes and burns when an EPUB has the periodical code"

  improved recipes:
    - El Periodico
    - New Zealand Herald

  new recipes:
    - title: "Tagesschau.de"
      author: "Florian Andreas Pfaff"

    - title: "Gamespot Reviews"
      author: "Marc Tonsing"
- version: 0.7.25
  date: 2010-10-29

  new features:
    - title: "Add support for the SONY periodical format."
      description: "This means that news downloaded by calibre and sent to a newer SONY device (350/650/900) should appear in the Periodicals section and have the special periodicals navigation user interface"
      type: major

    - title: "Content server: Make the new browsing interface the default. The old interface can be accessed at /old"

    - title: "Content server: Allow running of the content server as a WSGI application within another server. Add tutorial for this to the User Manual."
      # a hedged sketch of this follows the feature list below

    - title: "Support for the Pico Life reader, Kobo Wifi and HTC Aria"

    - title: "Content server: Add a new --url-prefix command line option to ease the use of the server with a reverse proxy"

    - title: "New social metadata plugin for Amazon that does not rely on AWS. Since Amazon broke AWS, it is recommended you upgrade to this version if you use metadata from Amazon"

    - title: "Add a tweak to specify the fonts used when generating the default cover"

    - title: "Add an output profile for generic Tablet devices"
      tickets: [7289]

    - title: "SONY driver: Allow sorting of collections by arbitrary field via a new tweak."

    - title: "Content server: Make /mobile a little prettier"

    - title: "Add button to 'Library Check' to automatically delete spurious files and folders"
  bug fixes:
    - title: "FB2 Input: Lots of love. Handle stylesheets and style attributes. Make parsing of malformed FB2 files more robust."
      tickets: [7219, 7230]

    - title: "Fix auto send of news to device with multiple calibre libraries. The fix means that if you have any pending news to be sent, it will be ignored after the update. Future news downloads will once again be automatically sent to the device."

    - title: "MOBI Output: Conversion of super/sub scripts now handles nested tags."
      tickets: [7264]

    - title: "Conversion pipeline: Fix parsing of XML encoding declarations."
      tickets: [7328]

    - title: "Pandigital (Kobo): Upload thumbnails to correct location"
      tickets: [7165]

    - title: "Fix auto emailed news with non-ascii characters in title not being delivered to Kindle"
      tickets: [7322]

    - title: "Read metadata only after 'on import' plugins have run when adding books to GUI"
      tickets: [7245]

    - title: "Various fixes for bugs caused by non-ascii temporary paths on windows with non UTF-8 filesystem encodings"
      tickets: [7288]

    - title: "Various fixes/enhancements to SNB Output"

    - title: "Allow Tag editor in edit metadata dialog to be used even if tags have been changed"
      tickets: [7298]

    - title: "Fix crash on some OS X machines when Preferences->Conversion->Output is clicked"

    - title: "MOBI indexing: Fix last entry missing sometimes"
      tickets: [6595]

    - title: "Fix regression causing books to be deselected after sending to device"
      tickets: [7271]

    - title: "Conversion pipeline: Fix rescaling of GIF images not working"
      tickets: [7306]

    - title: "Update PDF metadata/conversion libraries in windows build"

    - title: "Fix timezone bug when searching on date fields"
      tickets: [7300]

    - title: "Fix regression that caused the viewer to crash if the main application is closed"
      tickets: [7276]

    - title: "Fix bug causing a spurious metadata.opf file to be written at the root of the calibre library when adding books"

    - title: "Use the same title casing algorithm in all places"

    - title: "Fix bulk edit of dual state boolean custom columns"

    - title: "Increase image size for comics in Kindle DX profile for better conversion of comics to PDF"

    - title: "Fix restore db to not die when conflicting custom columns are encountered, and report conflicting column errors. Fix exceptions when referencing invalid _index fields."

    - title: "Fix auto merge books not respecting the article sort tweak"
      tickets: [7147]

    - title: "Linux device drivers: Fix udisks based ejecting for devices with multiple nodes"

    - title: "Linux device mounting: Mount the drive with the lowest kernel name as main memory"

    - title: "Fix use of numeric fields in templates"

    - title: "EPUB Input: Handle EPUB files with multiple OPF files."
      tickets: [7229]

    - title: "Setting EPUB metadata: Fix date format. Fix language being overwritten by 'und' when unspecified. Fix empty ISBN identifier being created"

    - title: "Fix inability to delete a Series listing from the List view. Also dismiss the fetch metadata dialog automatically when no metadata is found"
      tickets: [7221, 7220]

    - title: "Content server: Handle switch library in GUI gracefully"

    - title: "calibre-server: Use cherrypy implementation of --pidfile and --daemonize"

  new recipes:
    - title: "Ming Pao"
      author: "Eddie Lau"

    - title: "lenta.ru"
      author: "Nikolai Kotchetkov"

    - title: "frazpc.pl"
      author: "Tomasz Dlugosz"

    - title: "Perfil and The Economic Collapse Blog"
      author: "Darko Miletic"

    - title: "STNN"
      author: "Larry Chan"

  improved recipes:
    - CubaDebate
    - El Pais
    - Fox News
    - New Scientist
    - The Economic Times of India
- version: 0.7.24
  date: 2010-10-17

  new features:
    - title: "Content server: New interface that allows browsing via categories, similar to the Tag Browser in the calibre interface."
      description: >
        "You can access the new interface by going to /browse. So if your calibre content server is available at http://192.168.1.2, use
        http://192.168.1.2/browse. The new interface requires a fairly modern browser, so no Internet Explorer 6 or 7."
      type: major

    - title: "Support for the SNB e-book format, used by the Bambook e-book reader"
      type: major

    - title: "Driver for the Wifi Kobo"

    - title: "Edit metadata dialog: If metadata is downloaded successfully, set focus to download cover button"

    - title: "News download system: Allow recipes with optional subscriptions"
      tickets: [7199]
      # a hedged recipe sketch follows the feature list below

    - title: "Templates: Improve the smarten function"

    - title: "Linux device mounting: Use udisks, if it is available, to mount devices, so that I no longer have to hear bug reports from users using distro packages that have crippled calibre-mount-helper. You can turn off udisks by setting the environment variable CALIBRE_DISABLE_UDISKS=1"
      # a hedged launcher sketch also follows the feature list below

    - title: "Implement drag'n'drop of tags onto user categories"
      tickets: [7172]

    - title: "E-book viewer: Add command line option to start in full screen mode"

    - title: "Set completion mode on search boxes to popup completion"

    - title: "Update version of jQuery used in content server and viewer. Required a little hackery in the viewer; hopefully nothing broke"
  bug fixes:
    - title: "Linux device drivers: Ignore read only partition exported by the device"

    - title: "E-book viewer: Fix scrolling down with mouse wheel not always reaching bottom in windows"

    - title: "Smarten punctuation: Fix bug in handling of comments and <style> tags"

    - title: "EPUB Input: Handle EPUB files with components encoded in an encoding other than UTF-8 correctly, though why anyone would do that is a mystery."
      tickets: [7196]

    - title: "OS X command line tools: Decode non-ascii command line arguments correctly"
      tickets: [6964]

    - title: "MOBI Output: Fix bug that broke conversion of <svg> elements in the input document when the <svg> element was followed by non-whitespace text."
      tickets: [7083]

    - title: "CHM Input: Fix handling of relative file paths in <img> tags."
      tickets: [7159]

    - title: "EPUB Output: Fix incorrect format for xml:lang when specifying a sub language"
      tickets: [7198]

    - title: "EPUB Input: Make parsing of toc.ncx more robust."
      tickets: [7170]

    - title: "Content server: Fix searching with non-ascii characters on windows"
      tickets: [5249]

    - title: "Fix incorrect average rating calculation for the rating datatype in the Tag Browser"

    - title: "Comic Input: Fix image borders becoming yellow on some windows installs"

    - title: "Email sending: Fix sending of email with non-ascii characters"
      tickets: [7137]

    - title: "SONY driver: Fix collections created from series not being in order with manual metadata management, if all books in the series are not sent at once"

    - title: "Content server: Apply the search restriction when generating category lists as well"

    - title: "RTF Input: Fix regression in conversion of WMF images on linux at least, maybe on other platforms as well"

    - title: "Fix isbndb.com metadata downloading sometimes yielding a title of Unknown"
      tickets: [7114]

    - title: "Fix edit metadata dialog causing the hour:minute:seconds of the date column to be lost, even when the date is not changed"
      tickets: [7125]

  new recipes:
    - title: "Revista El Cultural"
      author: "Jefferson Frantz"

    - title: "Novaya Gazeta"
      author: "muwa"

    - title: "frazpc.pl"
      author: "Tomasz Dlugosz"

    - title: "Orsai and Financial Times UK"
      author: "Darko Miletic"

    - title: "Malaysian Mirror and Rolling Stones"
      author: "Tony Stegall"

  improved recipes:
    - Globe and Mail
    - Business Standard
    - Miami Herald
    - El Mercurio
    - volkskrant.nl
    - GoComics.com
    - The New Yorker
- version: 0.7.23
  date: 2010-10-08

@@ -51,6 +498,7 @@
    - title: "CHM input: handle another class of broken CHM files"
      tickets: [7058]

    - title: "Make calibre worker processes use the same temp directory as the calibre GUI"

  new recipes:
    - title: "Communications of the Association for Computing Machinery"
@@ -81,6 +81,14 @@ p.unread_book {
    text-indent:-2em;
}

p.wishlist_item {
    text-align:left;
    margin-top:0px;
    margin-bottom:0px;
    margin-left:2em;
    text-indent:-2em;
}

p.date_read {
    text-align:left;
    margin-top:0px;

@@ -104,3 +112,14 @@ hr.annotations_divider {
    margin-top:0em;
    margin-bottom:0em;
}

td.publisher, td.date {
    font-weight:bold;
    text-align:center;
}
td.rating {
    text-align: center;
}
td.thumbnail img {
    -webkit-box-shadow: 6px 6px 6px #888;
}
@@ -208,6 +208,8 @@ h2.library_name {
}

.toplevel li a { text-decoration: none; }

.toplevel li img {
    vertical-align: middle;
    margin-right: 1em;

@@ -261,9 +263,16 @@ h2.library_name {
}

.category div.category-item span.href { display: none }
.category div.category-item a { text-decoration: none; color: inherit; }

#groups span.load_href { display: none }
#groups a.load_href {
    text-decoration: none;
    color: inherit;
    font-size: medium;
    font-weight: normal;
    padding: 0;
    padding-left: 0.5em;
}

#groups h3 {
    font-weight: bold;
@@ -4,28 +4,29 @@
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <title>..:: calibre library ::.. {title}</title>
        <title>..:: calibre {library} ::.. {title}</title>
        <meta http-equiv="X-UA-Compatible" content="IE=100" />
        <link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />

        <link rel="stylesheet" type="text/css" href="/static/browse/browse.css" />
        <link type="text/css" href="/static/jquery_ui/css/humanity-custom/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
        <link rel="stylesheet" type="text/css" href="/static/jquery.multiselect.css" />
        <link rel="stylesheet" type="text/css" href="{prefix}/static/browse/browse.css" />
        <link type="text/css" href="{prefix}/static/jquery_ui/css/humanity-custom/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
        <link rel="stylesheet" type="text/css" href="{prefix}/static/jquery.multiselect.css" />

        <script type="text/javascript" src="/static/jquery.js"></script>
        <script type="text/javascript" src="/static/jquery.corner.js"></script>
        <script type="text/javascript" src="{prefix}/static/jquery.js"></script>
        <script type="text/javascript" src="{prefix}/static/jquery.corner.js"></script>

        <script type="text/javascript"
            src="/static/jquery_ui/js/jquery-ui-1.8.5.custom.min.js"></script>
            src="{prefix}/static/jquery_ui/js/jquery-ui-1.8.5.custom.min.js"></script>
        <script type="text/javascript"
            src="/static/jquery.multiselect.min.js"></script>
            src="{prefix}/static/jquery.multiselect.min.js"></script>

        <script type="text/javascript" src="/static/browse/browse.js"></script>
        <script type="text/javascript" src="{prefix}/static/browse/browse.js"></script>

        <script type="text/javascript">
            var sort_cookie_name = "{sort_cookie_name}";
            var sort_select_label = "{sort_select_label}";
            var url_prefix = "{prefix}";
            $(document).ready(function() {{
                init();
                {script}

@@ -39,16 +40,16 @@
    <div id="header">
        <div class="area">
            <div class="bubble">
                <p><a href="/browse" title="Return to top level"
                    >→ home ←</a></p>
                <p><a href="{prefix}/browse" title="Return to top level"
                    >→ {home} ←</a></p>
            </div>
        </div>
        <div id="nav-container">
            <ul id="primary-nav">
                <li><a id="nav-mobile" href="/mobile" title="A version of this website suited for mobile browsers">Mobile</a></li>
                <li><a id="nav-mobile" href="{prefix}/mobile" title="A version of this website suited for mobile browsers">Mobile</a></li>

                <li><a id="nav-demo" href="/old" title="The old version of this website">Old</a></li>
                <li><a id="nav-download" href="/opds" title="An OPDS feed based version of this website, used in special purpose applications">Feed</a></li>
                <li><a id="nav-demo" href="{prefix}/old" title="The old version of this website">Old</a></li>
                <li><a id="nav-download" href="{prefix}/opds" title="An OPDS feed based version of this website, used in special purpose applications">Feed</a></li>
            </ul>
        </div>

@@ -58,7 +59,7 @@
    <input type="hidden" name="cmd" value="_s-xclick"></input>
    <input type="hidden" name="hosted_button_id" value="3028915"></input>
    <input type="image"
        src="http://calibre-ebook.com/site_media//img/button-donate.png"
        src="{prefix}/static/button-donate.png"
        name="submit"></input>
    <img alt="" src="https://www.paypal.com/en_US/i/scr/pixel.gif"
        width="1" height="1"></img>

@@ -76,10 +77,10 @@
    </select>
    </div>
    <div id="search_box">
        <form name="search_form" action="/browse/search" method="get" accept-charset="UTF-8">
        <form name="search_form" action="{prefix}/browse/search" method="get" accept-charset="UTF-8">
            <input value="{initial_search}" type="text" title="Search" name="query"
                class="search_input" />
            <input type="submit" value="Search" title="Search" alt="Search" />
            <input type="submit" value="{Search}" title="{Search}" alt="{Search}" />
        </form>
    </div>
    <div> </div>
@@ -109,14 +109,14 @@ function toplevel_layout() {
    var last = $(".toplevel li").last();
    var title = $('.toplevel h3').first();
    var bottom = last.position().top + last.height() - title.position().top;
    $("#main").height(Math.max(200, bottom));
    $("#main").height(Math.max(200, bottom+75));
}

function toplevel() {
    $(".sort_select").hide();

    $(".toplevel li").click(function() {
        var href = $(this).children("span.url").text();
        var href = $(this).children("a").attr('href');
        window.location = href;
    });

@@ -133,7 +133,7 @@ function render_error(msg) {
// Category feed {{{

function category_clicked() {
    var href = $(this).find("span.href").html();
    var href = $(this).find("a").attr('href');
    window.location = href;
}

@@ -151,11 +151,12 @@ function category() {

    change: function(event, ui) {
        if (ui.newContent) {
            var href = ui.newContent.children("span.load_href").html();
            var href = ui.newContent.prev().children("a.load_href").attr('href');
            ui.newContent.children(".loading").show();
            if (href) {
                $.ajax({
                    url:href,
                    cache: false,
                    data:{'sort':cookie(sort_cookie_name)},
                    success: function(data) {
                        this.children(".loaded").html(data);

@@ -212,6 +213,7 @@ function load_page(elem) {
        url: href,
        context: elem,
        dataType: "json",
        cache : false,
        type: 'POST',
        timeout: 600000, //milliseconds (10 minutes)
        data: {'ids': ids},

@@ -255,7 +257,7 @@ function booklist(hide_sort) {
function show_details(a_dom) {
    var book = $(a_dom).closest('div.summary');
    var bd = $('#book_details_dialog');
    bd.html('<span class="loading"><img src="/static/loading.gif" alt="Loading" />Loading, please wait…</span>');
    bd.html('<span class="loading"><img src="'+url_prefix+'/static/loading.gif" alt="Loading" />Loading, please wait…</span>');
    bd.dialog('option', 'width', $(window).width() - 100);
    bd.dialog('option', 'height', $(window).height() - 100);
    bd.dialog('option', 'title', book.find('.title').text());

@@ -263,6 +265,7 @@ function show_details(a_dom) {
    $.ajax({
        url: book.find('.details-href').attr('title'),
        context: bd,
        cache: false,
        dataType: "json",
        timeout: 600000, //milliseconds (10 minutes)
        error: function(xhr, stat, err) {
@@ -1,6 +1,6 @@
<div id="details_{id}" class="details">
    <div class="left">
        <img alt="Cover of {title}" src="/get/cover/{id}" />
        <img alt="Cover of {title}" src="{prefix}/get/cover/{id}" />
    </div>
    <div class="right">
        <div class="field formats">{formats}</div>

@@ -1,6 +1,6 @@
<div id="summary_{id}" class="summary">
    <div class="left">
        <img alt="Cover of {title}" src="/get/thumb_90_120/{id}" />
        <img alt="Cover of {title}" src="{prefix}/get/thumb_90_120/{id}" />
        {get_button}
    </div>
    <div class="right">

@@ -8,7 +8,7 @@
    <span class="rating_container">{stars}</span>
    <span class="series">{series}</span>
    <a href="#" onclick="show_details(this); return false;" title="{details_tt}">{details}</a>
    <a href="/browse/book/{id}" title="{permalink_tt}">{permalink}</a>
    <a href="{prefix}/browse/book/{id}" title="{permalink_tt}">{permalink}</a>
    </div>
    <div class="title"><strong>{title}</strong></div>
    <div class="authors">{authors}</div>
BIN resources/content_server/button-donate.png (new file, 1.6 KiB)
@@ -40,7 +40,7 @@ function create_table_headers() {

function format_url(format, id, title) {
    return 'get/'+format.toLowerCase() + '/'+encodeURIComponent(title) + '_' + id+'.'+format.toLowerCase();
    return url_prefix + '/get/'+format.toLowerCase() + '/'+encodeURIComponent(title) + '_' + id+'.'+format.toLowerCase();
}

function render_book(book) {

@@ -101,7 +101,7 @@ function render_book(book) {
        }
    }
    title += '</span>'
    title += '<img style="display:none" alt="" src="get/cover/{0}" /></span>'.format(id);
    title += '<img style="display:none" alt="" src="{1}/get/cover/{0}" /></span>'.format(id, url_prefix);
    title += '<div class="comments">{0}</div>'.format(comments)
    // Render authors cell
    var _authors = new Array();
@@ -3,26 +3,27 @@
<html xmlns="http://www.w3.org/1999/xhtml" version="XHTML 1.1" xml:lang="en">
<head>
    <title>calibre library</title>
    <link rel="stylesheet" type="text/css" href="/static/gui.css" charset="utf-8" />
    <script type="text/javascript" src="/static/date.js" charset="utf-8"></script>
    <script type="text/javascript" src="/static/jquery.js" charset="utf-8"></script>
    <script type="text/javascript" src="/static/gui.js" charset="utf-8"></script>
    <script type="text/javascript">var url_prefix='{prefix}';</script>
    <link rel="stylesheet" type="text/css" href="{prefix}/static/gui.css" charset="utf-8" />
    <script type="text/javascript" src="{prefix}/static/date.js" charset="utf-8"></script>
    <script type="text/javascript" src="{prefix}/static/jquery.js" charset="utf-8"></script>
    <script type="text/javascript" src="{prefix}/static/gui.js" charset="utf-8"></script>
    <link rel="icon" href="http://calibre-ebook.com/favicon.ico" type="image/x-icon" />
</head>
<body>
    <div id="banner">
        <a style="border: 0pt" href="http://calibre-ebook.com" alt="calibre" title="calibre"><img style="border:0pt" src="/static/calibre_banner.png" alt="calibre" /></a>
        <a style="border: 0pt" href="http://calibre-ebook.com" alt="calibre" title="calibre"><img style="border:0pt" src="{prefix}/static/calibre_banner.png" alt="calibre" /></a>
    </div>

    <div id="search_box">
        <form name="search_form" onsubmit="search();return false;" action="./" method="get" accept-charset="UTF-8">
        <form name="search_form" onsubmit="search();return false;" action="{prefix}/old" method="get" accept-charset="UTF-8">
            <input value="" id="s" type="text" />
            <input type="image" src="/static/btn_search_box.png" width="27" height="24" id="go" alt="Search" title="Search" />
            <input type="image" src="{prefix}/static/btn_search_box.png" width="27" height="24" id="go" alt="Search" title="Search" />
        </form>
    </div>

    <div id="count_bar">
        <span id="left"><img src="/static/first.png" alt="Show first set of books" title="Show first set of books"/> <img src="/static/previous.png" alt="Show previous set of books" title="Show previous set of books"/> </span><span id="count"> </span> <span id="right"><img src="/static/next.png" alt="Show next set of books" title="Show next set of books"/> <img src="/static/last.png" alt="Show last set of books" title="Show last set of books" /></span>
        <span id="left"><img src="{prefix}/static/first.png" alt="Show first set of books" title="Show first set of books"/> <img src="{prefix}/static/previous.png" alt="Show previous set of books" title="Show previous set of books"/> </span><span id="count"> </span> <span id="right"><img src="{prefix}/static/next.png" alt="Show next set of books" title="Show next set of books"/> <img src="{prefix}/static/last.png" alt="Show last set of books" title="Show last set of books" /></span>
    </div>

    <div id="main">

@@ -38,7 +39,7 @@

    <div id="loading">
        <div>
            <img align="top" src="/static/loading.gif" alt="Loading..." title="Loading..."/> <span id="loading_msg">Loading…</span>
            <img align="top" src="{prefix}/static/loading.gif" alt="Loading..." title="Loading..."/> <span id="loading_msg">Loading…</span>
        </div>
    </div>
@@ -1,5 +1,9 @@
/* CSS for the mobile version of the content server webpage */

.body {
    font-family: sans-serif;
}

.navigation table.buttons {
    width: 100%;
}

@@ -53,6 +57,7 @@ div.navigation {
}
#listing td {
    padding: 0.25em;
    vertical-align: middle;
}

#listing td.thumbnail {

@@ -73,6 +78,7 @@ div.navigation {
    overflow: hidden;
    text-align: center;
    text-decoration: none;
    vertical-align: middle;
}

#logo {

@@ -83,4 +89,17 @@ div.navigation {
    clear: both;
}

.data-container {
    display: inline-block;
    vertical-align: middle;
}

.first-line {
    font-size: larger;
    font-weight: bold;
}

.second-line {
    margin-top: 0.75ex;
    display: block;
}
resources/content_server/read/monocle.js (new file, 3385 lines)
@@ -106,7 +106,8 @@ title_sort_articles=r'^(A|The|An)\s+'
auto_connect_to_folder = ''


# Specify renaming rules for sony collections. Collections on Sonys are named
# Specify renaming rules for sony collections. This tweak is only applicable if
# metadata management is set to automatic. Collections on Sonys are named
# depending upon whether the field is standard or custom. A collection derived
# from a standard field is named for the value in that field. For example, if
# the standard 'series' column contains the name 'Darkover', then the series

@@ -137,6 +138,24 @@ auto_connect_to_folder = ''
sony_collection_renaming_rules={}
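# A hedged example (an editor illustration, not part of the diff; the custom
# column name '#mytags' is an assumption):
# sony_collection_renaming_rules={'series':'Series', 'tags':'Tag', '#mytags':'My Tag'}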

# Specify how sony collections are sorted. This tweak is only applicable if
# metadata management is set to automatic. You can indicate which metadata is to
# be used to sort on a collection-by-collection basis. The format of the tweak
# is a list of metadata fields from which collections are made, followed by the
# name of the metadata field containing the sort value.
# Example: The following indicates that collections built from pubdate and tags
# are to be sorted by the value in the custom column '#mydate', that collections
# built from 'series' are to be sorted by 'series_index', and that all other
# collections are to be sorted by title. If a collection metadata field is not
# named, then if it is a series-based collection it is sorted by series order,
# otherwise it is sorted by title order.
# [(['pubdate', 'tags'],'#mydate'), (['series'],'series_index'), (['*'], 'title')]
# Note that the bracketing and parentheses are required. The syntax is
# [ ( [list of fields], sort field ) , ( [ list of fields ] , sort field ) ]
# Default: empty (no rules), so no collection attributes are named.
sony_collection_sorting_rules = []


# Create search terms to apply a query across several built-in search terms.
# Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...}
# Example: create the term 'myseries' that when used as myseries:foo would

@@ -184,3 +203,17 @@ content_server_wont_display = ['']
# level sorts, and if you are seeing a slowdown, reduce the value of this tweak.
maximum_resort_levels = 5

# Absolute path to a TTF font file to use as the font for the title and author
# when generating a default cover. Useful if the default font (Liberation
# Serif) does not contain glyphs for the language of the books in your library.
generate_cover_title_font = None

# Absolute path to a TTF font file to use as the font for the footer in the
# default cover
generate_cover_foot_font = None
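# A hedged example (an editor illustration, not part of the diff; the font
# paths are assumptions for a typical Linux install):
# generate_cover_title_font = '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf'
# generate_cover_foot_font = '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf'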

# Behavior of doubleclick on the books list. Choices:
# open_viewer, do_nothing, edit_cell. Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing'
doubleclick_on_library_view = 'open_viewer'
BIN resources/images/format-text-bold.png (new file, 5.0 KiB)
BIN resources/images/format-text-italic.png (new file, 4.1 KiB)
BIN resources/images/format-text-strikethrough.png (new file, 5.9 KiB)
BIN resources/images/format-text-underline.png (new file, 4.4 KiB)
BIN resources/images/hotmail.png (new file, 2.6 KiB)
BIN resources/images/news/avto-magazin.png (new file, 1.4 KiB)
BIN resources/images/news/dnevnik.png (new file, 861 B)
BIN resources/images/news/perfil.png (new file, 781 B)
BIN resources/images/news/rollingstone.png (new file, 1.3 KiB)
BIN resources/images/news/siol.png (new file, 423 B)
BIN resources/images/news/theecocolapse.png (new file, 1.2 KiB)
@@ -39,7 +39,16 @@
.cbj_title {
    font-size: x-large;
    text-align: center;
}
}

/*
** Author
*/
.cbj_author {
    font-size: medium;
    text-align: center;
    margin-bottom: 1ex;
}

/*
** Table containing Series, Publication Year, Rating and Tags

@@ -7,6 +7,7 @@
<body>
    <div class="cbj_banner">
        <div class="cbj_title">{title}</div>
        <div class="cbj_author">{author}</div>
        <table class="cbj_header">
            <tr class="cbj_series">
                <td class="cbj_label">{series_label}:</td>
@@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
        for poem in soup.findAll('div', attrs={'class':'poem'}):
            title = self.tag_to_string(poem.find('h4'))
            desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
            url = 'http://www.theatlantic.com'+poem.find('a')['href']
            url = poem.find('a')['href']
            if url.startswith('/'):
                url = 'http://www.theatlantic.com' + url
            self.log('\tFound article:', title, 'at', url)
            self.log('\t\t', desc)
            poems.append({'title':title, 'url':url, 'description':desc,

@@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
        if div is not None:
            self.log('Found section: Advice')
            title = self.tag_to_string(div.find('h4'))
            url = 'http://www.theatlantic.com'+div.find('a')['href']
            url = div.find('a')['href']
            if url.startswith('/'):
                url = 'http://www.theatlantic.com' + url
            desc = self.tag_to_string(div.find('p'))
            self.log('\tFound article:', title, 'at', url)
            self.log('\t\t', desc)
resources/recipes/avto-magazin.recipe (new file, 46 lines)
@@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
avto-magazin.si
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Dnevnik(BasicNewsRecipe):
    title = u'Avto Magazin'
    __author__ = u'BlonG'
    description = u'Za avtomobilistične frike, poznavalce in nedeljske šoferje.'
    oldest_article = 7
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {'linearize_tables' : True}

    cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'id':'_iprom_inStream'}),
        # dict(name='div', attrs={'class':'entry-content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'id':'voteConfirmation'}),
        dict(name='div', attrs={'id':'InsideVote'}),
        dict(name='div', attrs={'class':'Zone234'}),
        dict(name='div', attrs={'class':'Comments'}),
        dict(name='div', attrs={'class':'sorodneNovice'}),
        dict(name='div', attrs={'id':'footer'}),
    ]

    feeds = [
        (u'Novice', u'http://www.avto-magazin.si/rss/')
    ]
@@ -1,37 +1,37 @@
import datetime
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1286242553(BasicNewsRecipe):
    title = u'CACM'
    oldest_article = 7
    max_articles_per_feed = 100
    needs_subscription = True
    feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')]
    language = 'en'
    __author__ = 'jonmisurda'
    no_stylesheets = True
    remove_tags = [
        dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn', \
            'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']})
    ]
    cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://cacm.acm.org/login')
            br.select_form(nr=1)
            br['current_member[user]'] = self.username
            br['current_member[passwd]'] = self.password
            br.submit()
        return br

    def get_cover_url(self):
        now = datetime.datetime.now()

        cover_url = None
        soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month))
        cover_item = soup.find('img',attrs={'alt':'magazine cover image'})
        if cover_item:
            cover_url = cover_item['src']
        return cover_url
resources/recipes/calcalist.recipe (new file, 43 lines)
@@ -0,0 +1,43 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import re

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = "This is a recipe of Calcalist.co.il. The recipe downloads the article page so as not to hurt the site's advertising income."
    cover_url = 'http://ftp5.bizportal.co.il/web/giflib/news/calcalist.JPG'
    title = u'Calcalist'
    language = 'he'
    __author__ = 'marbs'
    extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_attributes = ['width']
    keep_only_tags = dict(name='div', attrs={'id':'articleContainer'})
    remove_tags = [dict(name='p', attrs={'text':[' ']})]
    preprocess_regexps = [
        (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]

    feeds = [(u'\u05d3\u05e3 \u05d4\u05d1\u05d9\u05ea', u'http://www.calcalist.co.il/integration/StoryRss8.xml'),
        (u'24/7', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
        (u'\u05d1\u05d0\u05d6\u05d6', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
        (u'\u05de\u05d1\u05d6\u05e7\u05d9\u05dd', u'http://www.calcalist.co.il/integration/StoryRss184.xml'),
        (u'\u05d4\u05e9\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss2.xml'),
        (u'\u05d1\u05d0\u05e8\u05e5', u'http://www.calcalist.co.il/integration/StoryRss14.xml'),
        (u'\u05d4\u05db\u05e1\u05e3', u'http://www.calcalist.co.il/integration/StoryRss9.xml'),
        (u'\u05e0\u05d3\u05dc"\u05df', u'http://www.calcalist.co.il/integration/StoryRss7.xml'),
        (u'\u05e2\u05d5\u05dc\u05dd', u'http://www.calcalist.co.il/integration/StoryRss13.xml'),
        (u'\u05e4\u05e8\u05e1\u05d5\u05dd \u05d5\u05e9\u05d9\u05d5\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss5.xml'),
        (u'\u05e4\u05e0\u05d0\u05d9', u'http://www.calcalist.co.il/integration/StoryRss3.xml'),
        (u'\u05d8\u05db\u05e0\u05d5\u05dc\u05d5\u05d2\u05d9', u'http://www.calcalist.co.il/integration/StoryRss4.xml'),
        (u'\u05e2\u05e1\u05e7\u05d9 \u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.calcalist.co.il/integration/StoryRss18.xml')]

    def print_version(self, url):
        split1 = url.split("-")
        print_url = 'http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
        return print_url
resources/recipes/clic_rbs.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ClicRBS(BasicNewsRecipe):
    title = u'ClicRBS'
    language = 'pt'
    __author__ = 'arvoredo'
    oldest_article = 3
    max_articles_per_feed = 9
    cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'

    remove_tags = [
        dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']})
    ]

    # NB: these are repeated assignments, so only the last remove_tags_before
    # and remove_tags_after actually take effect
    remove_tags_before = dict(name='div ', attrs={'class':'descricao'})
    remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'})
    remove_tags_before = dict(name='div', attrs={'class':'descricao'})
    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
    remove_tags_after = dict(name='div', attrs={'class':'extra'})
    remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'})
    remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'})
    remove_tags_after = dict(name='ul', attrs={'class':'lista'})

    feeds = [
        (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13')
        , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67')
        , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml')
        , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1')
        , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13')
        , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13')
        , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1')
        , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1')
        , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1')
        , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2')
        , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1')
        , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13')
        , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2')
        , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18')
        , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2')
        , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
    ]

    extra_css = '''
        cite{color:#007BB5; font-size:xx-small; font-style:italic;}
        body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        h3{font-size:large; color:#082963; font-weight:bold;}
        #ident{color:#0179B4; font-size:xx-small;}
        p{color:#000000;font-weight:normal;}
        .commentario p{color:#007BB5; font-style:italic;}
    '''
resources/recipes/cm_journal.recipe (new file, 44 lines)
@@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe

class CMJornal_pt(BasicNewsRecipe):
    title = 'Correio da Manha - Portugal'
    __author__ = 'jmst'
    description = 'As noticias de Portugal e do Mundo'
    publisher = 'Cofina Media'
    category = ''
    oldest_article = 1
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'pt'
    extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '

    conversion_options = {
        'comment'    : description
        , 'tags'     : category
        , 'publisher': publisher
        , 'language' : language
    }

    keep_only_tags = [
        dict(name=['h2','h1'])
        , dict(name='div', attrs={'class': ['news']})
    ]

    remove_tags = [
        dict(name=['object','embed','iframe'])
        ,dict(name='a',attrs={'href':['#']})
    ]

    feeds = [
        (u'Actualidade' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009' )
        ,(u'Portugal'   , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010' )
        ,(u'Economia'   , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011' )
        ,(u'Mundo'      , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091' )
        ,(u'Desporto'   , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012' )
        ,(u'TV & Media' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092')
    ]

    def print_version(self, url):
        return url.replace('noticia.aspx', 'Imprimir.aspx')
@@ -1,9 +1,7 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
newyorker.com
cubadebate.cu
'''

from calibre.web.feeds.news import BasicNewsRecipe

@@ -13,32 +11,44 @@ class CubaDebate(BasicNewsRecipe):
    __author__ = 'Darko Miletic'
    description = 'Contra el Terorismo Mediatico'
    oldest_article = 15
    language = 'es'

    language = 'es'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    publisher = 'Cubadebate'
    category = 'news, politics, Cuba'
    encoding = 'utf-8'
    extra_css = ' #BlogTitle{font-size: x-large; font-weight: bold} '
    masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
    publication_type = 'newsportal'
    extra_css = """
        #BlogTitle{font-size: xx-large; font-weight: bold}
        body{font-family: Verdana, Arial, Tahoma, sans-serif}
    """

    conversion_options = {
        'comments'     : description
        ,'tags'        : category
        ,'language'    : 'es'
        ,'language'    : language
        ,'publisher'   : publisher
        ,'pretty_print': True
    }

    keep_only_tags = [dict(name='div', attrs={'id':'Outline'})]
    remove_tags_after = dict(name='div',attrs={'id':'BlogContent'})
    remove_tags = [dict(name='link')]
    remove_tags = [
        dict(name=['link','base','embed','object','meta','iframe'])
        ,dict(attrs={'id':'addthis_container'})
    ]

    feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]

    remove_attributes=['width','height','lang']

    def print_version(self, url):
        return url + 'print/'

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
@@ -25,7 +25,7 @@ class Danas(BasicNewsRecipe):
    remove_empty_feeds = True
    extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
        .article,.articledescription,body,.lokacija,.feed{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
        .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif}
        .antrfileText{border-left: 2px solid #999999;
            margin-left: 0.8em;

@@ -59,11 +59,14 @@ class Danas(BasicNewsRecipe):
    ,(re.compile(u'\u201d'), lambda match: '”') # right double quotation mark
    ,(re.compile(u'\u201e'), lambda match: '“') # double low-9 quotation mark
    ,(re.compile(u'\u201f'), lambda match: '”') # double high-reversed-9 quotation mark
    ,(re.compile(u'\u00f4'), lambda match: '“') # latin small letter o with circumflex
    ,(re.compile(u'\u00f6'), lambda match: '”') # latin small letter o with diaeresis
    ,(re.compile(u'\u00e1'), lambda match: ' ' ) # latin small letter a with acute
    ]

    keep_only_tags = [dict(name='div', attrs={'id':'left'})]
    remove_tags = [
        dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
        dict(name='div', attrs={'class':['width_1_4','metaClanka','baner','listaVesti','article_nav']})
        ,dict(name='div', attrs={'id':'comments'})
        ,dict(name=['object','link','iframe','meta'])
    ]
resources/recipes/deredactie.recipe (new file, 61 lines)
@ -0,0 +1,61 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class deredactie(BasicNewsRecipe):
|
||||
title = u'Deredactie.be'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
cover_url = 'http://www.deredactie.be/polopoly_fs/1.510827!image/2710428628.gif'
|
||||
language = 'de'
|
||||
keep_only_tags = []
|
||||
__author__ = 'malfi'
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlehead'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlebody'}))
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name = 'div', attrs = {'id': 'story'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'id': 'useractions'}))
|
||||
remove_tags.append(dict(name = 'hr'))
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
def parse_index(self):
|
||||
categories = []
|
||||
catnames = {}
|
||||
soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch')
|
||||
for elem in soup.findAll('li', attrs={'id' : re.compile("^navItem[2-9]") }):
|
||||
a = elem.find('a', href=True)
|
||||
m = re.search('(?<=/)[^/]*$', a['href'])
|
||||
cat = str(m.group(0))
|
||||
categories.append(cat)
|
||||
catnames[cat] = a['title']
|
||||
self.log("found cat %s\n" % catnames[cat])
|
||||
|
||||
feeds = []
|
||||
|
||||
for cat in categories:
|
||||
articles = []
|
||||
soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch/'+cat)
|
||||
for a in soup.findAll('a',attrs={'href' : re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
|
||||
skip_this_article = False
|
||||
url = a['href'].strip()
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.deredactie.be' + url
|
||||
myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''})
|
||||
for article in articles :
|
||||
if article['url'] == url :
|
||||
skip_this_article = True
|
||||
self.log("SKIPPING DUP %s" % url)
|
||||
break
|
||||
if skip_this_article :
|
||||
continue;
|
||||
articles.append(myarticle)
|
||||
self.log("Adding URL %s\n" %url)
|
||||
if articles:
|
||||
feeds.append((catnames[cat], articles))
|
||||
return feeds
|
||||
|
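# NOTE (illustrative, not part of the diff): parse_index() must return the same
# shape a feed-based recipe builds from RSS, i.e. a list of
# (section title, article list) tuples such as
#   [(u'Politik', [{'title': u'...', 'url': 'http://www.deredactie.be/...',
#                   'description': '', 'date': ''}]), ...]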
42
resources/recipes/diario_sport.recipe
Normal file
@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe

class DiarioSport(BasicNewsRecipe):
    title = u'Diario Sport'
    oldest_article = 2
    max_articles_per_feed = 75
    __author__ = 'Jefferson Frantz'
    description = 'Todas las noticias del Barça y del mundo del deporte en general'
    timefmt = ' [%d %b, %Y]'
    language = 'es'
    no_stylesheets = True

    feeds = [(u'Sport', u'http://feeds.feedburner.com/sport/ultimahora')]

    extra_css = '''
        h2{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: justify}
        '''

    keep_only_tags = [dict(name='div', attrs={'id':['noticiasMedio']})]

    remove_tags = [
        dict(name=['object','link','script','ul'])
        ,dict(name='div', attrs={'id':['scrAdSense','herramientas2','participacion','participacion2','bloque1resultados','bloque2resultados','cont_vinyetesAnt','tinta','noticiasSuperior','cintillopublicidad2']})
        ,dict(name='p', attrs={'class':['masinformacion','hora']})
        ,dict(name='a', attrs={'class':["'link'"]})
        ,dict(name='div', attrs={'class':['addthis_toolbox addthis_default_style','firma','pretitularnoticia']})
        ,dict(name='form', attrs={'id':['formularioDeBusquedaAvanzada']})
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup


    def postprocess_html(self, soup, first_fetch):
        img = soup.find('img',src='/img/videos/mascaravideo.png')
        if img is not None:
            img.extract()

        return soup

63
resources/recipes/dnevnik.recipe
Normal file
@ -0,0 +1,63 @@
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
dnevnik.si
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Dnevnik(BasicNewsRecipe):
    title = u'Dnevnik.si'
    __author__ = u'BlonG'
    description = u'''Dnevnik je \u010dasnik z ve\u010d kot polstoletno zgodovino.
        Pod sloganom \xbb\u017divljenje ima besedo\xab na svojih straneh prina\u0161a
        bralcem bogastvo informacij, komentarjev in kolumen in raznovrstnost
        pogledov, zaznamovanih z odgovornostjo do posameznika in \u0161ir\u0161e
        dru\u017ebe.'''
    oldest_article = 3
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/dnevnik_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    keep_only_tags = [
        dict(name='div', attrs={'id':'_iprom_inStream'}),
        dict(name='div', attrs={'class':'entry-content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'fb_article_top'}),
        dict(name='div', attrs={'class':'related'}),
        dict(name='div', attrs={'class':'fb_article_foot'}),
        dict(name='div', attrs={'class':'spreading'}),
        dict(name='dl', attrs={'class':'ad'}),
        dict(name='p', attrs={'class':'report'}),
        dict(name='div', attrs={'class':'hfeed comments'}),
        dict(name='dl', attrs={'id':'entryPanel'}),
        dict(name='dl', attrs={'class':'infopush ip_wide'}),
        dict(name='div', attrs={'class':'sidebar'}),
        dict(name='dl', attrs={'class':'bottom'}),
        dict(name='div', attrs={'id':'footer'}),
    ]


    feeds = [
        (u'Slovenija', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=13')
        ,(u'Svet', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=14')
        ,(u'EU', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=116')
        ,(u'Poslovni dnevnik', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=5')
        ,(u'Kronika', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=15')
        ,(u'Kultura', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=17')
        ,(u'Zdravje', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=18')
        ,(u'Znanost in IT', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=19')
        ,(u'(Ne)verjetno', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=20')
        ,(u'E-strada', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=21')
        ,(u'Svet vozil', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=22')
    ]
77
resources/recipes/el_faro.recipe
Normal file
@ -0,0 +1,77 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ElFaroDeVigo(BasicNewsRecipe):
    title = u'El Faro de Vigo'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Jefferson Frantz'
    description = 'Noticias de Vigo'
    timefmt = ' [%d %b, %Y]'
    language = 'es'
    encoding = 'cp1252'
    no_stylesheets = True
    remove_javascript = True

    feeds = [
##        (u'Vigo', u'http://www.farodevigo.es/elementosInt/rss/1'),
##        (u'Gran Vigo', u'http://www.farodevigo.es/elementosInt/rss/2'),
        (u'Galicia', u'http://www.farodevigo.es/elementosInt/rss/4'),
        (u'España', u'http://www.farodevigo.es/elementosInt/rss/6'),
        (u'Mundo', u'http://www.farodevigo.es/elementosInt/rss/7'),
##        (u'Opinión', u'http://www.farodevigo.es/elementosInt/rss/5'),
        (u'Economía', u'http://www.farodevigo.es/elementosInt/rss/10'),
        (u'Sociedad y Cultura', u'http://www.farodevigo.es/elementosInt/rss/8'),
        (u'Sucesos', u'http://www.farodevigo.es/elementosInt/rss/9'),
        (u'Deportes', u'http://www.farodevigo.es/elementosInt/rss/11'),
        (u'Agenda', u'http://www.farodevigo.es/elementosInt/rss/21'),
        (u'Gente', u'http://www.farodevigo.es/elementosInt/rss/24'),
        (u'Televisión', u'http://www.farodevigo.es/elementosInt/rss/25'),
        (u'Ciencia y Tecnología', u'http://www.farodevigo.es/elementosInt/rss/26')]

    extra_css = '''.noticia_texto{ font-family: sans-serif; font-size: medium; text-align: justify }
        h1{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}
        h2{font-family: serif; font-size: medium; font-weight: bold; color: #000000; text-align: left}
        .enlacenegrita10{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: left}
        .noticia_titular{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}'''


    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']

        url = 'http://estaticos00.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img',src=url)
        if fitem:
            par = fitem.parent
            par.extract()
        url = 'http://estaticos01.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img',src=url)
        if fitem:
            par = fitem.parent
            par.extract()
        url = 'http://estaticos02.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img',src=url)
        if fitem:
            par = fitem.parent
            par.extract()

        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        divs = soup.findAll(True, {'class':'enlacenegrita10'})
        for div in divs:
            div['align'] = 'left'

        return soup


    keep_only_tags = [dict(name='div', attrs={'class':['noticias']})]

    remove_tags = [
        dict(name=['object','link','script','ul','iframe','ol'])
        ,dict(name='div', attrs={'class':['noticiadd2', 'cintillo2', 'noticiadd', 'noticiadd2']})
        ,dict(name='div', attrs={'class':['imagen_derecha', 'noticiadd3', 'extraHTML']})

    ]

@ -2,7 +2,7 @@
__license__ = 'GPL v3'
__author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
description = 'Main daily newspaper from Spain - v1.03 (03, September 2010)'
description = 'Main daily newspaper from Spain - v1.04 (19, October 2010)'
__docformat__ = 'restructuredtext en'

'''
@ -32,19 +32,16 @@ class ElPais(BasicNewsRecipe):
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})]

    extra_css = '''
        p{style:normal size:12 serif}
    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]

        '''
    extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '

    remove_tags = [
        dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos estirar','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
        dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}),
        dict(name='p', attrs={'class':'nav_meses'}),
        dict(attrs={'class':['enlaces_m','miniaturas_m']})
        dict(attrs={'class':['enlaces_m','miniaturas_m','nav_miniaturas_m']})
    ]

    feeds = [

@ -1,38 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

'''
Fetch elektrolese.
'''

from calibre.web.feeds.news import BasicNewsRecipe


class elektrolese(BasicNewsRecipe):

    title = u'elektrolese'
    description = 'News about electronic publishing'
    __author__ = 'Oliver Niesner'
    use_embedded_content = False
    timefmt = ' [%a %d %b %Y]'
    language = 'de'

    oldest_article = 14
    max_articles_per_feed = 50
    no_stylesheets = True
    conversion_options = {'linearize_tables':True}
    encoding = 'utf-8'


    remove_tags_after = [dict(id='comments')]
    filter_regexps = [r'ad\.doubleclick\.net']

    remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
        dict(id='comments'),
        dict(id='Navbar1')]



    feeds = [ (u'elektrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic <darko.miletic at gmail.com>'
'''
elperiodico.cat
'''
@ -12,8 +12,8 @@ from calibre.ebooks.BeautifulSoup import Tag

class ElPeriodico_cat(BasicNewsRecipe):
    title = 'El Periodico de Catalunya'
    __author__ = 'Darko Miletic'
    description = 'Noticias desde Catalunya'
    __author__ = 'Jordi Balcells/Darko Miletic'
    description = 'Noticies des de Catalunya'
    publisher = 'elperiodico.cat'
    category = 'news, politics, Spain, Catalunya'
    oldest_article = 2
@ -33,15 +33,25 @@ class ElPeriodico_cat(BasicNewsRecipe):

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u"Tota l'edició", u'http://www.elperiodico.cat/rss.asp?id=46')]
    feeds = [(u'Portada', u'http://www.elperiodico.cat/ca/rss/rss_portada.xml'),
        (u'Internacional', u'http://www.elperiodico.cat/ca/rss/internacional/rss.xml'),
        (u'Societat', u'http://www.elperiodico.cat/ca/rss/societat/rss.xml'),
        (u'Ci\xe8ncia i tecnologia', u'http://www.elperiodico.cat/ca/rss/ciencia-i-tecnologia/rss.xml'),
        (u'Esports', u'http://www.elperiodico.cat/ca/rss/esports/rss.xml'),
        (u'Gent', u'http://www.elperiodico.cat/ca/rss/gent/rss.xml'),
        (u'Opini\xf3', u'http://www.elperiodico.cat/ca/rss/opinio/rss.xml'),
        (u'Pol\xedtica', u'http://www.elperiodico.cat/ca/rss/politica/rss.xml'),
        (u'Barcelona', u'http://www.elperiodico.cat/ca/rss/barcelona/rss.xml'),
        (u'Economia', u'http://www.elperiodico.cat/ca/rss/economia/rss.xml'),
        (u'Cultura i espectacles', u'http://www.elperiodico.cat/ca/rss/cultura-i-espectacles/rss.xml'),
        (u'Tele', u'http://www.elperiodico.cat/ca/rss/tele/rss.xml')]


    keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
    keep_only_tags = [dict(name='div', attrs={'class':'titularnoticia'}),
        dict(name='div', attrs={'class':'noticia_completa'})]

    remove_tags = [
        dict(name=['object','link','script'])
        ,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
        ,dict(name='div', attrs={'id':'inferiores'})
    remove_tags = [dict(name='div', attrs={'class':['opcionb','opcionb last','columna_noticia']}),
        dict(name='span', attrs={'class':'opcionesnoticia'})
    ]

    def print_version(self, url):

@ -2,17 +2,17 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic <darko.miletic at gmail.com>'
'''
elperiodico.com
elperiodico.cat
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class ElPeriodico_esp(BasicNewsRecipe):
class ElPeriodico_cat(BasicNewsRecipe):
    title = 'El Periodico de Catalunya'
    __author__ = 'Darko Miletic'
    __author__ = 'Jordi Balcells/Darko Miletic'
    description = 'Noticias desde Catalunya'
    publisher = 'elperiodico.com'
    category = 'news, politics, Spain, Catalunya'
@ -33,15 +33,25 @@ class ElPeriodico_esp(BasicNewsRecipe):

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u"Toda la edición", u'http://www.elperiodico.com/rss.asp?id=46')]
    feeds = [(u'Portada', u'http://www.elperiodico.com/es/rss/rss_portada.xml'),
        (u'Internacional', u'http://elperiodico.com/es/rss/internacional/rss.xml'),
        (u'Sociedad', u'http://elperiodico.com/es/rss/sociedad/rss.xml'),
        (u'Ciencia y Tecnolog\xeda', u'http://elperiodico.com/es/rss/ciencia-y-tecnologia/rss.xml'),
        (u'Deportes', u'http://elperiodico.com/es/rss/deportes/rss.xml'),
        (u'Gente', u'http://elperiodico.com/es/rss/gente/rss.xml'),
        (u'Opini\xf3n', u'http://elperiodico.com/es/rss/opinion/rss.xml'),
        (u'Pol\xedtica', u'http://elperiodico.com/es/rss/politica/rss.xml'),
        (u'Barcelona', u'http://elperiodico.com/es/rss/barcelona/rss.xml'),
        (u'Econom\xeda', u'http://elperiodico.com/es/rss/economia/rss.xml'),
        (u'Cultura y espect\xe1culos', u'http://elperiodico.com/es/rss/cultura-y-espectaculos/rss.xml'),
        (u'Tele', u'http://elperiodico.com/es/rss/cultura-y-espectaculos/rss.xml')]


    keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
    keep_only_tags = [dict(name='div', attrs={'class':'titularnoticia'}),
        dict(name='div', attrs={'class':'noticia_completa'})]

    remove_tags = [
        dict(name=['object','link','script'])
        ,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
        ,dict(name='div', attrs={'id':'inferiores'})
    remove_tags = [dict(name='div', attrs={'class':['opcionb','opcionb last','columna_noticia']}),
        dict(name='span', attrs={'class':'opcionesnoticia'})
    ]

    def print_version(self, url):

58
resources/recipes/eu_commission.recipe
Normal file
@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe

LANGUAGE = 'de'

def feedlink(num):
    return u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id='+\
        str(num)+'&lang='+ LANGUAGE
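# NOTE (illustrative, not part of the diff): feedlink() only builds the
# numbered Rapid feed URL, e.g. feedlink(64) returns
#   u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id=64&lang=de'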

class EUCommissionPress(BasicNewsRecipe):
    title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'
    __author__ = 'malfi'
    language = LANGUAGE
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'pressReleaseContentMain'}))
    remove_tags = []


    feeds = [
        (u'Pressemitteilung des Tages',feedlink(64)),
        (u'Presidency',feedlink(137)),
        (u'Foreign affairs and security policy',feedlink(138)),
        (u'Agriculture and rural development',feedlink(139)),
        (u'Budget and financial programming ',feedlink(140)),
        (u'Climate action',feedlink(141)),
        (u'Competition',feedlink(142)),
        (u'Development',feedlink(143)),
        (u'Digital agenda',feedlink(144)),
        (u'Economic and monetary affairs',feedlink(145)),
        (u'Education, culture, multilingualism and youth ',feedlink(146)),
        (u'Employment, social Affairs and inclusion ',feedlink(147)),
        (u'Energy',feedlink(148)),
        (u'Enlargment and European neighbourhood policy ',feedlink(149)),
        (u'Environment',feedlink(150)),
        (u'Health and consumer policy',feedlink(151)),
        (u'Home affairs',feedlink(152)),
        (u'Industry and entrepreneurship',feedlink(153)),
        (u'Inter-Institutional relations and administration',feedlink(154)),
        (u'Internal market and services',feedlink(155)),
        (u'International cooperation, humanitarian aid and crisis response',feedlink(156)),
        (u'Justice, fundamental rights and citizenship',feedlink(157)),
        (u'Maritime affairs and fisheries',feedlink(158)),
        (u'Regional policy',feedlink(159)),
        (u'Research and innovation',feedlink(160)),
        (u'Taxation and customs union, audit and anti-fraud',feedlink(161)),
        (u'Trade',feedlink(162)),
        (u'Transport',feedlink(163))
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

51
resources/recipes/european_voice.recipe
Normal file
@ -0,0 +1,51 @@

from calibre.web.feeds.news import BasicNewsRecipe

class EuropeanVoice(BasicNewsRecipe):
    title = u'European Voice'
    __author__ = 'malfi'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
    language = 'en'
    keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
    remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]
    feeds = [
        (u'Whole site ',u'http://www.europeanvoice.com/Rss/2.xml'),
        (u'News and analysis',u'http://www.europeanvoice.com/Rss/6.xml'),
        (u'Comment',u'http://www.europeanvoice.com/Rss/7.xml'),
        (u'Special reports',u'http://www.europeanvoice.com/Rss/5.xml'),
        (u'People',u'http://www.europeanvoice.com/Rss/8.xml'),
        (u'Career',u'http://www.europeanvoice.com/Rss/11.xml'),
        (u'Policies',u'http://www.europeanvoice.com/Rss/4.xml'),
        (u'EVents',u'http://www.europeanvoice.com/Rss/10.xml'),
        (u'Policies - Economics',u'http://www.europeanvoice.com/Rss/31.xml'),
        (u'Policies - Business',u'http://www.europeanvoice.com/Rss/19.xml'),
        (u'Policies - Trade',u'http://www.europeanvoice.com/Rss/25.xml'),
        (u'Policies - Information society',u'http://www.europeanvoice.com/Rss/20.xml'),
        (u'Policies - Energy',u'http://www.europeanvoice.com/Rss/15.xml'),
        (u'Policies - Transport',u'http://www.europeanvoice.com/Rss/18.xml'),
        (u'Policies - Climate change',u'http://www.europeanvoice.com/Rss/16.xml'),
        (u'Policies - Environment',u'http://www.europeanvoice.com/Rss/17.xml'),
        (u'Policies - Farming & food',u'http://www.europeanvoice.com/Rss/23.xml'),
        (u'Policies - Health & society',u'http://www.europeanvoice.com/Rss/24.xml'),
        (u'Policies - Justice',u'http://www.europeanvoice.com/Rss/29.xml'),
        (u'Policies - Foreign affairs',u'http://www.europeanvoice.com/Rss/27.xml')
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        return url + '?bPrint=1'

    def preprocess_html(self, soup):
        denied = soup.findAll(True,text='Subscribers')
        if denied:
            raise Exception('Article skipped, because content can only be seen with subscription')
        return soup

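# NOTE (illustrative, not part of the diff): raising an exception from
# preprocess_html() makes calibre log that single article as failed and carry
# on with the rest of the feed, so subscriber-only pages are quietly dropped.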
@ -4,7 +4,6 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
foxnews.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class FoxNews(BasicNewsRecipe):
@ -21,11 +20,10 @@ class FoxNews(BasicNewsRecipe):
    language = 'en'
    publication_type = 'newsportal'
    remove_empty_feeds = True
    extra_css = ' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em} .caption{font-size: x-small} '

    preprocess_regexps = [
        (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
    ]
    extra_css = """
        body{font-family: Arial,sans-serif }
        .caption{font-size: x-small}
        """

    conversion_options = {
        'comment' : description
@ -34,27 +32,15 @@ class FoxNews(BasicNewsRecipe):
        , 'language' : language
    }

    remove_attributes = ['xmlns']

    keep_only_tags = [
        dict(name='div', attrs={'id' :['story','browse-story-content']})
        ,dict(name='div', attrs={'class':['posts articles','slideshow']})
        ,dict(name='h4' , attrs={'class':'storyDate'})
        ,dict(name='h1' , attrs={'xmlns:functx':'http://www.functx.com'})
        ,dict(name='div', attrs={'class':'authInfo'})
        ,dict(name='div', attrs={'id':'articleCont'})
    ]
    remove_attributes = ['xmlns','lang']

    remove_tags = [
        dict(name='div', attrs={'class':['share-links','quigo quigo2','share-text','storyControls','socShare','btm-links']})
        ,dict(name='div', attrs={'id' :['otherMedia','loomia_display','img-all-path','story-vcmId','story-url','pane-browse-story-comments','story_related']})
        ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2','tabs']})
        ,dict(name='a' , attrs={'class':'join-discussion'})
        ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2']})
        ,dict(name='p' , attrs={'class':'see_fullarchive'})
        ,dict(name=['object','embed','link','script'])
        dict(name=['object','embed','link','script','iframe','meta','base'])
        ,dict(attrs={'class':['user-control','url-description','ad-context']})
    ]

    remove_tags_before=dict(name='h1')
    remove_tags_after =dict(attrs={'class':'url-description'})

    feeds = [
        (u'Latest Headlines', u'http://feeds.foxnews.com/foxnews/latest' )
@ -67,8 +53,5 @@ class FoxNews(BasicNewsRecipe):
        ,(u'Entertainment' , u'http://feeds.foxnews.com/foxnews/entertainment' )
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

    def print_version(self, url):
        return url + 'print'

@ -25,15 +25,15 @@ class Fudzilla(BasicNewsRecipe):
    remove_tags_before = dict(name='div', attrs={'class':['padding']})

    remove_tags = [dict(name='td', attrs={'class':['left','right']}),
        dict(name='div', attrs={'id':['toolbar','buttons']}),
        dict(name='div', attrs={'class':['artbannersxtd','back_button']}),
        dict(name='span', attrs={'class':['pathway']}),
        dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}),
        dict(name='table', attrs={'class':['headlines']}),
        dict(name='div', attrs={'id':['toolbar','buttons']}),
        dict(name='div', attrs={'class':['artbannersxtd','back_button']}),
        dict(name='span', attrs={'class':['pathway']}),
        dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}),
        dict(name='table', attrs={'class':['headlines']}),
    ]

    feeds = [
        (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
        (u'Posts', u'http://www.fudzilla.com/?format=feed')
    ]

    preprocess_regexps = [

40
resources/recipes/gamespot.recipe
Normal file
@ -0,0 +1,40 @@
__license__ = 'GPL v3'
__author__ = u'Marc Toensing'

from calibre.web.feeds.news import BasicNewsRecipe

class GamespotCom(BasicNewsRecipe):

    title = u'Gamespot.com Reviews'
    description = 'review articles from gamespot.com'
    language = 'en'
    __author__ = u'Marc T\xf6nsing'

    oldest_article = 7
    max_articles_per_feed = 40
    remove_empty_feeds = True
    no_stylesheets = True
    no_javascript = True

    feeds = [
        ('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
        ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
        ('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
        ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
        ('PlayStation 3 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1028'),
        ('PlayStation 2 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=7'),
        ('PlayStation Portable Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1024'),
        ('Nintendo DS Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1026'),
        ('iPhone Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1049'),
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'top_bar'}),
        dict(name='div', attrs={'class':'video_embed'})
    ]

    def get_cover_url(self):
        return 'http://image.gamespotcdn.net/gamespot/shared/gs5/gslogo_bw.gif'

    def get_article_url(self, article):
        return article.get('link') + '?print=1'
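# NOTE (illustrative, not part of the diff): get_article_url() runs once per
# feed entry, so every review is fetched through its print view by appending
# '?print=1' to the entry's link.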
28
resources/recipes/german_gov.recipe
Normal file
@ -0,0 +1,28 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class GermanGovermentPress(BasicNewsRecipe):
    title = u'Pressemitteilungen der Bundesregierung'
    oldest_article = 14
    __author__ = 'malfi'
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'h2'))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'textblack'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subtitle'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'text'}))
    remove_tags = []
    feeds = [ (u'Pressemitteilungen',u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf') ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''
    def print_version(self, url):
        m = re.search(r'^(.*).html$', url)
        return str(m.group(1)) + ',layoutVariant=Druckansicht.html'
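# NOTE (illustrative, not part of the diff): the regex strips the trailing
# '.html' and print_version() appends the print-layout suffix, e.g. a
# hypothetical .../pressemitteilung.html becomes
# .../pressemitteilung,layoutVariant=Druckansicht.html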
@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'

__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__copyright__ = '2010, Szing'
__docformat__ = 'restructuredtext en'

'''
@ -10,49 +10,52 @@ globeandmail.com

from calibre.web.feeds.news import BasicNewsRecipe

class GlobeAndMail(BasicNewsRecipe):
    title = u'Globe and Mail'
    language = 'en_CA'

    __author__ = 'Kovid Goyal'
class AdvancedUserRecipe1287083651(BasicNewsRecipe):
    title = u'Globe & Mail'
    __license__ = 'GPL v3'
    __author__ = 'Szing'
    oldest_article = 2
    max_articles_per_feed = 10
    no_stylesheets = True
    extra_css = '''
        h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
        h4 {margin-top: 0px;}
        #byline { font-family: monospace; font-weight:bold; }
        #placeline {font-weight:bold;}
        #credit {margin-top:0px;}
        .tag {font-size: 22pt;}'''
    description = 'Canada\'s national newspaper'
    keep_only_tags = [dict(name='article')]
    remove_tags = [dict(name='aside'),
        dict(name='footer'),
        dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
        dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
    ]
    feeds = [
        (u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
        (u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
        (u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
        (u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
        (u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
        (u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
        (u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
        (u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
        (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
    ]
    max_articles_per_feed = 100
    encoding = 'utf8'
    publisher = 'Globe & Mail'
    language = 'en_CA'
    extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'

    def get_article_url(self, article):
        url = BasicNewsRecipe.get_article_url(self, article)
        if '/video/' not in url:
            return url
    feeds = [
        (u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
        (u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
        (u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
        (u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
        (u'Top Political Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
        (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss'),
        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss')
    ]

    keep_only_tags = [
        dict(name='h1'),
        dict(name='h2', attrs={'id':'articletitle'}),
        dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
        dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
        dict(name='id', attrs={'class':'article'}),
        dict(name='table', attrs={'class':'todays-market'}),
        dict(name='header', attrs={'id':'leadheader'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
    ]

    #this has to be here or the text in the article appears twice.
    remove_tags_after = [dict(id='article')]

    #Use the mobile version rather than the web version
    def print_version(self, url):
        return url + '&service=mobile'

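# NOTE (illustrative, not part of the diff): appending '&service=mobile'
# assumes the article URL already carries a query string (the feed links above
# end in '?service=rss'), so the stripped-down mobile page is requested as
# ...?service=rss&service=mobile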
41
resources/recipes/handelsblatt.recipe
Normal file
@ -0,0 +1,41 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class Handelsblatt(BasicNewsRecipe):
    title = u'Handelsblatt'
    __author__ = 'malfi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'structOneCol'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))
    remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]

    feeds = [
        (u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
        (u'Handelsblatt Top-Themen',u'http://www.handelsblatt.com/rss/top-themen'),
        (u'Handelsblatt Schlagzeilen',u'http://www.handelsblatt.com/rss/ticker/'),
        (u'Handelsblatt Finanzen',u'http://www.handelsblatt.com/rss/finanzen/'),
        (u'Handelsblatt Unternehmen',u'http://www.handelsblatt.com/rss/unternehmen/'),
        (u'Handelsblatt Politik',u'http://www.handelsblatt.com/rss/politik/'),
        (u'Handelsblatt Technologie',u'http://www.handelsblatt.com/rss/technologie/'),
        (u'Handelsblatt Meinung',u'http://www.handelsblatt.com/rss/meinung'),
        (u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
        (u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        m = re.search('(?<=;)[0-9]*', url)
        return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'

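# NOTE (illustrative, not part of the diff): the lookbehind '(?<=;)[0-9]*'
# picks the numeric document id after the first ';' in the article URL, so a
# hypothetical .../politik;2671334 would yield
#   http://www.handelsblatt.com/_b=2671334,_p=21,_t=ftprint,doc_page=0;printpage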
38
resources/recipes/hola.recipe
Normal file
@ -0,0 +1,38 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Brendan Sleight <bms.calibre at barwap.com>'
'''
hola.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Hackaday(BasicNewsRecipe):
    title = u'Hola'
    __author__ = 'bmsleight'
    description = 'diario de actualidad, moda y belleza.'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'es'

    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'id':'cuerpo'})
    ]

    feeds = [
        (u'Famosos' , u'http://www.hola.com/famosos/rss.xml' ),
        (u'Realeza' , u'http://www.hola.com/realeza/rss.xml' ),
        (u'Cine' , u'http://www.hola.com/cine/rss.xml' ),
        (u'Música' , u'http://www.hola.com/musica/rss.xml' ),
        (u'Moda y modelos' , u'http://www.hola.com/moda/portada/rss.xml' ),
        (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml' ),
        (u'Niños' , u'http://www.hola.com/ninos/rss.xml' ),
        (u'Todas las noticias', u'http://int2.hola.com/app/feeds/rss_hola.php'),
    ]

    def get_article_url(self, article):
        url = article.get('guid', None)
        return url
@ -33,13 +33,14 @@ class IrishTimes(BasicNewsRecipe):
        ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
    ]


    def print_version(self, url):
        if url.count('rss.feedsportal.com'):
            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
        else:
            u = url.replace('.html','_pf.html')
        return u
        if url.count('rss.feedsportal.com'):
            u = 'http://www.irishtimes.com' + \
                (((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01..htm','_pf.html')
        else:
            u = url.replace('.html','_pf.html')
        return u


    def get_article_url(self, article):
        return article.link

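# NOTE (illustrative, not part of the diff): feedsportal redirect URLs encode
# '/' as '0C' and '0' as '0A', so the new branch decodes the tail of the URL
# (from character 69 on) and grafts it onto www.irishtimes.com, ending in the
# '_pf.html' print page instead of following the redirector.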
@ -38,6 +38,7 @@ class LaJornada_mx(BasicNewsRecipe):
        .loc{font-weight: bold}
        .carton{text-align: center}
        .credit{font-weight: bold}
        .sumario{font-weight: bold; text-align: center}
        .text{margin-top: 1.4em}
        p.inicial{display: inline; font-size: xx-large; font-weight: bold}
        p.s-s{display: inline; text-indent: 0}

177
resources/recipes/lenta_ru.recipe
Normal file
@ -0,0 +1,177 @@
#!/usr/bin/env python

'''
Lenta.ru
'''

from calibre.web.feeds.feedparser import parse
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe
import re

class LentaRURecipe(BasicNewsRecipe):
    title = u'Lenta.ru: \u041d\u043e\u0432\u043e\u0441\u0442\u0438'
    __author__ = 'Nikolai Kotchetkov'
    publisher = 'lenta.ru'
    category = 'news, Russia'
    description = u'''\u0415\u0436\u0435\u0434\u043d\u0435\u0432\u043d\u0430\u044f
        \u0438\u043d\u0442\u0435\u0440\u043d\u0435\u0442-\u0433\u0430\u0437\u0435\u0442\u0430.
        \u041d\u043e\u0432\u043e\u0441\u0442\u0438 \u0441\u043e
        \u0432\u0441\u0435\u0433\u043e \u043c\u0438\u0440\u0430 \u043d\u0430
        \u0440\u0443\u0441\u0441\u043a\u043e\u043c
        \u044f\u0437\u044b\u043a\u0435'''
    description = u'Ежедневная интернет-газета. Новости со всего мира на русском языке'
    oldest_article = 3
    max_articles_per_feed = 100

    masthead_url = u'http://img.lenta.ru/i/logowrambler.gif'
    cover_url = u'http://img.lenta.ru/i/logowrambler.gif'

    # Add feed names if you want them to be sorted (feeds of this list appear first)
    sortOrder = [u'_default', u'В России', u'б.СССР', u'В мире']

    encoding = 'cp1251'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    recursions = 0

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }


    keep_only_tags = [dict(name='td', attrs={'class':['statya','content']})]

    remove_tags_after = [dict(name='p', attrs={'class':'links'}), dict(name='div', attrs={'id':'readers-block'})]

    remove_tags = [dict(name='table', attrs={'class':['vrezka','content']}), dict(name='div', attrs={'class':'b240'}), dict(name='div', attrs={'id':'readers-block'}), dict(name='p', attrs={'class':'links'})]

    feeds = [u'http://lenta.ru/rss/']

    extra_css = 'h1 {font-size: 1.2em; margin: 0em 0em 0em 0em;} h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;} h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'

    def parse_index(self):
        try:
            feedData = parse(self.feeds[0])
            if not feedData:
                raise NotImplementedError
            self.log("parse_index: Feed loaded successfully.")
            if feedData.feed.has_key('title'):
                self.title = feedData.feed.title
                self.log("parse_index: Title updated to: ", self.title)
            if feedData.feed.has_key('image'):
                self.log("HAS IMAGE!!!!")

            def get_virtual_feed_articles(feed):
                if feeds.has_key(feed):
                    return feeds[feed][1]
                self.log("Adding new feed: ", feed)
                articles = []
                feeds[feed] = (feed, articles)
                return articles

            feeds = {}

            # Iterate feed items and distribute articles using tags
            for item in feedData.entries:
                link = item.get('link', '')
                title = item.get('title', '')
                if '' == link or '' == title:
                    continue
                article = {'title':title, 'url':link, 'description':item.get('description', ''), 'date':item.get('date', ''), 'content':''}
                if not item.has_key('tags'):
                    get_virtual_feed_articles('_default').append(article)
                    continue
                for tag in item.tags:
                    addedToDefault = False
                    term = tag.get('term', '')
                    if '' == term:
                        if (not addedToDefault):
                            get_virtual_feed_articles('_default').append(article)
                        continue
                    get_virtual_feed_articles(term).append(article)

            # Get feed list
            # Select sorted feeds first of all
            result = []
            for feedName in self.sortOrder:
                if (not feeds.has_key(feedName)): continue
                result.append(feeds[feedName])
                del feeds[feedName]
            result = result + feeds.values()

            return result

        except Exception, err:
            self.log(err)
            raise NotImplementedError

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        #self.log('Original: ', soup.prettify())

        contents = Tag(soup, 'div')

        # Extract tags with given attributes
        extractElements = {'div' : [{'id' : 'readers-block'}]}

        # Remove all elements that were not extracted before
        for tag, attrs in extractElements.iteritems():
            for attr in attrs:
                garbage = soup.findAll(tag, attr)
                if garbage:
                    for pieceOfGarbage in garbage:
                        pieceOfGarbage.extract()

        # Find article text using header
        # and add all elements to contents
        element = soup.find({'h1' : True, 'h2' : True})
        if (element):
            element.name = 'h1'
        while element:
            nextElement = element.nextSibling
            element.extract()
            contents.insert(len(contents.contents), element)
            element = nextElement

        # Place article date after header
        dates = soup.findAll(text=re.compile('\d{2}\.\d{2}\.\d{4}, \d{2}:\d{2}:\d{2}'))
        if dates:
            for date in dates:
                for string in date:
                    parent = date.parent
                    if (parent and isinstance(parent, Tag) and 'div' == parent.name and 'dt' == parent['class']):
                        # Date div found
                        parent.extract()
                        parent['style'] = 'font-size: 0.5em; color: gray; font-family: monospace;'
                        contents.insert(1, parent)
                        break

        # Place article picture after date
        pic = soup.find('img')
        if pic:
            picDiv = Tag(soup, 'div')
            picDiv['style'] = 'width: 100%; text-align: center;'
            pic.extract()
            picDiv.insert(0, pic)
            title = pic.get('title', None)
            if title:
                titleDiv = Tag(soup, 'div')
                titleDiv['style'] = 'font-size: 0.5em;'
                titleDiv.insert(0, title)
                picDiv.insert(1, titleDiv)
            contents.insert(2, picDiv)

        body = soup.find('td', {'class':['statya','content']})
        if body:
            body.replaceWith(contents)

        #self.log('Result: ', soup.prettify())
        return soup

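# NOTE (illustrative, not part of the diff): the tag-bucketing in parse_index()
# above amounts to roughly
#   result = [feeds[name] for name in self.sortOrder if name in feeds]
#   result += [v for k, v in feeds.items() if k not in self.sortOrder]
# i.e. the virtual feeds named in sortOrder come first and the remaining
# tag-derived feeds follow in arbitrary dictionary order.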
35
resources/recipes/marctv.recipe
Normal file
@ -0,0 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

'''
Fetch MarcTV.
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MarcTVde(BasicNewsRecipe):

    title = 'Marc Toensings Visionen'

    description = 'Marc Toensings Visionen'

    language = 'de'

    __author__ = 'Marc Toensing'

    max_articles_per_feed = 40

    oldest_article = 665

    use_embedded_content = False

    remove_tags = []

    keep_only_tags = dict(name='div', attrs={'class':["content"]})

    feeds = [(u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'), (u'Leben', u'http://feeds.feedburner.com/marctv/leben'), (u'Medien', u'http://feeds.feedburner.com/marctv/medien')]

    extra_css = '.#wrapper .entry p img{width:620px; height: 270px;}'

    def get_cover_url(self):
        return 'http://marctv.de/marctv.png'
@ -1,53 +1,79 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>'
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, Louis Gesbert <meta at antislash dot info>'
'''
Mediapart
'''

import re, string
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe

class Mediapart(BasicNewsRecipe):
    title = 'Mediapart'
    __author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
    __author__ = 'Mathieu Godlewski'
    description = 'Global news in french from online newspapers'
    oldest_article = 7
    language = 'fr'
    needs_subscription = True

    max_articles_per_feed = 50
    no_stylesheets = True

    html2lrf_options = ['--base-font-size', '10']
    cover_url = 'http://www.mediapart.fr/sites/all/themes/mediapart/mediapart/images/annonce.jpg'

    feeds = [
        ('Les articles', 'http://www.mediapart.fr/articles/feed'),
    ]

    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
        [
            (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
            (r'<p>Mediapart\.fr</p>', lambda match : ''),
            (r'<p[^>]*>[\s]*</p>', lambda match : ''),
            (r'<p><a href="[^\.]+\.pdf">[^>]*</a></p>', lambda match : ''),
    # -- print-version has poor quality on this website, better do the conversion ourselves
    #
    # preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
    #     [
    #         (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
    #         (r'<span class=\'auteur_staff\'>[^>]+<a title=\'[^\']*\'[^>]*>([^<]*)</a>[^<]*</span>',
    #             lambda match : '<i>'+match.group(1)+'</i>'),
    #         (r'\'', lambda match: '’'),
    #     ]
    # ]
    #
    # remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}),
    #                 dict(name='div', attrs={'class':'print-links'}),
    #                 dict(name='img', attrs={'src':'entete_article.png'}),
    #                 dict(name='br') ]
    #
    # def print_version(self, url):
    #     raw = self.browser.open(url).read()
    #     soup = BeautifulSoup(raw.decode('utf8', 'replace'))
    #     div = soup.find('div', {'id':re.compile('node-\d+')})
    #     if div is None:
    #         return None
    #     article_id = string.replace(div['id'], 'node-', '')
    #     if article_id is None:
    #         return None
    #     return 'http://www.mediapart.fr/print/'+article_id

    # -- Non-print version [dict(name='div', attrs={'class':'advert'})]

    keep_only_tags = [
        dict(name='h1', attrs={'class':'title'}),
        dict(name='div', attrs={'class':'page_papier_detail'}),
    ]
        ]

    remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}),
        dict(name='div', attrs={'class':'print-links'}),
        dict(name='img', attrs={'src':'entete_article.png'}),
    ]
    def preprocess_html(self,soup):
        for title in soup.findAll('div', {'class':'titre'}):
            tag = Tag(soup, 'h3')
            title.replaceWith(tag)
            tag.insert(0,title)
        return soup

    # -- Handle login

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.mediapart.fr/')
            br.select_form(nr=1)
            br['name'] = self.username
            br['pass'] = self.password
            br.submit()
        return br

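    # NOTE (illustrative, not part of the diff): mechanize numbers forms from
    # zero, so select_form(nr=1) grabs the second <form> on the front page,
    # assumed here to be the login form with fields named 'name' and 'pass'.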
    def print_version(self, url):
        raw = self.browser.open(url).read()
        soup = BeautifulSoup(raw.decode('utf8', 'replace'))
        div = soup.find('div', {'class':'node node-type-article'})
        if div is None:
            return None
        article_id = string.replace(div['id'], 'node-', '')
        if article_id is None:
            return None
        return 'http://www.mediapart.fr/print/'+article_id

61
resources/recipes/ming_pao.recipe
Normal file
@ -0,0 +1,61 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Eddie Lau'
'''
modified from Singtao Toronto calibre recipe by rty
Change Log:
2010/10/31: skip repeated articles in section pages
'''

import datetime
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1278063072(BasicNewsRecipe):
    title = 'Ming Pao - Hong Kong'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Eddie Lau'
    description = 'Hong Kong Chinese Newspaper'
    publisher = 'news.mingpao.com'
    category = 'Chinese, News, Hong Kong'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'zh'
    encoding = 'Big5-HKSCS'
    recursions = 0
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
    keep_only_tags = [dict(name='h1'),
        dict(attrs={'id':['newscontent01','newscontent02']})]

    def get_fetchdate(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 5.30am, all news are available
        dt_local = dt_utc - datetime.timedelta(-2.5/24)
        return dt_local.strftime("%Y%m%d")
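        # NOTE (illustrative, not part of the diff): subtracting a negative
        # timedelta shifts the clock forward 2.5 hours (UTC+2.5), so the fetch
        # date only rolls over at 21:30 UTC, i.e. 05:30 Hong Kong time (UTC+8),
        # once the day's edition is fully published.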
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
return feeds
|
||||
|
||||
def parse_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
divs = soup.findAll(attrs={'class': ['bullet']})
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in divs:
|
||||
a = i.find('a', href = True)
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
if url not in included_urls:
|
||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||
included_urls.append(url)
|
||||
return current_articles
|
||||
|
57
resources/recipes/mmc_rtv.recipe
Normal file
@ -0,0 +1,57 @@
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.rtvslo.si
'''
from calibre.web.feeds.news import BasicNewsRecipe

class MMCRTV(BasicNewsRecipe):
    title = u'MMC RTV Slovenija'
    __author__ = u'BlonG'
    description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija"
    oldest_article = 3
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/rtv_slo_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        split_url = url.split("/")
        print_url = 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + split_url[-1]
        return print_url

||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'title'}),
|
||||
dict(name='div', attrs={'id':'newsbody'}),
|
||||
dict(name='div', attrs={'id':'newsblocks'}),
|
||||
]
|
||||
# remove_tags=[
|
||||
# 40 dict(name='div', attrs={'id':'newsblocks'}),
|
||||
# ]
|
||||
|
||||
feeds = [
|
||||
(u'Slovenija', u'http://www.rtvslo.si/feeds/01.xml'),
|
||||
(u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
|
||||
(u'Evropska unija', u'http://www.rtvslo.si/feeds/16.xml'),
|
||||
(u'Gospodarstvo', u'http://www.rtvslo.si/feeds/04.xml'),
|
||||
(u'\u010crna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
|
||||
(u'Okolje', u'http://www.rtvslo.si/feeds/12.xml'),
|
||||
(u'Znanost in tehnologija', u'http://www.rtvslo.si/feeds/09.xml'),
|
||||
(u'Zabava', u'http://www.rtvslo.si/feeds/06.xml'),
|
||||
(u'Ture avanture', u'http://www.rtvslo.si/feeds/28.xml'),
|
||||
]
|
||||
|
||||
# def preprocess_html(self, soup):
|
||||
# newsblocks = soup.find('div',attrs = ['id':'newsblocks'])
|
||||
# soup.find('div', attrs = {'id':'newsbody'}).insert(-1, newsblocks)
|
||||
# return soup
|
||||
|
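print_version above builds the print view from the numeric id that ends every article URL; a quick sketch of the transformation (the article URL itself is made up for illustration):

    url = 'http://www.rtvslo.si/slovenija/some-headline/123456'  # hypothetical article URL
    print 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + url.split('/')[-1]
    # -> http://www.rtvslo.si/index.php?c_mod=news&op=print&id=123456
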
@ -8,11 +8,11 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe

class NewScientist(BasicNewsRecipe):
    title = 'New Scientist - Online News'
    title = 'New Scientist - Online News w. subscription'
    __author__ = 'Darko Miletic'
    description = 'Science news and science articles from New Scientist.'
    language = 'en'
    publisher = 'New Scientist'
    publisher = 'Reed Business Information Ltd.'
    category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
    oldest_article = 7
    max_articles_per_feed = 100
@ -21,7 +21,12 @@ class NewScientist(BasicNewsRecipe):
    cover_url = 'http://www.newscientist.com/currentcover.jpg'
    masthead_url = 'http://www.newscientist.com/img/misc/ns_logo.jpg'
    encoding = 'utf-8'
    extra_css = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} '
    needs_subscription = 'optional'
    extra_css = """
        body{font-family: Arial,sans-serif}
        img{margin-bottom: 0.8em}
        .quotebx{font-size: x-large; font-weight: bold; margin-right: 2em; margin-left: 2em}
    """

    conversion_options = {
        'comment' : description
@ -33,15 +38,27 @@ class NewScientist(BasicNewsRecipe):

    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.newscientist.com/')
        if self.username is not None and self.password is not None:
            br.open('https://www.newscientist.com/user/login?redirectURL=')
            br.select_form(nr=2)
            br['loginId' ] = self.username
            br['password'] = self.password
            br.submit()
        return br

    remove_tags = [
         dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
        ,dict(name='div' , attrs={'id'   :['compnl','artIssueInfo','artTools','comments','blgsocial','sharebtns']})
        ,dict(name='p'   , attrs={'class':['marker','infotext' ]})
        ,dict(name='meta', attrs={'name' :'description' })
        ,dict(name='a'   , attrs={'rel'  :'tag' })
        ,dict(name=['link','base','meta','iframe','object','embed'])
    ]
    remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
    remove_attributes = ['height','width']
    remove_attributes = ['height','width','lang']

    feeds = [
        (u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' )
@ -62,6 +79,8 @@ class NewScientist(BasicNewsRecipe):
        return url + '?full=true&print=true'

    def preprocess_html(self, soup):
        for item in soup.findAll(['quote','quotetext']):
            item.name = 'p'
        for tg in soup.findAll('a'):
            if tg.string == 'Home':
                tg.parent.extract()

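The new preprocess_html works because BeautifulSoup lets a tag be renamed in place by assigning to its .name attribute, so the site's nonstandard <quote> and <quotetext> elements become ordinary paragraphs the conversion pipeline understands. A tiny demonstration on synthetic markup:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<quote>Space is big.</quote>')
    for item in soup.findAll(['quote', 'quotetext']):
        item.name = 'p'  # rename the element in place
    print soup  # -> <p>Space is big.</p>
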
68
resources/recipes/newsweek_polska.recipe
Normal file
@ -0,0 +1,68 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe

class Newsweek(BasicNewsRecipe):
    EDITION = 0

    title = u'Newsweek Polska'
    __author__ = 'Mateusz Kielar'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'en'
    remove_javascript = True

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'}))

    remove_tags = []
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'copy'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'url'}))

    extra_css = '''
        .body {font-size: small}
        .author {font-size: x-small}
        .lead {font-size: x-small}
        .title {font-size: x-large; font-weight: bold}
    '''

    def print_version(self, url):
        return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'

    def find_last_full_issue(self):
        page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
        page = self.index_to_soup(issue)
        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
        page = self.index_to_soup(issue)
        self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')

    def parse_index(self):
        self.find_last_full_issue()
        soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
        img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
        self.cover_url = img['src']
        feeds = []
        parent = soup.find(id='content-left-big')
        for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
            section = self.tag_to_string(txt).capitalize()
            articles = list(self.find_articles(txt))
            feeds.append((section, articles))
        return feeds

    def find_articles(self, txt):
        for a in txt.findAllNext(attrs={'class':['strong','hr']}):
            if a.name == 'div':
                break
            yield {
                'title' : self.tag_to_string(a),
                'url' : 'http://www.newsweek.pl' + a['href'],
                'date' : '',
                'description' : ''
            }

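find_last_full_issue walks two levels of frame pages by matching the first attribute-less <span>; BeautifulSoup's find accepts any callable that takes a tag and returns a truthy value. A minimal illustration with synthetic markup:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<span class="x"><a href="a.aspx">no</a></span>'
                         '<span><a href="IssueList.aspx">yes</a></span>')
    # Matches the first <span> that carries no attributes at all.
    print soup.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
    # -> IssueList.aspx
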
35
resources/recipes/now_toronto.recipe
Normal file
@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Based on Lars Jacob's Taz Digiabo recipe

__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'

import os, urllib2, zipfile
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile

class NowToronto(BasicNewsRecipe):
    title = u'Now Toronto'
    description = u'Now Toronto'
    __author__ = 'Starson17'
    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
        soup = self.index_to_soup(epub_feed)
        url = soup.find(name = 'feedburner:origlink').string
        f = urllib2.urlopen(url)
        tmp = PersistentTemporaryFile(suffix='.epub')
        self.report_progress(0, _('downloading epub'))
        tmp.write(f.read())
        tmp.close()
        zfile = zipfile.ZipFile(tmp.name, 'r')
        self.report_progress(0, _('extracting epub'))
        zfile.extractall(self.output_dir)
        tmp.close()
        index = os.path.join(self.output_dir, 'content.opf')
        self.report_progress(1, _('epub downloaded and extracted'))
        return index

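build_index here bypasses calibre's normal download machinery entirely: the publisher already ships a finished EPUB, announced through the feed entry's feedburner:origlink, so the recipe unzips it into the output directory and returns the path of its content.opf as the index calibre would otherwise have generated itself. The fetch-and-unpack core, reduced to a standalone sketch (the EPUB URL is hypothetical):

    import urllib2, zipfile
    from cStringIO import StringIO

    data = urllib2.urlopen('http://example.com/edition.epub').read()  # hypothetical URL
    zipfile.ZipFile(StringIO(data)).extractall('output_dir')
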
@ -5,65 +5,61 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re
import time
from calibre import entity_to_unicode
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
    Comment, BeautifulStoneSoup
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

class NYTimes(BasicNewsRecipe):

    title = 'New York Times Top Stories'
    __author__ = 'GRiker'
    language = 'en'
    requires_version = (0, 7, 5)
    description = 'Top Stories from the New York Times'
    # set headlinesOnly to True for the headlines-only version
    headlinesOnly = True

    # List of sections typically included in Top Stories. Use a keyword from the
    # right column in the excludeSectionKeywords[] list to skip downloading that section
    sections = {
         'arts'             : 'Arts',
         'business'         : 'Business',
         'diningwine'       : 'Dining & Wine',
         'editorials'       : 'Editorials',
         'health'           : 'Health',
         'magazine'         : 'Magazine',
         'mediaadvertising' : 'Media & Advertising',
         'newyorkregion'    : 'New York/Region',
         'oped'             : 'Op-Ed',
         'politics'         : 'Politics',
         'science'          : 'Science',
         'sports'           : 'Sports',
         'technology'       : 'Technology',
         'topstories'       : 'Top Stories',
         'travel'           : 'Travel',
         'us'               : 'U.S.',
         'world'            : 'World'
    }
    # includeSections: List of sections to include. If empty, all sections found will be included.
    # Otherwise, only the sections named will be included. For example,
    #
    #    includeSections = ['Politics','Sports']
    #
    # would cause only the Politics and Sports sections to be included.

    # Add section keywords from the right column above to skip that section
    # For example, to skip sections containing the word 'Sports' or 'Dining', use:
    # excludeSectionKeywords = ['Sports', 'Dining']
    # Fetch only Business and Technology
    # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
    # Fetch only Top Stories
    # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
    # By default, no sections are skipped.
    excludeSectionKeywords = []
    includeSections = []  # by default, all sections included

    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
    # Otherwise, the sections named will be excluded. For example,
    #
    #    excludeSections = ['Politics','Sports']
    #
    # would cause the Politics and Sports sections to be excluded. This parameter can be used
    # in conjunction with includeSections although in most cases using one or the other, but
    # not both, is sufficient.

    excludeSections = []

    # one_picture_per_article specifies that calibre should only use the first image
    # from an article (if one exists). If one_picture_per_article = True, the image
    # will be moved to a location between the headline and the byline.
    # If one_picture_per_article = False, all images from the article will be included
    # and shown in their original location.
    one_picture_per_article = True

    # The maximum number of articles that will be downloaded
    max_articles_per_feed = 40
    max_articles_per_feed = 100

    if headlinesOnly:
        title = 'New York Times Headlines'
        description = 'Headlines from the New York Times'
    else:
        title = 'New York Times'
        description = 'Today\'s New York Times'

    __author__ = 'GRiker/Kovid Goyal/Nick Redding'
    language = 'en'
    requires_version = (0, 7, 5)

    timefmt = ''
    needs_subscription = True
    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    cover_margins = (18,18,'grey99')

@ -82,6 +78,7 @@ class NYTimes(BasicNewsRecipe):
            'entry-response module',
            'icon enlargeThis',
            'leftNavTabs',
            'metaFootnote',
            'module box nav',
            'nextArticleLink',
            'nextArticleLink clearfix',
@ -89,12 +86,13 @@ class NYTimes(BasicNewsRecipe):
            'relatedSearchesModule',
            'side_tool',
            'singleAd',
            'subNavigation clearfix',
            'subNavigation tabContent active',
            'subNavigation tabContent active clearfix',
            re.compile('^subNavigation'),
            re.compile('^leaderboard'),
            re.compile('^module'),
            ]}),
        dict(id=[
            'adxLeaderboard',
            'adxSponLink',
            'archive',
            'articleExtras',
            'articleInline',
@ -105,87 +103,98 @@ class NYTimes(BasicNewsRecipe):
            'footer',
            'header',
            'header_search',
            'inlineBox',
            'login',
            'masthead',
            'masthead-nav',
            'memberTools',
            'navigation',
            'portfolioInline',
            'readerReviews',
            'readerReviewsCount',
            'relatedArticles',
            'relatedTopics',
            'respond',
            'side_search',
            'side_index',
            'side_tool',
            'toolsRight',
            ]),
        dict(name=['script', 'noscript', 'style'])]
        dict(name=['script', 'noscript', 'style','form','hr'])]
    no_stylesheets = True
    extra_css = '.headline {text-align: left;}\n \
                 .byline {font-family: monospace; \
                          text-align: left; \
                          margin-top: 0px; \
                          margin-bottom: 0px;}\n \
                 .dateline {font-size: small; \
                            margin-top: 0px; \
                            margin-bottom: 0px;}\n \
                 .timestamp {font-size: small; \
                             margin-top: 0px; \
                             margin-bottom: 0px;}\n \
                 .source {text-align: left;}\n \
                 .image {text-align: center;}\n \
                 .credit {text-align: right; \
                          font-size: small; \
                          margin-top: 0px; \
                          margin-bottom: 0px;}\n \
                 .articleBody {text-align: left;}\n \
                 .authorId {text-align: left; \
                            font-style: italic;}\n '
    extra_css = '''
        .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
        .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .timestamp { text-align: left; font-size: small; }
        .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        a:link {text-decoration: none; }
        .articleBody { }
        .authorId {text-align: left; }
        .image {text-align: center;}
        .source {text-align: left; }'''

    def dump_ans(self, ans) :
    def filter_ans(self, ans) :
        total_article_count = 0
        for section in ans :
        idx = 0
        idx_max = len(ans)-1
        while idx <= idx_max:
            if self.includeSections != []:
                if ans[idx][0] not in self.includeSections:
                    print "SECTION NOT INCLUDED: ",ans[idx][0]
                    del ans[idx]
                    idx_max = idx_max-1
                    continue
            if ans[idx][0] in self.excludeSections:
                print "SECTION EXCLUDED: ",ans[idx][0]
                del ans[idx]
                idx_max = idx_max-1
                continue
            if self.verbose:
                self.log("section %s: %d articles" % (section[0], len(section[1])) )
            for article in section[1]:
                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
            for article in ans[idx][1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                              article['url'].encode('cp1252','replace')))
            idx = idx+1

        self.log( "Queued %d articles" % total_article_count )
        return ans

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        return fixed
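    # The six chained re.sub calls in fixChars could equally be one
    # table-driven pass; a sketch, not the recipe's own code:
    #   cp1252_punct = {u'\x91': u'\u2018', u'\x92': u'\u2019',
    #                   u'\x93': u'\u201c', u'\x94': u'\u201d',
    #                   u'\x96': u'\u2013', u'\x97': u'\u2014'}
    #   fixed = re.sub(u'[\x91-\x94\x96\x97]',
    #                  lambda m: cp1252_punct[m.group(0)], string)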
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            try:
                br.open('http://www.nytimes.com/auth/login')
                br.select_form(name='login')
                br['USERID']   = self.username
                br['PASSWORD'] = self.password
                br.submit()
            except:
                self.log("\nFailed to login")
            br.open('http://www.nytimes.com/auth/login')
            br.select_form(name='login')
            br['USERID']   = self.username
            br['PASSWORD'] = self.password
            raw = br.submit().read()
            if 'Please try again' in raw:
                raise Exception('Your username and password are incorrect')
        return br

    def skip_ad_pages(self, soup):
@ -213,6 +222,9 @@ class NYTimes(BasicNewsRecipe):
            cover = None
        return cover

    def short_title(self):
        return self.title

    def index_to_soup(self, url_or_raw, raw=False):
        '''
        OVERRIDE of class method
@ -255,157 +267,184 @@ class NYTimes(BasicNewsRecipe):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&' with '&amp;'
            massaged = re.sub("&","&amp;", massaged)
            return self.fixChars(massaged)
        else:
            return description

    def parse_index(self):
    def parse_todays_index(self):

        def feed_title(div):
            return ''.join(div.findAll(text=True, recursive=True)).strip()

        articles = {}
        key = None
        ans = []
        url_list = []

        def handle_article(div):
            a = div.find('a', href=True)
            if not a:
                return
            url = re.sub(r'\?.*', '', a['href'])
            if not url.startswith("http"):
                return
            if not url.endswith(".html"):
                return
            if 'podcast' in url:
                return
            if '/video/' in url:
                return
            url += '?pagewanted=all'
            if url in url_list:
                return
            url_list.append(url)
            title = self.tag_to_string(a, use_alt=True).strip()
            description = ''
            pubdate = strftime('%a, %d %b')
            summary = div.find(True, attrs={'class':'summary'})
            if summary:
                description = self.tag_to_string(summary, use_alt=False)
            author = ''
            authorAttribution = div.find(True, attrs={'class':'byline'})
            if authorAttribution:
                author = self.tag_to_string(authorAttribution, use_alt=False)
            else:
                authorAttribution = div.find(True, attrs={'class':'byline'})
                if authorAttribution:
                    author = self.tag_to_string(authorAttribution, use_alt=False)
            feed = key if key is not None else 'Uncategorized'
            if not articles.has_key(feed):
                ans.append(feed)
                articles[feed] = []
            articles[feed].append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author,
                     content=''))

        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')

        # Find each article
        for div in soup.findAll(True,
            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

            if div['class'] in ['section-headline','sectionHeader']:
                key = string.capwords(feed_title(div))
                key = key.replace('Op-ed','Op-Ed')
                key = key.replace('U.s.','U.S.')
            elif div['class'] in ['story', 'story headline'] :
                handle_article(div)
            elif div['class'] == 'headlinesOnly multiline flush':
                for lidiv in div.findAll('li'):
                    handle_article(lidiv)

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return self.filter_ans(ans)

    def parse_headline_index(self):

        articles = {}
        ans = []

        feed = key = 'All Top Stories'
        articles[key] = []
        ans.append(key)
        self.log("Scanning 1 section ...")
        url_list = []

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

        # Fetch the outer table
        table = soup.find('table')
        previousTable = table
        # Fetch the content table
        content_table = soup.find('table',{'id':'content'})
        if content_table is None:
            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
            return None

        # Find the deepest table containing the stories
        while True :
            table = table.find('table')
            if table.find(text=re.compile('top stories start')) :
                previousTable = table
                continue
            else :
                table = previousTable
                break
        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections

        # There are multiple subtables, find the one containing the stories
        for block in table.findAll('table') :
            if block.find(text=re.compile('top stories start')) :
                table = block
                break
            else :
                continue
        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
            for div_sec in td_col.findAll('div',recursive=False):
                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
                    section_name = re.sub(r'^ *$','',section_name)
                    if section_name == '':
                        continue
                    section_name = string.capwords(section_name)
                    if section_name == 'U.s.':
                        section_name = 'U.S.'
                    elif section_name == 'Op-ed':
                        section_name = 'Op-Ed'
                    pubdate = strftime('%a, %d %b')

        # Again there are multiple subtables, find the one containing the stories
        for storyblock in table.findAll('table') :
            if storyblock.find(text=re.compile('top stories start')) :
                break
            else :
                continue

        skipThisSection = False
        todays_article_count = 0
        # Within this table are <font face="times new roman, times, san serif"> entries
        self.log("Fetching feed Top Stories")
        for tr in storyblock.findAllNext('tr'):
            if tr.find('span') is not None :

                sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif',
                                                            'times new roman,times, sans serif',
                                                            'times new roman, times, sans serif']})
                section = None
                bylines = []
                descriptions = []
                pubdate = None

                # Get the Section title
                for (x,i) in enumerate(sectionblock.contents) :
                    skipThisSection = False
                    # Extract the section title
                    if ('Comment' in str(i.__class__)) :
                        if 'start(name=' in i :
                            section = i[i.find('=')+1:-2]

                            if not self.sections.has_key(section) :
                                skipThisSection = True
                    search_div = div_sec
                    for next_tag in h6_sec_name.findNextSiblings(True):
                        if next_tag.__class__.__name__ == 'Tag':
                            if next_tag.name == 'div':
                                search_div = next_tag
                            break

                            # Check for excluded section
                            if len(self.excludeSectionKeywords):
                                key = self.sections[section]
                                excluded = re.compile('|'.join(self.excludeSectionKeywords))
                                if excluded.search(key) or articles.has_key(key):
                                    skipThisSection = True
                                    break

                # Get the bylines and descriptions
                if not skipThisSection :
                    lines = sectionblock.contents
                    contentStrings = []

                    for line in lines:
                        if not isinstance(line, Comment) and line.strip and line.strip() > "":
                            contentStrings.append(line.strip())

                    # Gather the byline/description pairs
                    bylines = []
                    descriptions = []
                    for contentString in contentStrings:
                        if contentString[0:3] == 'By ' and contentString[3].isupper() :
                            bylines.append(contentString)
                    # Get the articles
                    for h3_item in search_div.findAll('h3'):
                        byline = h3_item.h6
                        if byline is not None:
                            author = self.tag_to_string(byline,use_alt=False)
                        else:
                            descriptions.append(contentString)

                    # Fetch the article titles and URLs
                    articleCount = len(sectionblock.findAll('span'))
                    todays_article_count += articleCount
                    for (i,span) in enumerate(sectionblock.findAll(attrs={'class':'headlineWrapper'})) :
                        a = span.find('a', href=True)
                            author = ''
                        a = h3_item.find('a', href=True)
                        if not a:
                            continue
                        url = re.sub(r'\?.*', '', a['href'])
                        if not url.startswith("http"):
                            continue
                        if not url.endswith(".html"):
                            continue
                        if 'podcast' in url:
                            continue
                        if 'video' in url:
                            continue
                        url += '?pagewanted=all'
                        if url in url_list:
                            continue
                        url_list.append(url)
                        self.log("URL %s" % url)
                        title = self.tag_to_string(a, use_alt=True).strip()
                        desc = h3_item.find('p')
                        if desc is not None:
                            description = self.tag_to_string(desc,use_alt=False)
                        else:
                            description = ''
                        if not articles.has_key(section_name):
                            ans.append(section_name)
                            articles[section_name] = []
                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

                        title = self.tag_to_string(a, use_alt=True)
                        # prepend the section name
                        title = self.sections[section] + " · " + title

                        if not isinstance(title, unicode):
                            title = title.decode('utf-8', 'replace')

                        # Allow for unattributed, undescribed entries "Editor's Note"
                        if i >= len(descriptions) :
                            description = None
                        else :
                            description = descriptions[i]

                        if len(bylines) == articleCount :
                            author = bylines[i]
                        else :
                            author = None

                        # Check for duplicates
                        duplicateFound = False
                        if len(articles[feed]) > 1:
                            for article in articles[feed] :
                                if url == article['url'] :
                                    duplicateFound = True
                                    break

                        if duplicateFound:
                            # Continue fetching, don't add this article
                            todays_article_count -= 1
                            continue

                        if not articles.has_key(feed):
                            articles[feed] = []
                        articles[feed].append(
                            dict(title=title, url=url, date=pubdate,
                                 description=description, author=author, content=''))
        # self.log("Queuing %d articles from %s" % (todays_article_count, "Top Stories"))

        ans = self.sort_index_by(ans, {'Top Stories':-1})
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        self.dump_ans(ans)
        return ans
        return self.filter_ans(ans)

    def parse_index(self):
        if self.headlinesOnly:
            return self.parse_headline_index()
        else:
            return self.parse_todays_index()

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def preprocess_html(self, soup):

        kicker_tag = soup.find(attrs={'class':'kicker'})
        if kicker_tag: # remove Op_Ed author head shots
            tagline = self.tag_to_string(kicker_tag)
            if tagline=='Op-Ed Columnist':
                img_div = soup.find('div','inlineImage module')
                if img_div:
                    img_div.extract()
        return self.strip_anchors(soup)

    def postprocess_html(self, soup, first_fetch):
@ -422,8 +461,9 @@ class NYTimes(BasicNewsRecipe):
                firstImg = inlineImgs[0]
                for inlineImg in inlineImgs[1:]:
                    inlineImg.extract()
                # Move firstImg after headline
                cgFirst = soup.find(True, {'class':'columnGroup first'})
                # Move firstImg before article body
                #article_body = soup.find(True, {'id':'articleBody'})
                cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
                if cgFirst:
                    # Strip all sibling NavigableStrings: noise
                    navstrings = cgFirst.findAll(text=True, recursive=False)
@ -443,30 +483,18 @@ class NYTimes(BasicNewsRecipe):
                    if headline_found:
                        cgFirst.insert(insertLoc,firstImg)
                else:
                    self.log(">>> No class:'columnGroup first' found <<<")
        # Change class="kicker" to <h3>
        kicker = soup.find(True, {'class':'kicker'})
        if kicker and kicker.contents[0]:
            h3Tag = Tag(soup, "h3")
            h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
                         use_alt=False)))
            kicker.replaceWith(h3Tag)
                    self.log(">>> No class:'columnGroup first' found <<<")

        # Change captions to italic -1
        # Change captions to italic
        for caption in soup.findAll(True, {'class':'caption'}) :
            if caption and caption.contents[0]:
                emTag = Tag(soup, "em")
                cTag = Tag(soup, "p", [("class", "caption")])
                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                mp_off = c.find("More Photos")
                if mp_off >= 0:
                    c = c[:mp_off]
                emTag.insert(0, c)
                #hrTag = Tag(soup, 'hr')
                #hrTag['class'] = 'caption_divider'
                hrTag = Tag(soup, 'div')
                hrTag['class'] = 'divider'
                emTag.insert(1, hrTag)
                caption.replaceWith(emTag)
                cTag.insert(0, c)
                caption.replaceWith(cTag)

        # Change <nyt_headline> to <h2>
        h1 = soup.find('h1')
@ -506,17 +534,6 @@ class NYTimes(BasicNewsRecipe):
            bTag.insert(0, subhead.contents[0])
            subhead.replaceWith(bTag)

        # Synthesize a section header
        dsk = soup.find('meta', attrs={'name':'dsk'})
        if dsk and dsk.has_key('content'):
            hTag = Tag(soup,'h3')
            hTag['class'] = 'section'
            hTag.insert(0,NavigableString(dsk['content']))
            articleTag = soup.find(True, attrs={'id':'article'})
            if articleTag:
                articleTag.insert(0,hTag)

        # Add class="articleBody" to <div> so we can format with CSS
        divTag = soup.find('div',attrs={'id':'articleBody'})
        if divTag:
            divTag['class'] = divTag['id']
@ -532,11 +549,3 @@ class NYTimes(BasicNewsRecipe):

        return soup

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

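Both NYTimes variants in this release (the headlines recipe above and the subscription recipe below) share the filter_ans plumbing, so tailoring the download is just a matter of editing the two class-level lists; hypothetical values:

    includeSections = ['Business', 'Technology']   # keep only these sections
    excludeSections = ['Sports', 'Dining & Wine']  # or drop just these
    # filter_ans applies includeSections first; using one list or the
    # other is normally sufficient.
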
@ -4,56 +4,66 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
V5 - One picture per article, moved to top:
    Headline
    Image
    Byline
    Story
'''
import re, string, time
from calibre import strftime
from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

class NYTimes(BasicNewsRecipe):

    title = 'The New York Times'
    __author__ = 'GRiker'
    language = 'en'
    requires_version = (0, 7, 5)
    # set headlinesOnly to True for the headlines-only version
    headlinesOnly = False

    description = 'Daily news from the New York Times (subscription version)'
    allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials',
                          'New York','Business Day','Science Times','Sports','Dining','Arts',
                          'Home','Styles','Sunday Business','Week In Review','Travel','Magazine',
                          'Book Review','Weddings','Real Estate','Automobiles',"T Men's Fashion",
                          "T Women's Fashion"]
    # includeSections: List of sections to include. If empty, all sections found will be included.
    # Otherwise, only the sections named will be included. For example,
    #
    #    includeSections = ['Politics','Sports']
    #
    # would cause only the Politics and Sports sections to be included.

    # List of sections to exclude
    # To add a section, copy the section name from the allSectionKeywords list above
    # For example, to exclude 'Dining' and 'Weddings':
    #excludeSectionKeywords = ['Dining','Weddings']
    excludeSectionKeywords = []
    includeSections = []  # by default, all sections included

    # List of sections to include (test and debug only)
    # By default, any sections in today's paper that are not listed in excludeSectionKeywords
    # are downloaded. fetch_only specifies that only certain sections are to be downloaded.
    # This should only be used for testing and debugging.
    # For example, to download only 'The Front Page' section:
    # fetch_only = set(['The Front Page'])
    fetch_only = set([])
    if fetch_only:
        excludeSectionKeywords = list(set(allSectionKeywords) ^ fetch_only)
    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
    # Otherwise, the sections named will be excluded. For example,
    #
    #    excludeSections = ['Politics','Sports']
    #
    # would cause the Politics and Sports sections to be excluded. This parameter can be used
    # in conjunction with includeSections although in most cases using one or the other, but
    # not both, is sufficient.

    excludeSections = []

    # one_picture_per_article specifies that calibre should only use the first image
    # from an article (if one exists). If one_picture_per_article = True, the image
    # will be moved to a location between the headline and the byline.
    # If one_picture_per_article = False, all images from the article will be included
    # and shown in their original location.
    one_picture_per_article = True

    # The maximum number of articles that will be downloaded
    max_articles_per_feed = 100

    if headlinesOnly:
        title = 'New York Times Headlines'
        description = 'Headlines from the New York Times'
    else:
        title = 'New York Times'
        description = 'Today\'s New York Times'

    __author__ = 'GRiker/Kovid Goyal/Nick Redding'
    language = 'en'
    requires_version = (0, 7, 5)

    timefmt = ''
    needs_subscription = True
    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    cover_margins = (18,18,'grey99')

    remove_tags_before = dict(id='article')
    remove_tags_after  = dict(id='article')
    remove_tags = [dict(attrs={'class':[
@ -69,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
            'entry-response module',
            'icon enlargeThis',
            'leftNavTabs',
            'metaFootnote',
            'module box nav',
            'nextArticleLink',
            'nextArticleLink clearfix',
@ -76,12 +87,13 @@ class NYTimes(BasicNewsRecipe):
            'relatedSearchesModule',
            'side_tool',
            'singleAd',
            'subNavigation clearfix',
            'subNavigation tabContent active',
            'subNavigation tabContent active clearfix',
            re.compile('^subNavigation'),
            re.compile('^leaderboard'),
            re.compile('^module'),
            ]}),
        dict(id=[
            'adxLeaderboard',
            'adxSponLink',
            'archive',
            'articleExtras',
            'articleInline',
@ -92,61 +104,110 @@ class NYTimes(BasicNewsRecipe):
            'footer',
            'header',
            'header_search',
            'inlineBox',
            'login',
            'masthead',
            'masthead-nav',
            'memberTools',
            'navigation',
            'portfolioInline',
            'readerReviews',
            'readerReviewsCount',
            'relatedArticles',
            'relatedTopics',
            'respond',
            'side_search',
            'side_index',
            'side_tool',
            'toolsRight',
            ]),
        dict(name=['script', 'noscript', 'style'])]
    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    cover_margins = (18,18,'grey99')
        dict(name=['script', 'noscript', 'style','form','hr'])]
    no_stylesheets = True
    extra_css = '.headline {text-align: left;}\n \
                 .byline {font-family: monospace; \
                          text-align: left; \
                          margin-top: 0px; \
                          margin-bottom: 0px;}\n \
                 .dateline {font-size: small; \
                            margin-top: 0px; \
                            margin-bottom: 0px;}\n \
                 .timestamp {font-size: small; \
                             margin-top: 0px; \
                             margin-bottom: 0px;}\n \
                 .source {text-align: left;}\n \
                 .image {text-align: center;}\n \
                 .credit {text-align: right; \
                          font-size: small; \
                          margin-top: 0px; \
                          margin-bottom: 0px;}\n \
                 .articleBody {text-align: left;}\n \
                 .authorId {text-align: left; \
                            font-style: italic;}\n '
    extra_css = '''
        .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
        .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .timestamp { text-align: left; font-size: small; }
        .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        a:link {text-decoration: none; }
        .articleBody { }
        .authorId {text-align: left; }
        .image {text-align: center;}
        .source {text-align: left; }'''

    def filter_ans(self, ans) :
        total_article_count = 0
        idx = 0
        idx_max = len(ans)-1
        while idx <= idx_max:
            if self.includeSections != []:
                if ans[idx][0] not in self.includeSections:
                    print "SECTION NOT INCLUDED: ",ans[idx][0]
                    del ans[idx]
                    idx_max = idx_max-1
                    continue
            if ans[idx][0] in self.excludeSections:
                print "SECTION EXCLUDED: ",ans[idx][0]
                del ans[idx]
                idx_max = idx_max-1
                continue
            if self.verbose:
                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
            for article in ans[idx][1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                              article['url'].encode('cp1252','replace')))
            idx = idx+1

        self.log( "Queued %d articles" % total_article_count )
        return ans

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        return fixed

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            try:
                br.open('http://www.nytimes.com/auth/login')
                br.select_form(name='login')
                br['USERID']   = self.username
                br['PASSWORD'] = self.password
                raw = br.submit().read()
                if 'Sorry, we could not find the combination you entered. Please try again.' in raw:
                    raise Exception('Your username and password are incorrect')
                #open('/t/log.html', 'wb').write(raw)
            except:
                self.log("\nFailed to login")

            br.open('http://www.nytimes.com/auth/login')
            br.select_form(name='login')
            br['USERID']   = self.username
            br['PASSWORD'] = self.password
            raw = br.submit().read()
            if 'Please try again' in raw:
                raise Exception('Your username and password are incorrect')
        return br

    def skip_ad_pages(self, soup):
        # Skip ad pages served before actual article
        skip_tag = soup.find(True, {'name':'skip'})
        if skip_tag is not None:
            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
            url += '?pagewanted=all'
            self.log.warn("Skipping ad to article at '%s'" % url)
            return self.index_to_soup(url, raw=True)
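    # skip_ad_pages is a BasicNewsRecipe hook: any non-None return value
    # replaces the page that was just fetched, so handing back the raw
    # HTML of the real story skips the interstitial ad entirely.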
    def get_cover_url(self):
        cover = None
        st = time.localtime()
@ -162,143 +223,232 @@ class NYTimes(BasicNewsRecipe):
            cover = None
        return cover

    def get_masthead_title(self):
    def short_title(self):
        return self.title

    def dump_ans(self, ans):
        total_article_count = 0
        for section in ans :
            if self.verbose:
                self.log("section %s: %d articles" % (section[0], len(section[1])) )
            for article in section[1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('mac-roman','replace'),
                              article['url'].encode('mac-roman','replace')))
        self.log( "Queued %d articles" % total_article_count )

    def index_to_soup(self, url_or_raw, raw=False):
        '''
        OVERRIDE of class method
        deals with various page encodings between index and articles
        '''
        def get_the_soup(docEncoding, url_or_raw, raw=False) :
            if re.match(r'\w+://', url_or_raw):
                f = self.browser.open(url_or_raw)
                _raw = f.read()
                f.close()
                if not _raw:
                    raise RuntimeError('Could not fetch index from %s'%url_or_raw)
            else:
                _raw = url_or_raw
            if raw:
                return _raw

    def dump_hex(self, src, length=16):
        ''' Diagnostic '''
        FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
        N=0; result=''
        while src:
            s,src = src[:length],src[length:]
            hexa = ' '.join(["%02X"%ord(x) for x in s])
            s = s.translate(FILTER)
            result += "%04X   %-*s   %s\n" % (N, length*3, hexa, s)
            N+=length
        print result

            if not isinstance(_raw, unicode) and self.encoding:
                _raw = _raw.decode(docEncoding, 'replace')
            massage = list(BeautifulSoup.MARKUP_MASSAGE)
            massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
            return BeautifulSoup(_raw, markupMassage=massage)

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Entry point
        print "index_to_soup()"
        soup = get_the_soup( self.encoding, url_or_raw )
        contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
        docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
        if docEncoding == '' :
            docEncoding = self.encoding

        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        if self.verbose > 2:
            self.log( "  document encoding: '%s'" % docEncoding)
        if docEncoding != self.encoding :
            soup = get_the_soup(docEncoding, url_or_raw)

        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        return fixed
        return soup

    def massageNCXText(self, description):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&' with '&amp;'
            massaged = re.sub("&","&amp;", massaged)
            return self.fixChars(massaged)
        else:
            return description

    def parse_index(self):
        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
    def parse_todays_index(self):

        def feed_title(div):
            return ''.join(div.findAll(text=True, recursive=False)).strip()
            return ''.join(div.findAll(text=True, recursive=True)).strip()

        articles = {}
        key = None
        ans = []
        # Find each instance of class="section-headline", class="story", class="story headline"
        for div in soup.findAll(True,
            attrs={'class':['section-headline', 'story', 'story headline']}):
        url_list = []

            if div['class'] == 'section-headline':
                key = string.capwords(feed_title(div))
                if self.excludeSectionKeywords:
                    excluded = re.compile('|'.join(self.excludeSectionKeywords))
                    if excluded.search(key):
                        self.log("Skipping section %s" % key)
                        continue
                articles[key] = []
                ans.append(key)

            elif div['class'] in ['story', 'story headline'] :
                a = div.find('a', href=True)
                if not a:
                    continue
                url = re.sub(r'\?.*', '', a['href'])
                url += '?pagewanted=all'

                title = self.massageNCXText(self.tag_to_string(a, use_alt=True).strip())

                description = ''
                pubdate = strftime('%a, %d %b')
                summary = div.find(True, attrs={'class':'summary'})
                if summary:
                    description = self.massageNCXText(self.tag_to_string(summary, use_alt=False))

                author = ''
                authorAttribution = div.find(True, attrs={'class':'storyheadline-author'})
        def handle_article(div):
            a = div.find('a', href=True)
            if not a:
                return
            url = re.sub(r'\?.*', '', a['href'])
            if not url.startswith("http"):
                return
            if not url.endswith(".html"):
                return
            if 'podcast' in url:
                return
            if '/video/' in url:
                return
            url += '?pagewanted=all'
            if url in url_list:
                return
            url_list.append(url)
            title = self.tag_to_string(a, use_alt=True).strip()
            description = ''
            pubdate = strftime('%a, %d %b')
            summary = div.find(True, attrs={'class':'summary'})
            if summary:
                description = self.tag_to_string(summary, use_alt=False)
            author = ''
            authorAttribution = div.find(True, attrs={'class':'byline'})
            if authorAttribution:
                author = self.tag_to_string(authorAttribution, use_alt=False)
            else:
                authorAttribution = div.find(True, attrs={'class':'byline'})
                if authorAttribution:
                    author = self.tag_to_string(authorAttribution, use_alt=False)
                else:
                    authorAttribution = div.find(True, attrs={'class':'byline'})
                    if authorAttribution:
                        author = self.tag_to_string(authorAttribution, use_alt=False)
            # Kill commas - Kindle switches to '&'
            author = re.sub(',','',author)
            feed = key if key is not None else 'Uncategorized'
            if not articles.has_key(feed):
                ans.append(feed)
                articles[feed] = []
            articles[feed].append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author,
                     content=''))

        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')

        # Find each article
        for div in soup.findAll(True,
            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

            if div['class'] in ['section-headline','sectionHeader']:
                key = string.capwords(feed_title(div))
                key = key.replace('Op-ed','Op-Ed')
                key = key.replace('U.s.','U.S.')
            elif div['class'] in ['story', 'story headline'] :
                handle_article(div)
            elif div['class'] == 'headlinesOnly multiline flush':
                for lidiv in div.findAll('li'):
                    handle_article(lidiv)

        feed = key if key is not None else 'Uncategorized'
        if not articles.has_key(feed):
            articles[feed] = []
        if not 'podcasts' in url:
            articles[feed].append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author,
                     content=''))
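        # sort_index_by orders feed titles by weight: unlisted titles weigh 0
        # and lower weights sort first, so 'The Front Page' floats to the top
        # and 'Obituaries' sinks below the unweighted sections.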
        ans = self.sort_index_by(ans, {'The Front Page':-1,
                                       'Dining In, Dining Out':1,
                                       'Obituaries':2})
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        self.dump_ans(ans)
        return ans
        return self.filter_ans(ans)

    def parse_headline_index(self):

        articles = {}
        ans = []
        url_list = []

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

        # Fetch the content table
        content_table = soup.find('table',{'id':'content'})
        if content_table is None:
            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
            return None

        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections

        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
            for div_sec in td_col.findAll('div',recursive=False):
                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
                    section_name = re.sub(r'^ *$','',section_name)
                    if section_name == '':
                        continue
                    section_name = string.capwords(section_name)
                    if section_name == 'U.s.':
                        section_name = 'U.S.'
                    elif section_name == 'Op-ed':
                        section_name = 'Op-Ed'
                    pubdate = strftime('%a, %d %b')

                    search_div = div_sec
                    for next_tag in h6_sec_name.findNextSiblings(True):
                        if next_tag.__class__.__name__ == 'Tag':
                            if next_tag.name == 'div':
                                search_div = next_tag
                            break

                    # Get the articles
                    for h3_item in search_div.findAll('h3'):
                        byline = h3_item.h6
                        if byline is not None:
                            author = self.tag_to_string(byline,use_alt=False)
                        else:
                            author = ''
                        a = h3_item.find('a', href=True)
                        if not a:
                            continue
                        url = re.sub(r'\?.*', '', a['href'])
                        if not url.startswith("http"):
                            continue
                        if not url.endswith(".html"):
                            continue
                        if 'podcast' in url:
                            continue
                        if 'video' in url:
                            continue
                        url += '?pagewanted=all'
                        if url in url_list:
                            continue
                        url_list.append(url)
                        self.log("URL %s" % url)
                        title = self.tag_to_string(a, use_alt=True).strip()
                        desc = h3_item.find('p')
                        if desc is not None:
                            description = self.tag_to_string(desc,use_alt=False)
                        else:
                            description = ''
                        if not articles.has_key(section_name):
                            ans.append(section_name)
                            articles[section_name] = []
                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return self.filter_ans(ans)

    def parse_index(self):
        if self.headlinesOnly:
            return self.parse_headline_index()
        else:
            return self.parse_todays_index()

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def skip_ad_pages(self, soup):
        # Skip ad pages served before actual article
        skip_tag = soup.find(True, {'name':'skip'})
        if skip_tag is not None:
            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
            url += '?pagewanted=all'
            self.log.warn("Skipping ad to article at '%s'" % url)
            return self.index_to_soup(url, raw=True)

    def preprocess_html(self, soup):

        kicker_tag = soup.find(attrs={'class':'kicker'})
        if kicker_tag: # remove Op_Ed author head shots
            tagline = self.tag_to_string(kicker_tag)
            if tagline=='Op-Ed Columnist':
                img_div = soup.find('div','inlineImage module')
                if img_div:
                    img_div.extract()
        return self.strip_anchors(soup)

    def postprocess_html(self, soup, first_fetch):
        print "\npostprocess_html()\n"

        if self.one_picture_per_article:
            # Remove all images after first
@ -312,8 +462,9 @@ class NYTimes(BasicNewsRecipe):
                firstImg = inlineImgs[0]
                for inlineImg in inlineImgs[1:]:
                    inlineImg.extract()
                # Move firstImg after headline
                cgFirst = soup.find(True, {'class':'columnGroup first'})
                # Move firstImg before article body
                #article_body = soup.find(True, {'id':'articleBody'})
                cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
                if cgFirst:
                    # Strip all sibling NavigableStrings: noise
                    navstrings = cgFirst.findAll(text=True, recursive=False)
@ -333,30 +484,18 @@ class NYTimes(BasicNewsRecipe):
                    if headline_found:
                        cgFirst.insert(insertLoc,firstImg)
                else:
                    self.log(">>> No class:'columnGroup first' found <<<")
        # Change class="kicker" to <h3>
        kicker = soup.find(True, {'class':'kicker'})
        if kicker and kicker.contents and kicker.contents[0]:
            h3Tag = Tag(soup, "h3")
            h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
                         use_alt=False)))
            kicker.replaceWith(h3Tag)
                    self.log(">>> No class:'columnGroup first' found <<<")

        # Change captions to italic -1
        # Change captions to italic
        for caption in soup.findAll(True, {'class':'caption'}) :
            if caption and caption.contents[0]:
                emTag = Tag(soup, "em")
                cTag = Tag(soup, "p", [("class", "caption")])
                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                mp_off = c.find("More Photos")
                if mp_off >= 0:
                    c = c[:mp_off]
                emTag.insert(0, c)
                #hrTag = Tag(soup, 'hr')
                #hrTag['class'] = 'caption_divider'
                hrTag = Tag(soup, 'div')
                hrTag['class'] = 'divider'
                emTag.insert(1, hrTag)
                caption.replaceWith(emTag)
                cTag.insert(0, c)
                caption.replaceWith(cTag)

        # Change <nyt_headline> to <h2>
        h1 = soup.find('h1')
@ -396,17 +535,6 @@ class NYTimes(BasicNewsRecipe):
            bTag.insert(0, subhead.contents[0])
            subhead.replaceWith(bTag)

        # Synthesize a section header
        dsk = soup.find('meta', attrs={'name':'dsk'})
        if dsk and dsk.has_key('content'):
            hTag = Tag(soup,'h3')
            hTag['class'] = 'section'
            hTag.insert(0,NavigableString(dsk['content']))
            articleTag = soup.find(True, attrs={'id':'article'})
            if articleTag:
                articleTag.insert(0,hTag)

        # Add class="articleBody" to <div> so we can format with CSS
        divTag = soup.find('div',attrs={'id':'articleBody'})
        if divTag:
            divTag['class'] = divTag['id']
@ -422,56 +550,3 @@ class NYTimes(BasicNewsRecipe):

        return soup

    def populate_article_metadata(self,article,soup,first):
        '''
        Extract author and description from article, add to article metadata
        '''
        def extract_author(soup):
            byline = soup.find('meta',attrs={'name':['byl','CLMST']})
            if byline :
                author = byline['content']
            else :
                # Try for <div class="byline">
                byline = soup.find('div', attrs={'class':'byline'})
                if byline:
                    author = byline.renderContents()
                else:
                    print soup.prettify()
                    return None
            return author

        def extract_description(soup):
            description = soup.find('meta',attrs={'name':['description','description ']})
            if description :
                return self.massageNCXText(description['content'])
            else:
                # Take first paragraph of article
                articlebody = soup.find('div',attrs={'id':'articlebody'})
                if not articlebody:
                    # Try again with class instead of id
                    articlebody = soup.find('div',attrs={'class':'articlebody'})
                if not articlebody:
                    print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
                    print soup.prettify()
                    return None
                paras = articlebody.findAll('p')
                for p in paras:
                    if p.renderContents() > '' :
                        return self.massageNCXText(self.tag_to_string(p,use_alt=False))
                return None

        if not article.author:
            article.author = extract_author(soup)
        if not article.summary:
            article.summary = article.text_summary = extract_description(soup)

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
a.replaceWith(a.renderContents().decode('utf-8','replace'))
|
||||
#a.replaceWith(a.renderContents().decode('cp1252','replace'))
|
||||
return soup
|
||||
|
||||
|
@@ -1,74 +1,43 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import re

class NewZealandHerald(BasicNewsRecipe):

    title       = 'New Zealand Herald'
    __author__  = 'Krittika Goyal'
    __author__  = 'Kovid Goyal'
    description = 'Daily news'
    timefmt = ' [%d %b, %Y]'
    language = 'en_NZ'
    oldest_article = 2.5

    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'class':'contentContainer left eight'})
    remove_tags_after  = dict(name='div', attrs={'class':'callToAction'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['sectionHeader', 'tools','callToAction', 'contentContainer right two nopad relatedColumn']}),
        #dict(name='div', attrs={'id':['shareContainer']}),
        #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or #author')"}),
        #dict(name='table', attrs={'cellspacing':'0'}),
    ]
    feeds = [
        ('Business',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000003.xml'),
        ('World',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000002.xml'),
        ('National',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000001.xml'),
        ('Entertainment',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_001501119.xml'),
        ('Travel',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000007.xml'),
        ('Opinion',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000466.xml'),
        ('Life & Style',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000006.xml'),
        ('Technology',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000005.xml'),
        ('Sport',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000004.xml'),
        ('Motoring',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000009.xml'),
        ('Property',
         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000008.xml'),
    ]

    def preprocess_html(self, soup):
        table = soup.find('table')
        if table is not None:
            table.extract()
        return soup

    #TO GET ARTICLES IN SECTION
    def nz_parse_section(self, url):
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'class':'col-300 categoryList'})
        date = div.find(attrs={'class':'link-list-heading'})

        current_articles = []
        for x in date.findAllNext(attrs={'class':['linkList', 'link-list-heading']}):
            if x.get('class') == 'link-list-heading': break
            for li in x.findAll('li'):
                a = li.find('a', href=True)
                if a is None:
                    continue
                title = self.tag_to_string(a)
                url = a.get('href', False)
                if not url or not title:
                    continue
                if url.startswith('/'):
                    url = 'http://www.nzherald.co.nz'+url
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url':url,
                    'description':'', 'date':''})

        return current_articles


    # To GET SECTIONS
    def parse_index(self):
        feeds = []
        for title, url in [
            ('National',
             'http://www.nzherald.co.nz/nz/news/headlines.cfm?c_id=1'),
            ('World',
             'http://www.nzherald.co.nz/world/news/headlines.cfm?c_id=2'),
            ('Politics',
             'http://www.nzherald.co.nz/politics/news/headlines.cfm?c_id=280'),
            ('Crime',
             'http://www.nzherald.co.nz/crime/news/headlines.cfm?c_id=30'),
            ('Environment',
             'http://www.nzherald.co.nz/environment/news/headlines.cfm?c_id=39'),
        ]:
            articles = self.nz_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def print_version(self, url):
        m = re.search(r'objectid=(\d+)', url)
        if m is None:
            return url
        return 'http://www.nzherald.co.nz/news/print.cfm?pnum=1&objectid=' + m.group(1)

@@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
    extra_css = """
                body{font-family: Arial,Helvetica,sans-serif }
                img{margin-bottom: 0.4em; display:block}
                #autor{font-weight: bold}
                #fecha,#epigrafe{font-size: 0.9em; margin: 5px}
                #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
                .fgprincipal{font-size: large; font-weight: bold}
                """

    conversion_options = {
        'comment' : description
@@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
        , 'language' : language
    }

    remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
    remove_tags = [
         dict(name=['meta','link'])
        ,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
    ]
    remove_attributes=['lang']

    feeds = [
@@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('span', attrs={'id':'seccion'}):
            it = item.a
            it.name='span'
            del it['href']
            del it['title']
        for item in soup.findAll('p'):
            it = item.find('h3')
            if it:
                it.name='span'
        return soup
70
resources/recipes/pc_lab.recipe
Normal file
@@ -0,0 +1,70 @@
#!/usr/bin/env python

from calibre.web.feeds.recipes import BasicNewsRecipe

class PCLab(BasicNewsRecipe):
    cover_url = 'http://pclab.pl/img/logo.png'
    title = u"PC Lab"
    __author__ = 'ravcio - rlelusz[at]gmail.com'
    description = u"Articles from PC Lab website"
    language = 'pl'
    oldest_article = 30.0
    max_articles_per_feed = 100
    recursions = 0
    encoding = 'iso-8859-2'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'class':['substance']})
    ]

    remove_tags = [
         dict(name='div', attrs={'class':['chapters']})
        ,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':['navigation']})
    ]

    #links to RSS feeds
    feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]

    #load second and subsequent page content
    # in: soup - full page with 'next' button
    # out: appendtag - tag to which new page is to be added
    def append_page(self, soup, appendtag):
        # find the 'Next' button
        pager = soup.find('div', attrs={'class':'next'})

        if pager:
            #search for 'a' element with link to next page (exit if not found)
            a = pager.find('a')
            if a:
                nexturl = a['href']

                soup2 = self.index_to_soup('http://pclab.pl/' + nexturl)

                pagetext_substance = soup2.find('div', attrs={'class':'substance'})
                pagetext = pagetext_substance.find('div', attrs={'class':'data'})
                pagetext.extract()

                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
                pos = len(appendtag.contents)

                self.append_page(soup2, appendtag)


    def preprocess_html(self, soup):

        # soup.body contains no title and no navigator, they are in soup
        self.append_page(soup, soup.body)

        # finally remove some tags
        tags = soup.findAll('div',attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})
        [tag.extract() for tag in tags]

        return soup
66
resources/recipes/perfil.recipe
Normal file
@@ -0,0 +1,66 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
perfil.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Perfil(BasicNewsRecipe):
    title                 = 'Perfil'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina y el resto del mundo'
    publisher             = 'perfil.com'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
    language              = 'es'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
    extra_css             = """
                            body{font-family: Arial,Helvetica,sans-serif }
                            .seccion{border-bottom: 1px dotted #666666; text-transform: uppercase; font-size: x-large}
                            .foto1 h1{font-size: x-small}
                            h1{font-family: Georgia,"Times New Roman",serif}
                            img{margin-bottom: 0.4em}
                            """

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [
         dict(name=['iframe','embed','object','base','meta','link'])
        ,dict(name='a', attrs={'href':'#comentarios'})
        ,dict(name='div', attrs={'class':'foto3'})
        ,dict(name='img', attrs={'alt':'ampliar'})
    ]
    keep_only_tags=[dict(attrs={'class':['bd468a','cuerpoSuperior']})]
    remove_attributes=['onload','lang','width','height','border']

    feeds = [
         (u'Ultimo momento' , u'http://www.perfil.com/rss/ultimomomento.xml')
        ,(u'Politica'       , u'http://www.perfil.com/rss/politica.xml'     )
        ,(u'Policia'        , u'http://www.perfil.com/rss/policia.xml'      )
        ,(u'Internacionales', u'http://www.perfil.com/rss/internacional.xml')
        ,(u'Economia'       , u'http://www.perfil.com/rss/economia.xml'     )
        ,(u'Deportes'       , u'http://www.perfil.com/rss/deportes.xml'     )
        ,(u'Opinion'        , u'http://www.perfil.com/rss/columnistas.xml'  )
        ,(u'Sociedad'       , u'http://www.perfil.com/rss/sociedad.xml'     )
        ,(u'Cultura'        , u'http://www.perfil.com/rss/cultura.xml'      )
        ,(u'Espectaculos'   , u'http://www.perfil.com/rss/espectaculos.xml' )
        ,(u'Ciencia'        , u'http://www.perfil.com/rss/ciencia.xml'      )
        ,(u'Salud'          , u'http://www.perfil.com/rss/salud.xml'        )
        ,(u'Tecnologia'     , u'http://www.perfil.com/rss/tecnologia.xml'   )
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
@@ -1,13 +1,10 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
politika.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Politika(BasicNewsRecipe):
    title                 = 'Politika Online'
@@ -19,53 +16,51 @@ class Politika(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    remove_javascript     = True
    encoding              = 'utf8'
    language              = 'sr'

    lang                  = 'sr-Latn-RS'
    direction             = 'ltr'
    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    delay                 = 1
    language              = 'sr'
    publication_type      = 'newspaper'
    masthead_url          = 'http://static.politika.co.rs/images_new/politika.gif'
    extra_css             = """
                            @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                            @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                            body{font-family: Arial,Helvetica,sans1,sans-serif}
                            h1{font-family: "Times New Roman",Times,serif1,serif}
                            .articledescription{font-family: sans1, sans-serif}
                            """

    conversion_options = {
          'comment'      : description
        , 'tags'         : category
        , 'publisher'    : publisher
        , 'language'     : lang
        , 'pretty_print' : True
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]

    remove_tags = [
         dict(name='div', attrs={'class':['send_print','txt-komentar']})
        ,dict(name=['object','link','a'])
        ,dict(name='h1', attrs={'class':'box_header-tags'})
    ]

    keep_only_tags = [dict(name='div', attrs={'class':'big_article_home item_details'})]
    remove_tags_after = dict(attrs={'class':'online_date'})
    remove_tags = [dict(name=['link','meta','iframe','embed','object'])]

    feeds = [
         (u'Politika'             , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml'            )
        ,(u'Svet'                 , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml'                 )
        ,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
        ,(u'Pogledi'              , u'http://www.politika.rs/pogledi/index.lt.xml'                        )
        ,(u'Pogledi sa strane'    , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml'    )
        ,(u'Tema dana'            , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml'            )
        ,(u'Kultura'              , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml'              )
        ,(u'Zivot i stil'         , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml'         )
         (u'Politika'          , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml'         )
        ,(u'Svet'              , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml'             )
        ,(u'Ostali komentari'  , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
        ,(u'Pogledi'           , u'http://www.politika.rs/pogledi/index.lt.xml'                    )
        ,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
        ,(u'Tema dana'         , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml'        )
        ,(u'Kultura'           , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml'          )
        ,(u'Spektar'           , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml'     )
    ]

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        soup.head.insert(0,mlang)
        for item in soup.findAll(style=True):
            del item['style']
        ftag = soup.find('div',attrs={'class':'content_center_border'})
        if ftag.has_key('align'):
            del ftag['align']
        return self.adeify_images(soup)
        for item in soup.findAll('a', attrs={'class':'category'}):
            item.name='span'
            if item.has_key('href'):
                del item['href']
            if item.has_key('title'):
                del item['title']
        return soup

68
resources/recipes/polityka.recipe
Normal file
@@ -0,0 +1,68 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe

class Polityka(BasicNewsRecipe):

    title = u'Polityka'
    __author__ = 'Mateusz Kielar'
    description = 'Weekly magazine. Last archive issue'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'en'
    remove_javascript = True

    remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
    remove_tags_after = dict(dict(name = 'div', attrs = {'class' : 'box_footer'}))

    remove_tags =[]
    remove_tags.append(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'box_footer'}))


    extra_css = '''
        h1 {font-size: x-large; font-weight: bold}
    '''

    def parse_index(self):
        soup = self.index_to_soup('http://archiwum.polityka.pl/')
        box_img3 = soup.findAll(attrs={'class' : 'box_img3'})
        feeds = []
        last = 0
        self.cover_url = 'http://archiwum.polityka.pl' + box_img3[-1].find('img')['src']
        last_edition = 'http://archiwum.polityka.pl' + box_img3[-1].find('a')['href']

        while True:
            index = self.index_to_soup(last_edition)

            box_list = index.findAll('div', attrs={'class' : 'box_list'})
            if len(box_list) == 0:
                break

            articles = {}
            for box in box_list:
                for div in box.findAll('div', attrs={'class': 'list_tresc'}):
                    article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
                    section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
                    print section
                    if not articles.has_key(section):
                        articles[section] = []
                    articles[section].append( {
                        'title' : self.tag_to_string(div.a),
                        'url' : 'http://archiwum.polityka.pl' + div.a['href'],
                        'date' : '',
                        'description' : ''
                        })

            for section in articles:
                feeds.append((section, articles[section]))

            last_edition = last_edition.replace('http://archiwum.polityka.pl/wydanie/' + str(last), 'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
            last = last + 1

        return feeds

69
resources/recipes/rollingstone.recipe
Normal file
@@ -0,0 +1,69 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
rollingstone.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class RollingStone(BasicNewsRecipe):
    title                 = 'Rolling Stone Magazine - free content'
    __author__            = 'Darko Miletic'
    description           = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.'
    publisher             = 'Werner Media inc.'
    category              = 'news, music, USA, world'
    oldest_article        = 15
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en'
    remove_empty_feeds    = True
    publication_type      = 'magazine'
    masthead_url          = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
    extra_css             = """
                            body{font-family: Georgia,Times,serif }
                            img{margin-bottom: 0.4em; display:block}
                            """

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    preprocess_regexps = [
         (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE),lambda match: 'xml:lang="en">\n<head>\n')
        ,(re.compile(r'</title>.*?</head>'     , re.DOTALL|re.IGNORECASE),lambda match: '</title>\n</head>\n'    )
    ]

    keep_only_tags=[
         dict(attrs={'class':['headerImgHolder','headerContent']})
        ,dict(name='div',attrs={'id':['teaser','storyTextContainer']})
        ,dict(name='div',attrs={'class':'blogDetailModule clearfix'})
    ]

    remove_tags = [
         dict(name=['meta','iframe','object','embed'])
        ,dict(attrs={'id':'mpStoryHeader'})
        ,dict(attrs={'class':'relatedTopics'})
    ]
    remove_attributes=['lang','onclick','width','height','name']
    remove_tags_before=dict(attrs={'class':'bloggerInfo'})
    remove_tags_after=dict(attrs={'class':'relatedTopics'})


    feeds = [
         (u'All News'     , u'http://www.rollingstone.com/siteServices/rss/allNews'     )
        ,(u'All Blogs'    , u'http://www.rollingstone.com/siteServices/rss/allBlogs'    )
        ,(u'Movie Reviews', u'http://www.rollingstone.com/siteServices/rss/movieReviews')
        ,(u'Album Reviews', u'http://www.rollingstone.com/siteServices/rss/albumReviews')
        ,(u'Song Reviews' , u'http://www.rollingstone.com/siteServices/rss/songReviews' )
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
53
resources/recipes/rue89.recipe
Normal file
@@ -0,0 +1,53 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Louis Gesbert <meta at antislash dot info>'
'''
Rue89
'''

__author__ = '2010, Louis Gesbert <meta at antislash dot info>'

import re
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe

class Rue89(BasicNewsRecipe):
    title = 'Rue89'
    __author__ = 'Louis Gesbert'
    description = 'Popular free french news website'
    title = u'Rue89'
    language = 'fr'
    oldest_article = 7
    max_articles_per_feed = 50

    feeds = [(u'La Une', u'http://www.rue89.com/homepage/feed')]

    no_stylesheets = True

    preprocess_regexps = [
        (re.compile(r'<(/?)h2>', re.IGNORECASE|re.DOTALL),
         lambda match : '<'+match.group(1)+'h3>'),
        (re.compile(r'<div class="print-title">([^>]+)</div>', re.IGNORECASE|re.DOTALL),
         lambda match : '<h2>'+match.group(1)+'</h2>'),
        (re.compile(r'<img[^>]+src="[^"]*/numeros/(\d+)[^0-9.">]*.gif"[^>]*/>', re.IGNORECASE|re.DOTALL),
         lambda match : '<span style="font-family: Sans-serif; color: red; font-size:24pt; padding=2pt;">'+match.group(1)+'</span>'),
        (re.compile(r'\''), lambda match: '’'),
    ]

    def preprocess_html(self,soup):
        body = Tag(soup, 'body')
        title = soup.find('h1', {'class':'title'})
        content = soup.find('div', {'class':'content'})
        soup.body.replaceWith(body)
        body.insert(0, title)
        body.insert(1, content)
        return soup

    remove_tags = [ #dict(name='div', attrs={'class':'print-source_url'}),
                    #dict(name='div', attrs={'class':'print-links'}),
                    #dict(name='img', attrs={'class':'print-logo'}),
                    dict(name='div', attrs={'class':'content_top'}),
                    dict(name='div', attrs={'id':'sidebar-left'}), ]

    # -- print-version has poor quality on this website, better do the conversion ourselves
    # def print_version(self, url):
    #     return re.sub('^.*-([0-9]+)$', 'http://www.rue89.com/print/\\1',url)
73
resources/recipes/scprint.recipe
Normal file
@@ -0,0 +1,73 @@
from calibre.web.feeds.news import BasicNewsRecipe, LoginFailed

class SCPrintMagazine(BasicNewsRecipe):
    title = u'SC Print Magazine'
    __author__ = u'Tony Maro'
    description = u'Last print version of the data security magazine'
    INDEX = "http://www.scmagazineus.com/issuearchive/"
    no_stylesheets = True
    language = 'en'
    keep_only_tags = [dict(id=['article','review'])]
    remove_tags = [dict(id=['articlePrintTools','reviewBodyColumn'])]
    LOG_IN = 'http://www.scmagazineus.com/login/'
    tags = 'News,SC Magazine'
    needs_subscription = True

    def parse_index(self):
        articles = []
        issuelink = printsections = None

        soup = self.index_to_soup(self.INDEX)
        sectit = soup.find('div', attrs={'class':'issueArchiveItem'})
        if sectit is not None:
            linkt = sectit.find('a')
            issuelink = linkt['href']
            imgt = sectit.find('img')
            self.cover_url = imgt['src']

        if issuelink is not None:
            issue = self.index_to_soup(issuelink)
            if issue is not None:
                printsections = issue.findAll('div',attrs={'class':'PrintSection'})
        if printsections is not None:
            for printsection in printsections:
                onesection = []
                sectiontitle = printsection.find('h3').contents[0]
                articlesec = printsection.findAll('div',attrs={'class':'IssueArchiveFormat'})
                if articlesec is not None:
                    ''' got articles '''
                    for onearticle in articlesec:
                        ''' process one article '''
                        arttitlet = onearticle.find('h3')
                        if arttitlet is not None:
                            mylink = arttitlet.find('a')
                            if mylink is not None:
                                if mylink.has_key('title'):
                                    arttitle = mylink['title']
                                else:
                                    arttitle = 'unknown'
                                if mylink.has_key('href'):
                                    artlink = mylink['href']
                                    artlink = artlink.replace("/article","/printarticle")
                                    artlink = artlink.replace("/review","/printreview")
                                    deck = onearticle.find('div',attrs={'class':'deck'})
                                    if deck is not None:
                                        deck = deck.contents[0]
                                    onesection.append({'title':arttitle, 'url':artlink, 'description':deck,'date':''})
                articles.append((sectiontitle, onesection))

        return articles

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOG_IN)
        br.select_form(name='aspnetForm')
        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtEmail'] = self.username
        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtPassword'] = self.password
        raw = br.submit("ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$btnLogin").read()
        if 'Logout</a>' not in raw:
            raise LoginFailed(
                _('Failed to log in, check your username and password for'
                  ' the calibre Periodicals service.'))
        return br

55
resources/recipes/siol.recipe
Normal file
@@ -0,0 +1,55 @@
# coding: utf-8
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.siol.si
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Siol(BasicNewsRecipe):
    title = u'Siol.net'
    __author__ = u'BlonG'
    description = "Multimedijski portal z aktualnimi vsebinami, intervjuji, komentarji iz Slovenije in sveta, sportal, trendi, avtomoto, blogos"
    oldest_article = 3
    language = 'sl'
    max_articles_per_feed = 20
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/siol_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    html2lrf_options = ['--base-font-size', '10']

    keep_only_tags = [
        dict(name='div', attrs={'id':'idContent'}),
    ]

    remove_tags = [
        dict(name='span', attrs={'class':'com1'}),
        dict(name='div', attrs={'class':'relation'}),
        dict(name='p', attrs={'class':'path'}),
        dict(name='div', attrs={'class':'clear_r'}),
        dict(name='div', attrs={'id':'appendix'}),
        dict(name='div', attrs={'id':'rail'}),
        dict(name='div', attrs={'id':'div_comments'}),
        dict(name='div', attrs={'class':'thumbs'}),
    ]

    feeds = [
         (u'Slovenija', u'http://www.siol.net/rss.aspx?path=Slovenija')
        ,(u'Lokalne novice', u'http://www.siol.net/rss.aspx?path=Slovenija/Lokalne_novice')
        ,(u'EU', u'http://www.siol.net/rss.aspx?path=EU')
        ,(u'Svet', u'http://www.siol.net/rss.aspx?path=Svet')
        ,(u'Gospodarstvo', u'http://www.siol.net/rss.aspx?path=Gospodarstvo')
        ,(u'Sportal', u'http://www.siol.net/rss.aspx?path=Sportal')
        ,(u'Trendi', u'http://www.siol.net/rss.aspx?path=Trendi')
        ,(u'Avtomoto', u'http://www.siol.net/rss.aspx?path=Avtomoto')
        ,(u'Tehnologija', u'http://www.siol.net/rss.aspx?path=Tehnologija')
        ,(u'TV / Film', u'http://www.siol.net/rss.aspx?path=TV')
    ]
60
resources/recipes/stnn.recipe
Normal file
@@ -0,0 +1,60 @@


__license__ = 'GPL v3'
__copyright__ = '2010, Larry Chan <larry1chan at gmail.com>'
'''
Singtao STNN
'''
from calibre.web.feeds.recipes import BasicNewsRecipe

class SingtaoSTNN(BasicNewsRecipe):
    title = 'Singtao STNN'
    __author__ = 'Larry Chan, larry1chan'
    description = 'Chinese News'
    oldest_article = 2
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'gb2312'
    publisher = 'Singtao STNN'
    category = 'news, China, world'
    language = 'zh'
    publication_type = 'newsportal'
    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    masthead_url = 'http://www.stnn.cc/images/0806/logo_080728.gif'
    conversion_options = {
         'comments'         : description
        ,'tags'             : category
        ,'language'         : language
        ,'publisher'        : publisher
        ,'linearize_tables' : True
    }


    remove_tags_before = dict(name='div', attrs={'class':['page_box']})
    remove_tags_after = dict(name='div', attrs={'class':['pagelist']})

    keep_only_tags = [
        dict(name='div', attrs={'class':['font_title clearfix']}),
        dict(name='div', attrs={'id':['content_zoom']})
    ]

    remove_attributes = ['width','height','href']

    # for a full list of rss check out http://www.stnn.cc/rss/

    feeds = [ (u'Headline News', u'http://www.stnn.cc/rss/news/index.xml'),
              (u'Breaking News', u'http://www.stnn.cc/rss/tufa/index.xml'),
              (u'Finance', u'http://www.stnn.cc/rss/fin/index.xml'),
              (u'Entertainment', u'http://www.stnn.cc/rss/ent/index.xml'),
              (u'International', u'http://www.stnn.cc/rss/guoji/index.xml'),
              (u'China', u'http://www.stnn.cc/rss/china/index.xml'),
              (u'Opinion', u'http://www.stnn.cc/rss/fin_op/index.xml'),
              (u'Blog', u'http://blog.stnn.cc/uploadfile/rssblogtypehotlog.xml'),
              (u'Hong Kong', u'http://www.stnn.cc/rss/hongkong/index.xml')
    ]

@@ -7,7 +7,7 @@ class AdvancedUserRecipe1284927619(BasicNewsRecipe):
    __author__ = 'noxxx'
    max_articles_per_feed = 100
    description = 'tagesanzeiger.ch: Nichts verpassen'
    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
    category = 'News, Politik, Nachrichten, Schweiz, Zuerich'
    language = 'de'

    conversion_options = {
24
resources/recipes/taggeschau_de.recipe
Normal file
@@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Tagesschau(BasicNewsRecipe):
    title = 'Tagesschau'
    description = 'Nachrichten der ARD'
    publisher = 'ARD'
    language = 'de'

    __author__ = 'Florian Andreas Pfaff'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    feeds = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')]

    remove_tags = [
        dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio']}),
        dict(name='div',
             attrs={'id':['socialBookmarks','seitenanfang']}),
        dict(name='ul',
             attrs={'class':['directLinks','directLinks weltatlas']}),
        dict(name='strong', attrs={'class':['boxTitle inv','inv']})
    ]
    keep_only_tags = [dict(name='div', attrs={'id':'centerCol'})]
46
resources/recipes/theecocolapse.recipe
Normal file
@@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
theeconomiccollapseblog.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class TheEconomicCollapse(BasicNewsRecipe):
    title                 = 'The Economic Collapse'
    __author__            = 'Darko Miletic'
    description           = 'Are You Prepared For The Coming Economic Collapse And The Next Great Depression?'
    publisher             = 'The Economic Collapse'
    category              = 'news, politics, USA, economy'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en'
    remove_empty_feeds    = True
    extra_css             = """
                            body{font-family: Tahoma,Arial,sans-serif }
                            img{margin-bottom: 0.4em}
                            """

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [
         dict(attrs={'class':'sociable'})
        ,dict(name=['iframe','object','embed','meta','link','base'])
    ]
    remove_attributes=['lang','onclick','width','height']
    keep_only_tags=[dict(attrs={'class':['post-headline','post-bodycopy clearfix','']})]

    feeds = [(u'Posts', u'http://theeconomiccollapseblog.com/feed')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

@@ -19,20 +19,22 @@ class TheEconomicTimes(BasicNewsRecipe):
    simultaneous_downloads = 1
    encoding               = 'utf-8'
    language               = 'en_IN'
    publication_type       = 'newspaper'
    publication_type       = 'newspaper'
    masthead_url           = 'http://economictimes.indiatimes.com/photo/2676871.cms'
    extra_css              = """ body{font-family: Arial,Helvetica,sans-serif}
                                 .heading1{font-size: xx-large; font-weight: bold} """

    extra_css              = """
                             body{font-family: Arial,Helvetica,sans-serif}
                             """

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    keep_only_tags = [dict(attrs={'class':['heading1','headingnext','Normal']})]

    keep_only_tags = [dict(attrs={'class':'printdiv'})]
    remove_tags = [dict(name=['object','link','embed','iframe','base','table','meta'])]
    remove_attributes = ['name']

    feeds = [(u'All articles', u'http://economictimes.indiatimes.com/rssfeedsdefault.cms')]

@@ -48,5 +50,5 @@ class TheEconomicTimes(BasicNewsRecipe):

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
            del item['style']
        return self.adeify_images(soup)

195
resources/recipes/vedomosti.recipe
Normal file
@@ -0,0 +1,195 @@
#!/usr/bin/env python

u'''
Ведомости
'''

from calibre.web.feeds.feedparser import parse
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe

class VedomostiRecipe(BasicNewsRecipe):
    title = u'Ведомости'
    __author__ = 'Nikolai Kotchetkov'
    publisher = 'vedomosti.ru'
    category = 'press, Russia'
    description = u'Ежедневная деловая газета'
    oldest_article = 3
    max_articles_per_feed = 100

    masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
    cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'

    #Add feed names if you want them to be sorted (feeds of this list appear first)
    sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги']

    encoding = 'cp1251'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    recursions = 0

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }


    keep_only_tags = [dict(name='td', attrs={'class' : ['second_content']})]

    remove_tags_after = [dict(name='div', attrs={'class' : 'article_text'})]

    remove_tags = [dict(name='div', attrs={'class' : ['sep', 'choice', 'articleRightTbl']})]

    feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml']

    #base URL for relative links
    base_url = u'http://www.vedomosti.ru'

    extra_css = 'h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'\
                'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'\
                'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'\
                '.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
                '.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
                '.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'\
                '.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'\
                '.article_desc {font-size: 1em; font-style:italic;}'

    def parse_index(self):
        try:
            feedData = parse(self.feeds[0])
            if not feedData:
                raise NotImplementedError
            self.log("parse_index: Feed loaded successfully.")
            if feedData.feed.has_key('title'):
                self.title = feedData.feed.title
                self.log("parse_index: Title updated to: ", self.title)
            if feedData.feed.has_key('description'):
                self.description = feedData.feed.description
                self.log("parse_index: Description updated to: ", self.description)

            def get_virtual_feed_articles(feed):
                if feeds.has_key(feed):
                    return feeds[feed][1]
                self.log("Adding new feed: ", feed)
                articles = []
                feeds[feed] = (feed, articles)
                return articles

            feeds = {}

            #Iterate feed items and distribute articles using tags
            for item in feedData.entries:
                link = item.get('link', '');
                title = item.get('title', '');
                if '' == link or '' == title:
                    continue
                article = {'title':title, 'url':link, 'description':item.get('description', ''), 'date':item.get('date', ''), 'content':''};
                if not item.has_key('tags'):
                    get_virtual_feed_articles('_default').append(article)
                    continue
                for tag in item.tags:
                    addedToDefault = False
                    term = tag.get('term', '')
                    if '' == term:
                        if (not addedToDefault):
                            get_virtual_feed_articles('_default').append(article)
                        continue
                    get_virtual_feed_articles(term).append(article)

            #Get feed list
            #Select sorted feeds first of all
            result = []
            for feedName in self.sortOrder:
                if (not feeds.has_key(feedName)): continue
                result.append(feeds[feedName])
                del feeds[feedName]
            result = result + feeds.values()

            return result

        except Exception, err:
            self.log(err)
            raise NotImplementedError

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        #self.log('Original: ', soup.prettify())

        #Find article
        contents = soup.find('div', {'class':['article_text']})
        if not contents:
            self.log('postprocess_html: article div not found!')
            return soup
        contents.extract()

        #Find title
        title = soup.find('h1')
        if title:
            contents.insert(0, title)

        #Find article image
        newstop = soup.find('div', {'class':['newstop']})
        if newstop:
            img = newstop.find('img')
            if img:
                imgDiv = Tag(soup, 'div')
                imgDiv['class'] = 'article_img'

                if img.has_key('width'):
                    del(img['width'])
                if img.has_key('height'):
                    del(img['height'])

                #find description
                element = img.parent.nextSibling

                img.extract()
                imgDiv.insert(0, img)

                while element:
                    #advance first, so that non-Tag siblings are skipped instead of looping forever
                    nextElement = element.nextSibling
                    if isinstance(element, Tag) and 'p' == element.name:
                        element.extract()
                        element['class'] = 'article_img_desc'
                        imgDiv.insert(len(imgDiv.contents), element)
                    element = nextElement

                contents.insert(1, imgDiv)

        #find article abstract
        abstract = soup.find('p', {'class':['subhead']})
        if abstract:
            abstract['class'] = 'article_desc'
            contents.insert(2, abstract)

        #Find article authors
        authorsDiv = soup.find('div', {'class':['autors']})
        if authorsDiv:
            authorsP = authorsDiv.find('p')
            if authorsP:
                authorsP['class'] = 'article_authors'
                contents.insert(len(contents.contents), authorsP)

        #Fix urls that use relative path
        urls = contents.findAll('a');
        if urls:
            for url in urls:
                if not url.has_key('href'):
                    continue
                if '/' == url['href'][0]:
                    url['href'] = self.base_url + url['href']

        body = soup.find('td', {'class':['second_content']})
        if body:
            body.replaceWith(contents)

        self.log('Result: ', soup.prettify())
        return soup

@@ -31,8 +31,9 @@ class WashingtonPost(BasicNewsRecipe):
            ('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
            ('Style',
             'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
            ('Sports',
             'http://feeds.washingtonpost.com/wp-dyn/rss/linkset/2010/08/19/LI2010081904067_xml'),
            ('NFL Sports',
             'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
            ('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
            ('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
            ]

72
resources/recipes/ynet.recipe
Normal file
@@ -0,0 +1,72 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe
import mechanize

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = "This is a recipe of Ynet.co.il. The recipe opens the article page and clicks on an advertisement so as not to hurt the site's advertising income."
    cover_url = 'http://www.bneiakiva.net/uploads/images/ynet%282%29.jpg'
    title = u'Ynet'
    __author__ = 'marbs'
    language = 'he'
    extra_css='img {max-width:100%;direction: rtl;} #article{direction: rtl;} div{direction: rtl;} title{direction: rtl; } article_description{direction: rtl; } a.article{direction: rtl; } calibre_feed_description{direction: rtl; } body{direction: ltr;}'
    remove_attributes = ['width']
    simultaneous_downloads = 5
    keep_only_tags = [dict(name='div', attrs={'id':'articleContainer'})]
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='p', attrs={'text':[' ']})]
    max_articles_per_feed = 100
    preprocess_regexps = [
        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]

    def preprocess_html(self, soup):
        soup.html['dir'] = 'rtl'
        soup.body['dir'] = 'rtl'
        return soup

    feeds = [(u'\u05d7\u05d3\u05e9\u05d5\u05ea',
              u'http://www.ynet.co.il/Integration/StoryRss2.xml'),
             (u'\u05db\u05dc\u05db\u05dc\u05d4',
              u'http://www.ynet.co.il/Integration/StoryRss6.xml'),
             (u'\u05e6\u05e8\u05db\u05e0\u05d5\u05ea',
              u'http://www.ynet.co.il/Integration/StoryRss437.xml'),
             (u'\u05e1\u05e4\u05d5\u05e8\u05d8',
              u'http://www.ynet.co.il/Integration/StoryRss3.xml'),
             (u'\u05ea\u05e8\u05d1\u05d5\u05ea',
              u'http://www.ynet.co.il/Integration/StoryRss538.xml'),
             (u'\u05de\u05e2\u05d5\u05e8\u05d1\u05d5\u05ea \u05d5\u05d7\u05d1\u05e8\u05d4',
              u'http://www.ynet.co.il/Integration/StoryRss3262.xml'),
             (u'\u05d1\u05e8\u05d9\u05d0\u05d5\u05ea',
              u'http://www.ynet.co.il/Integration/StoryRss1208.xml'),
             (u'\u05d9\u05e8\u05d5\u05e7',
              u'http://www.ynet.co.il/Integration/StoryRss4872.xml'),
             (u'\u05de\u05d7\u05e9\u05d1\u05d9\u05dd',
              u'http://www.ynet.co.il/Integration/StoryRss544.xml'),
             (u'\u05e8\u05db\u05d1', u'http://www.ynet.co.il/Integration/StoryRss550.xml'),
             (u'\u05ea\u05d9\u05d9\u05e8\u05d5\u05ea',
              u'http://www.ynet.co.il/Integration/StoryRss598.xml'),
             (u'\u05d4\u05d5\u05e8\u05d9\u05dd',
              u'http://www.ynet.co.il/Integration/StoryRss3052.xml'),
             (u'\u05d0\u05d5\u05db\u05dc',
              u'http://www.ynet.co.il/Integration/StoryRss975.xml'),
             (u'\u05d9\u05d4\u05d3\u05d5\u05ea',
              u'http://www.ynet.co.il/Integration/StoryRss4403.xml'),
             (u'\u05de\u05d3\u05e2 \u05d5\u05d8\u05d1\u05e2',
              u'http://www.ynet.co.il/Integration/StoryRss2142.xml'),
             (u'\u05d9\u05d7\u05e1\u05d9\u05dd',
              u'http://www.ynet.co.il/Integration/StoryRss3925.xml'),
             (u'\u05d3\u05e2\u05d5\u05ea',
              u'http://www.ynet.co.il/Integration/StoryRss194.xml')]

    def print_version(self, url):
        #remove from here
        br = BasicNewsRecipe.get_browser(self)
        br.open(url)
        br.follow_link(mechanize.Link(base_url = '', url =url, text = '', tag = 'a', attrs = [{'id':'buzzerATop'}]))
        #to here to stop supporting ynet...
        split1 = url.split("-")
        print_url = 'http://www.ynet.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
        return print_url
@@ -6,22 +6,25 @@ Fetch Die Zeit.
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class ZeitDe(BasicNewsRecipe):

    title = 'ZEIT Online'
    description = 'ZEIT Online'
    title = 'Zeit Online'
    description = 'Zeit Online'
    language = 'de'
    lang = 'de_DE'

    __author__ = 'Martin Pitt, Sujata Raman and Ingo Paschke'
    use_embedded_content = False
    __author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'

    max_articles_per_feed = 40
    remove_empty_feeds = True
    no_stylesheets = True
    no_javascript = True
    encoding = 'utf-8'

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }),
        dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
        dict(name='div', attrs={'id':["place_5","place_4","comments"]})
    ]

    keep_only_tags = [dict(id=['main'])]

    feeds = [
        ('Seite 1', 'http://newsfeed.zeit.de/index_xml'),
@@ -40,71 +43,31 @@ class ZeitDe(BasicNewsRecipe):
        ('Sport', 'http://newsfeed.zeit.de/sport/index'),
    ]

    extra_css = '''
        .supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
        .excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:small;}
        .title{font-family:Arial,Helvetica,sans-serif;font-size:large;clear:right;}
        .caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
        .copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
        .article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
        .quote{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
        .quote .cite{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small}
        .headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
        .inline{float:left;margin-top:0;margin-right:15px;position:relative;width:180px; }
        img.inline{float:none}
        .intertitle{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small;font-weight:700}
        .ebinfobox{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small;list-style-type:none;float:right;margin-top:0;border-left-style:solid;border-left-width:1px;padding-left:10px;}
        .infobox {border-style: solid; border-width: 1px;padding:8px;}
        .infobox dt {font-weight:700;}
    '''
    extra_css = '.excerpt{font-size:1em}.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'

    #filter_regexps = [r'ad.de.doubleclick.net/']

    keep_only_tags = [
        dict(name='div', attrs={'class':["article"]}) ,
        dict(name='ul', attrs={'class':["tools"]}) ,
    ]
    remove_tags = [
        dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),
        dict(name='div', attrs={'class':["pagination block","pagenav","inline link", "copyright"] }),
        dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
        dict(name='div', attrs={'id':["place_5","place_4","comments"]})
    ]

    remove_attributes = ['style', 'font']

    def get_article_url(self, article):
        ans = article.get('link',None)
        ans += "?page=all"
        ans += "?page=all&print=true"

        if 'video' in ans or 'quiz' in ans :
        if 'video' in ans or 'quiz' in ans or 'blog' in ans :
            ans = None
        return ans

    def preprocess_html(self, soup):
        for tag in soup.findAll(name=['ul','li']):
            tag.name = 'div'

        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        soup.head.insert(0,mtag)
        return soup

    def get_cover_url(self):
        try:
            inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
            return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
        except:
            return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        soup.head.insert(0,mtag)
        title = soup.find('h2', attrs={'class':'title'})
        if title is None:
            print "no title"
            return soup
        info = Tag(soup,'ul',[('class','ebinfobox')])
        tools = soup.find('ul', attrs={'class':'tools'})
        #author = tools.find('li','author first')
        for tag in ['author first', 'date', 'date first', 'author', 'source']:
            line = tools.find('li', tag)
            if line:
                info.insert(0,line)
        title.parent.insert(0,info)
        tools.extract()
        return soup


@ -30,23 +30,40 @@
<title>
    <xsl:value-of select="fb:description/fb:title-info/fb:book-title"/>
</title>
<style type="text/x-oeb1-css">
    A { color : #0002CC }
    A:HOVER { color : #BF0000 }
    BODY { background-color : #FEFEFE; color : #000000; font-family : Verdana, Geneva, Arial, Helvetica, sans-serif; text-align : justify }
    H1{ font-size : 160%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Black; background-color : #E7E7E7; margin-left : 0px; page-break-before : always; }
    H2{ font-size : 130%; font-style : normal; font-weight : bold; text-align : left; background-color : #EEEEEE; border : 1px solid Gray; page-break-before : always; }
    H3{ font-size : 110%; font-style : normal; font-weight : bold; text-align : left; background-color : #F1F1F1; border : 1px solid Silver;}
    H4{ font-size : 100%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
    H5{ font-size : 100%; font-style : italic; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
    H6{ font-size : 100%; font-style : italic; font-weight : normal; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
    SMALL{ font-size : 80% }
    BLOCKQUOTE{ margin-left :4em; margin-top:1em; margin-right:0.2em;}
    HR{ color : Black }
    DIV{font-family : "Times New Roman", Times, serif; text-align : justify}
    UL{margin-left: 0}
    .epigraph{width:50%; margin-left : 35%;}
<style type="text/css">
    a { color : #0002CC }

    a:hover { color : #BF0000 }

    body { background-color : #FEFEFE; color : #000000; font-family : Verdana, Geneva, Arial, Helvetica, sans-serif; text-align : justify }

    h1{ font-size : 160%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Black; background-color : #E7E7E7; margin-left : 0px; page-break-before : always; }

    h2{ font-size : 130%; font-style : normal; font-weight : bold; text-align : left; background-color : #EEEEEE; border : 1px solid Gray; page-break-before : always; }

    h3{ font-size : 110%; font-style : normal; font-weight : bold; text-align : left; background-color : #F1F1F1; border : 1px solid Silver;}

    h4{ font-size : 100%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}

    h5{ font-size : 100%; font-style : italic; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}

    h6{ font-size : 100%; font-style : italic; font-weight : normal; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}

    small { font-size : 80% }

    blockquote { margin-left :4em; margin-top:1em; margin-right:0.2em;}

    hr { color : Black }

    div {font-family : "Times New Roman", Times, serif; text-align : justify}

    ul {margin-left: 0}

    .epigraph{width:50%; margin-left : 35%;}

    div.paragraph { text-align: justify; text-indent: 2em; }
</style>
<link rel="stylesheet" type="text/css" href="inline-styles.css" />
</head>
<body>
    <xsl:for-each select="fb:description/fb:title-info/fb:annotation">
@ -136,12 +153,13 @@
        </xsl:choose>
    </xsl:variable>
    <xsl:if test="$section_has_title = 'None'">
        <a name="TOC_{generate-id()}" />
        <xsl:if test="@id">
            <xsl:element name="a">
                <xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
            </xsl:element>
        </xsl:if>
        <div id="TOC_{generate-id()}">
            <xsl:if test="@id">
                <xsl:element name="a">
                    <xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
                </xsl:element>
            </xsl:if>
        </div>
    </xsl:if>
    <xsl:apply-templates>
        <xsl:with-param name="section_toc_id" select="$section_has_title" />
@ -160,13 +178,13 @@
    </xsl:if>
    <xsl:if test="$section_toc_id != 'None'">
        <xsl:element name="a">
            <xsl:attribute name="name">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
            <xsl:attribute name="id">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
        </xsl:element>
    </xsl:if>
    <a name="TOC_{generate-id()}"></a>
    <xsl:if test="@id">
        <xsl:element name="a">
            <xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
            <xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
        </xsl:element>
    </xsl:if>
    <xsl:apply-templates/>
@ -176,7 +194,7 @@
    <xsl:element name="h6">
        <xsl:if test="@id">
            <xsl:element name="a">
                <xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
                <xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
            </xsl:element>
        </xsl:if>
        <xsl:apply-templates/>
@ -207,11 +225,18 @@
</xsl:template>
<!-- p -->
<xsl:template match="fb:p">
    <div align="justify"><xsl:if test="@id">
    <xsl:element name="div">
        <xsl:attribute name="class">paragraph</xsl:attribute>
        <xsl:if test="@id">
            <xsl:element name="a">
                <xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
            </xsl:element>
        </xsl:if>    <xsl:apply-templates/></div>
        </xsl:if>
        <xsl:if test="@style">
            <xsl:attribute name="style"><xsl:value-of select="@style"/></xsl:attribute>
        </xsl:if>
        <xsl:apply-templates/>
    </xsl:element>
</xsl:template>
<!-- strong -->
<xsl:template match="fb:strong">
60
resources/templates/html_export_default.css
Normal file
@ -0,0 +1,60 @@
body{
    margin:0px;
    padding: 0.5em;
    background-color:#F6F3E9;
    font-size:12px;
    font-family:Arial, Helvetica, sans-serif;
}

.calibreMeta{
    background-color:#39322B;
    color:white;
    padding:10px;
}

.calibreMeta a, .calibreEbNav a, .calibreEbNavTop a, .calibreToc a{
    color:white;
}

.calibreMeta h1{
    margin:0px;
    font-size:18px;
    background-color:#39322B;
}

.calibreEbookContent{
    padding:20px;
}

.calibreEbNav, .calibreEbNavTop{
    clear:both;
    background-color:#39322B;
    color:white;
    padding:10px;
    text-align:center;
}

.calibreEbNavTop{
    margin-bottom:20px;
}

.calibreEbNav a, .calibreEbNavTop a{
    padding:0px 5px;
}

.calibreTocIndex{
    line-height:18px;
}

.calibreToc{
    float:left;
    margin:20px;
    width:300px;
    background-color:#39322B;
    color:white;
    padding:10px;
}
.calibreEbookContent{
    width:600px;
    float:left;
}
74
resources/templates/html_export_default.tmpl
Normal file
@ -0,0 +1,74 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    ${head_content}$

    <link href="${cssLink}$" type="text/css" rel="stylesheet" />

</head>
<body>

    <div class="calibreMeta">
        <div class="calibreMetaTitle">
            ${pos1=1}$
            ${for title in meta.titles():}$
                ${if pos1:}$
                    <h1>
                        <a href="${tocUrl}$">${print title}$</a>
                    </h1>
                ${:else:}$
                    <div class="calibreMetaSubtitle">${print title}$</div>
                ${:endif}$
                ${pos1=0}$
            ${:endfor}$
        </div>
        <div class="calibreMetaAuthor">
            ${print ', '.join(meta.creators())}$
        </div>
    </div>

    <div class="calibreMain">

        <div class="calibreEbookContent">
            ${if prevLink or nextLink:}$
            <div class="calibreEbNavTop">
                ${if prevLink:}$
                    <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
                ${:else:}$
                    <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
                ${:endif}$

                ${if nextLink:}$
                    <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
                ${:endif}$
            </div>
            ${:endif}$

            ${ebookContent}$
        </div>

        ${if has_toc:}$
        <div class="calibreToc">
            <h2><a href="${tocUrl}$">${print _('Table of contents'),}$</a></h2>
            ${print toc()}$
        </div>
        ${:endif}$

        <div class="calibreEbNav">
            ${if prevLink:}$
                <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
            ${:else:}$
                <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
            ${:endif}$

            <a href="${tocUrl}$" class="calibreAHome">${print _('start'),}$</a>

            ${if nextLink:}$
                <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
            ${:endif}$
        </div>

    </div>

</body>
</html>
61
resources/templates/html_export_default_index.tmpl
Normal file
@ -0,0 +1,61 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />

    <link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
    <link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />

    <title>${print ', '.join(meta.creators()),}$ - ${print meta.titles().next(); meta.titles().close()}$</title>

    ${for item in meta:}$
        <meta ${print 'name="DC.'+item['name']+'"',}$ ${print 'content="'+item['value']+'"',}$ />
    ${:endfor}$

    <link href="${cssLink}$" type="text/css" rel="stylesheet" />
</head>
<body>

    <div class="calibreMeta">
        <div class="calibreMetaTitle">
            ${pos1=1}$
            ${for title in meta.titles():}$
                ${if pos1:}$
                    <h1>
                        <a href="${tocUrl}$">${print title}$</a>
                    </h1>
                ${:else:}$
                    <div class="calibreMetaSubtitle">${print title}$</div>
                ${:endif}$
                ${pos1=0}$
            ${:endfor}$
        </div>
        <div class="calibreMetaAuthor">
            ${print ', '.join(meta.creators()),}$
        </div>
    </div>

    <div class="calibreMain">
        <div class="calibreEbookContent">

            ${if has_toc:}$
            <div class="calibreTocIndex">
                <h2>${print _('Table of contents'),}$</h2>
                ${toc}$
            </div>
            ${:else:}$
                <h2>${print _('No table of contents present'),}$</h2>
                <div><strong><a href="${nextLink}$">${print _('begin to read'),}$</a></strong></div>
            ${:endif}$

        </div>

        <div class="calibreEbNav">
            ${if nextLink:}$
                <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
            ${:endif}$
        </div>
    </div>

</body>
</html>
@ -20,20 +20,4 @@ function setup_image_scaling_handlers() {
    });
}

function extract_svged_images() {
    $("svg").each(function() {
        var children = $(this).children("img");
        if (children.length == 1) {
            var img = $(children[0]);
            var href = img.attr('xlink:href');
            if (href != undefined) {
                $(this).replaceWith('<div style="text-align:center; margin: 0; padding: 0"><img style="height: 98%" alt="SVG Image" src="' + href +'"></img></div>');
            }
        }
    });
}

$(document).ready(function() {
    //extract_svged_images();
});

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

__all__ = [
        'pot', 'translations', 'get_translations', 'iso639',
        'build', 'build_pdf2xml',
        'build', 'build_pdf2xml', 'server',
        'gui',
        'develop', 'install',
        'resources',
@ -35,6 +35,9 @@ from setup.extensions import Build, BuildPDF2XML
build = Build()
build_pdf2xml = BuildPDF2XML()

from setup.server import Server
server = Server()

from setup.install import Develop, Install, Sdist
develop = Develop()
install = Install()
@ -54,7 +54,7 @@ reflow_error = poppler_error if poppler_error else magick_error

pdfreflow_libs = []
if iswindows:
    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']
    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32', 'zlib']

extensions = [

@ -348,8 +348,12 @@ class Build(Command):
        VERSION = 1.0.0
        CONFIG += %s
        ''')%(ext.name, ' '.join(ext.headers), ' '.join(ext.sources), archs)
        pro = pro.replace('\\', '\\\\')
        open(ext.name+'.pro', 'wb').write(pro)
        subprocess.check_call([QMAKE, '-o', 'Makefile', ext.name+'.pro'])
        qmc = [QMAKE, '-o', 'Makefile']
        if iswindows:
            qmc += ['-spec', 'win32-msvc2008']
        subprocess.check_call(qmc + [ext.name+'.pro'])
        subprocess.check_call([make, '-f', 'Makefile'])
        objects = glob.glob(obj_pat)
        return list(map(self.a, objects))
@ -11,7 +11,7 @@ import subprocess, tempfile, os, time
from setup import Command, installer_name
from setup.build_environment import HOST, PROJECT

BASE_RSYNC = 'rsync -avz --delete'.split()
BASE_RSYNC = ['rsync', '-avz', '--delete']
EXCLUDES = []
for x in [
    'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac',
@ -42,13 +42,13 @@ class Push(Command):
        threads = []
        for host in (
                r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
                'kovid@ox:calibre'
                'kovid@ox:calibre',
                r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
                ):
            rcmd = BASE_RSYNC + EXCLUDES + ['.', host]
            print '\n\nPushing to:', host, '\n'
            threads.append(Thread(target=subprocess.check_call, args=(rcmd,)))
            threads[-1].start()
            subprocess.check_call(rcmd)
        for thread in threads:
            thread.join()

@ -13,7 +13,7 @@ from setup import Command, modules, functions, basenames, __version__, \
from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn

QT_DIR = 'C:\\Qt\\4.6.3'
QT_DIR = 'Q:\\Qt\\4.7.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
@ -28,15 +28,16 @@ If there are no windows binaries already compiled for the version of python you

Run the following command to install python dependencies::

    easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython
    easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform

Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)
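For example, one way to keep easy_install from picking up the 3.1.x series is to pin an explicit 3.0.x release (the version number below is only an illustration; use whichever 3.0.x release is current)::

    easy_install --always-unzip "BeautifulSoup==3.0.8.1"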
Qt
--------

Extract the Qt source code to C:\Qt\4.x.x. Run configure and make::

    configure -opensource -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc -no-qt3support -webkit -xmlpatterns -no-phonon
    nmake
    configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license && nmake

SIP
-----
@ -213,7 +214,7 @@ It contains correct fonts.conf etc.
poppler
-------------

In Cmake: disable GTK, Qt, OPenjpeg, zlib, lcms, gtk_tests, qt_tests. Enable qt4, jpeg, png and zlib
In Cmake: disable GTK, Qt, OPenjpeg, cpp, lcms, gtk_tests, qt_tests. Enable qt4, jpeg, png and zlib

NOTE: poppler must be built as a static library, unless you build the qt4 bindings
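The static-library requirement maps onto the stock CMake switch (BUILD_SHARED_LIBS is a generic CMake cache variable, not a poppler-specific option, and the source path below is a placeholder)::

    cmake -DBUILD_SHARED_LIBS=OFF <path-to-poppler-source>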
128
setup/server.py
Normal file
@ -0,0 +1,128 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import subprocess, tempfile, os, time, sys, telnetlib
from threading import RLock

from setup import Command

try:
    from pyinotify import WatchManager, ThreadedNotifier, EventsCodes, ProcessEvent
except:
    wm = None
else:
    wm = WatchManager()
    flags = EventsCodes.ALL_FLAGS
    mask = flags['IN_MODIFY']

class ProcessEvents(ProcessEvent):

    def __init__(self, command):
        ProcessEvent.__init__(self)
        self.command = command

    def process_default(self, event):
        name = getattr(event, 'name', None)
        if not name:
            return
        ext = os.path.splitext(name)[1]
        reload = False
        if ext == '.py':
            reload = True
            print
            print name, 'changed'
            self.command.kill_server()
            self.command.launch_server()
            print self.command.prompt,
            sys.stdout.flush()

        if reload:
            self.command.reload_browser(delay=1)


class Server(Command):

    description = 'Run the calibre server in development mode conveniently'

    MONOCLE_PATH = '../monocle'

    def rebuild_monocole(self):
        subprocess.check_call(['sprocketize', '-C', self.MONOCLE_PATH,
            '-I', 'src', 'src/monocle.js'],
            stdout=open('resources/content_server/read/monocle.js', 'wb'))

    def launch_server(self):
        print 'Starting server...\n'
        with self.lock:
            self.rebuild_monocole()
            self.server_proc = p = subprocess.Popen(['calibre-server', '--develop'],
                    stderr=subprocess.STDOUT, stdout=self.server_log)
            time.sleep(0.2)
            if p.poll() is not None:
                print 'Starting server failed'
                raise SystemExit(1)
            return p

    def kill_server(self):
        print 'Killing server...\n'
        if self.server_proc is not None:
            with self.lock:
                if self.server_proc.poll() is None:
                    self.server_proc.terminate()
                while self.server_proc.poll() is None:
                    time.sleep(0.1)

    def watch(self):
        if wm is not None:
            self.notifier = ThreadedNotifier(wm, ProcessEvents(self))
            self.notifier.start()
            self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)

    def reload_browser(self, delay=0.1):
        time.sleep(delay)
        try:
            t = telnetlib.Telnet('localhost', 4242)
            t.read_until("repl>")
            t.write('BrowserReload();')
            t.read_until("repl>")
            t.close()
        except:
            print 'Failed to reload browser'
            import traceback
            traceback.print_exc()

    def run(self, opts):
        self.lock = RLock()
        tdir = tempfile.gettempdir()
        logf = os.path.join(tdir, 'calibre-server.log')
        self.server_log = open(logf, 'ab')
        self.prompt = 'Press Enter to kill/restart server. Ctrl+C to quit: '
        print 'Server log available at:', logf
        print
        self.watch()

        first = True
        while True:
            self.launch_server()
            if not first:
                self.reload_browser()
            first = False

            try:
                raw_input(self.prompt)
            except:
                print
                self.kill_server()
                break
            else:
                self.kill_server()
            print

        if hasattr(self, 'notifier'):
            self.notifier.stop()
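Assuming the standard setup.py command dispatch used by the other entries in __all__ (the Server command is registered in setup/__init__.py above), the development server would be started from a source checkout with::

    python setup.py server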
@ -21,8 +21,6 @@ from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
        filesystem_encoding, plugins, config_dir
from calibre.startup import winutil, winutilerror

import mechanize

uuid.uuid4() # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo

if False:
@ -269,7 +267,8 @@ def browser(honor_time=True, max_time=2, mobile_browser=False):
    :param honor_time: If True honors pause time in refresh requests
    :param max_time: Maximum time in seconds to wait during a refresh request
    '''
    opener = mechanize.Browser()
    from calibre.utils.browser import Browser
    opener = Browser()
    opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
    opener.set_handle_robots(False)
    opener.addheaders = [('User-agent', ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
@ -445,6 +444,9 @@ xml_entity_to_unicode = partial(entity_to_unicode, result_exceptions = {
def replace_entities(raw):
    return _ent_pat.sub(entity_to_unicode, raw)

def xml_replace_entities(raw):
    return _ent_pat.sub(xml_entity_to_unicode, raw)

def prepare_string_for_xml(raw, attribute=False):
    raw = _ent_pat.sub(entity_to_unicode, raw)
    raw = raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.23'
__version__ = '0.7.28'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re
@ -105,7 +105,9 @@ else:
        os.makedirs(config_dir, mode=CONFIG_DIR_MODE)
    except:
        pass
    if not os.access(config_dir, os.W_OK) or not os.access(config_dir, os.X_OK):
    if not os.path.exists(config_dir) or \
            not os.access(config_dir, os.W_OK) or not \
            os.access(config_dir, os.X_OK):
        print 'No write acces to', config_dir, 'using a temporary dir instead'
        import tempfile, atexit
        config_dir = tempfile.mkdtemp(prefix='calibre-config-')
@ -2,9 +2,7 @@ import os.path
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

import textwrap
import os
import glob
import textwrap, os, glob, functools
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
        MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
from calibre.constants import numeric_version
@ -95,10 +93,12 @@ class ComicMetadataReader(MetadataReaderPlugin):

    def get_metadata(self, stream, ftype):
        if ftype == 'cbr':
            from calibre.libunrar import extract_member as extract_first
            from calibre.libunrar import extract_first_alphabetically as extract_first
            extract_first
        else:
            from calibre.libunzip import extract_member as extract_first
            from calibre.libunzip import extract_member
            extract_first = functools.partial(extract_member,
                    sort_alphabetically=True)
        from calibre.ebooks.metadata import MetaInformation
        ret = extract_first(stream)
        mi = MetaInformation(None, None)
@ -292,7 +292,7 @@ class RTFMetadataReader(MetadataReaderPlugin):
    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.rtf import get_metadata
        return get_metadata(stream)


class SNBMetadataReader(MetadataReaderPlugin):

    name = 'Read SNB metadata'
@ -446,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.rtf.output import RTFOutput
from calibre.ebooks.tcr.output import TCROutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.html.output import HTMLOutput
from calibre.ebooks.snb.output import SNBOutput

from calibre.customize.profiles import input_profiles, output_profiles
@ -453,7 +454,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
from calibre.devices.apple.driver import ITUNES
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.cybook.driver import CYBOOK, ORIZON
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
        POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
        BOOQ, ELONEX, POCKETBOOK301, MENTOR
@ -461,7 +462,7 @@ from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
from calibre.devices.prs505.driver import PRS505
from calibre.devices.android.driver import ANDROID, S60
from calibre.devices.nokia.driver import N770, N810, E71X, E52
@ -471,10 +472,11 @@ from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
        SOVOS, PICO
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO

@ -524,6 +526,7 @@ plugins += [
    RTFOutput,
    TCROutput,
    TXTOutput,
    HTMLOutput,
    SNBOutput,
]
# Order here matters. The first matched device is the one used.
@ -532,6 +535,7 @@ plugins += [
    HANLINV5,
    BLACKBERRY,
    CYBOOK,
    ORIZON,
    ILIAD,
    IREXDR1000,
    IREXDR800,
@ -545,6 +549,7 @@ plugins += [
    KINDLE2,
    KINDLE_DX,
    NOOK,
    NOOK_COLOR,
    PRS505,
    ANDROID,
    S60,
@ -572,6 +577,7 @@ plugins += [
    ELONEX,
    TECLAST_K3,
    NEWSMY,
    PICO,
    IPAPYRUS,
    SOVOS,
    EDGE,
@ -584,6 +590,7 @@ plugins += [
    AVANT,
    MENTOR,
    SWEEX,
    Q600,
    KOGAN,
    PDNOVEL,
    SPECTRA,
@ -890,4 +897,3 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
        Email, Server, Plugins, Tweaks, Misc]

#}}}
@ -120,6 +120,11 @@ class InputFormatPlugin(Plugin):
    #: to make its output suitable for viewing
    for_viewer = False

    #: The encoding that this input plugin creates files in. A value of
    #: None means that the encoding is undefined and must be
    #: detected individually
    output_encoding = 'utf-8'

    #: Options shared by all Input format plugins. Do not override
    #: in sub-classes. Use :attr:`options` instead. Every option must be an
    #: instance of :class:`OptionRecommendation`.
@ -289,3 +294,8 @@ class OutputFormatPlugin(Plugin):
        '''
        raise NotImplementedError

    @property
    def is_periodical(self):
        return self.oeb.metadata.publication_type and \
            unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')

@ -4,6 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys
from itertools import izip
from xml.sax.saxutils import escape

@ -249,8 +250,11 @@ class OutputProfile(Plugin):
    #: If True, the date is appended to the title of downloaded news
    periodical_date_in_title = True

    #: The character used to represent a star in ratings
    #: Characters used in jackets and catalogs
    missing_char = u'x'
    ratings_char = u'*'
    empty_ratings_char = u' '
    read_char = u'+'

    #: Unsupported unicode characters to be replaced during preprocessing
    unsupported_unicode_chars = []
@ -258,6 +262,9 @@ class OutputProfile(Plugin):
    #: Number of ems that the left margin of a blockquote is rendered as
    mobi_ems_per_blockquote = 1.0

    #: Special periodical formatting needed in EPUB
    epub_periodical_format = None

    @classmethod
    def tags_to_string(cls, tags):
        return escape(', '.join(tags))
@ -283,7 +290,12 @@ class iPadOutput(OutputProfile):
            'macros': {'border-width': '{length}|medium|thick|thin'}
        }
    ]
    ratings_char = u'\u2605'

    missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
    ratings_char = u'\u2605' # filled star
    empty_ratings_char = u'\u2606' # hollow star
    read_char = u'\u2713' # check mark

    touchscreen = True
    # touchscreen_news_css {{{
    touchscreen_news_css = u'''
@ -417,6 +429,13 @@ class iPadOutput(OutputProfile):
    '''
    # }}}

class TabletOutput(iPadOutput):
    name = 'Tablet'
    short_name = 'tablet'
    description = _('Intended for generic tablet devices, does no resizing of images')

    screen_size = (sys.maxint, sys.maxint)
    comic_screen_size = (sys.maxint, sys.maxint)

class SonyReaderOutput(OutputProfile):

@ -431,6 +450,9 @@ class SonyReaderOutput(OutputProfile):
    fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
    unsupported_unicode_chars = [u'\u201f', u'\u201b']

    epub_periodical_format = 'sony'
    #periodical_date_in_title = False


class KoboReaderOutput(OutputProfile):

@ -484,7 +506,6 @@ class SonyReaderLandscapeOutput(SonyReaderOutput):
    screen_size = (784, 1012)
    comic_screen_size = (784, 1012)


class MSReaderOutput(OutputProfile):

    name = 'Microsoft Reader'
@ -553,6 +574,8 @@ class CybookOpusOutput(SonyReaderOutput):
    fbase = 16
    fsizes = [12, 12, 14, 16, 18, 20, 22, 24]

    epub_periodical_format = None

class KindleOutput(OutputProfile):

    name = 'Kindle'
@ -566,7 +589,12 @@ class KindleOutput(OutputProfile):
    fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
    supports_mobi_indexing = True
    periodical_date_in_title = False

    missing_char = u'x\u2009'
    empty_ratings_char = u'\u2606'
    ratings_char = u'\u2605'
    read_char = u'\u2713'

    mobi_ems_per_blockquote = 2.0

    @classmethod
@ -583,9 +611,12 @@ class KindleDXOutput(OutputProfile):
    # Screen size is a best guess
    screen_size = (744, 1022)
    dpi = 150.0
    comic_screen_size = (741, 1022)
    comic_screen_size = (771, 1116)
    #comic_screen_size = (741, 1022)
    supports_mobi_indexing = True
    periodical_date_in_title = False
    ratings_char = u'\u2605'
    read_char = u'\u2713'
    mobi_ems_per_blockquote = 2.0

    @classmethod
@ -649,13 +680,14 @@ class NookOutput(OutputProfile):

class BambookOutput(OutputProfile):

    author = 'Li Fanxi'
    name = 'Sanda Bambook'
    short_name = 'bambook'
    description = _('This profile is intended for the Sanda Bambook.')

    # Screen size is a best guess
    screen_size = (800, 600)
    comic_screen_size = (700, 540)
    screen_size = (600, 800)
    comic_screen_size = (540, 700)
    dpi = 168.451
    fbase = 12
    fsizes = [10, 12, 14, 16]
@ -663,7 +695,7 @@ class BambookOutput(OutputProfile):
output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
        SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
        HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
        iPadOutput, KoboReaderOutput,
        iPadOutput, KoboReaderOutput, TabletOutput,
        SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
        IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
        BambookOutput, ]
@ -20,7 +20,11 @@ class ANDROID(USBMS):
    VENDOR_ID = {
            # HTC
            0x0bb4 : { 0x0c02 : [0x100, 0x0227], 0x0c01 : [0x100, 0x0227], 0x0ff9
                : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226]},
                : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
                0xc92 : [0x100]},

            # Eken
            0x040d : { 0x8510 : [0x0001] },

            # Motorola
            0x22b8 : { 0x41d9 : [0x216], 0x2d67 : [0x100], 0x41db : [0x216],
@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.epub import set_metadata
from calibre.library.server.utils import strftime
from calibre.utils.config import config_dir, prefs
from calibre.utils.date import isoformat, now, parse_date
from calibre.utils.date import now, parse_date
from calibre.utils.logging import Log
from calibre.utils.zipfile import ZipFile

@ -1221,12 +1221,19 @@ class ITUNES(DriverBase):
            return thumb

        if isosx:
            # The following commands generate an error, but the artwork does in fact
            # get sent to the device. Seems like a bug in Apple's automation interface?
            # Could also be a problem with the integrity of the cover data?
            if lb_added:
                lb_added.artworks[1].data_.set(cover_data)
                try:
                    lb_added.artworks[1].data_.set(cover_data)
                except:
                    if DEBUG:
                        self.log.warning(" iTunes automation interface reported an error"
                            " when adding artwork to '%s' in the iTunes Library" % metadata.title)
                    pass

            if db_added:
                # The following command generates an error, but the artwork does in fact
                # get sent to the device. Seems like a bug in Apple's automation interface
                try:
                    db_added.artworks[1].data_.set(cover_data)
                except:
@ -2521,11 +2528,11 @@ class ITUNES(DriverBase):
                metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                    old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
            else:
                metadata.timestamp = isoformat(now())
                metadata.timestamp = now()
                if DEBUG:
                    self.log.info(" add timestamp: %s" % metadata.timestamp)
        else:
            metadata.timestamp = isoformat(now())
            metadata.timestamp = now()
            if DEBUG:
                self.log.warning(" missing <metadata> block in OPF file")
                self.log.info(" add timestamp: %s" % metadata.timestamp)