Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit a1c53e48e8: merged main branch & added --extracted option
Changelog.yaml (158 lines changed)

@@ -4,6 +4,164 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.
 
+- version: 0.7.26
+  date: 2010-10-30
+
+  new features:
+    - title: "Check library: Allow wildcards in ignore names field"
+
+  bug fixes:
+    - title: "Fix regression in 0.7.25 that broke reading metadata from filenames."
+
+    - title: "Fix regression in 0.7.25 that caused original files to be mistakenly removed when adding books recursively"
+
+    - title: "Fix long series/publisher causing the edit metadata in bulk dialog to become very large"
+      tickets: [7332]
+
+    - title: "Only add the SONY periodical code to downloaded news if the output profile is set to one of the SONY reader profiles. This is needed because the ever delightful Stanza crashes and burns when an EPUB has the periodical code"
+
+  improved recipes:
+    - El Periodico
+    - New Zealand Herald
+
+  new recipes:
+    - title: "Tagesschau.de"
+      author: "Florian Andreas Pfaff"
+
+    - title: "Gamespot Reviews"
+      author: "Marc Tonsing"
+
+- version: 0.7.25
+  date: 2010-10-29
+
+  new features:
+    - title: "Add support for the SONY periodical format."
+      description: "This means that news downloaded by calibre and sent to a newer SONY device (350/650/900) should appear in the Periodicals section and have the special periodicals navigation user interface"
+      type: major
+
+    - title: "Content server: Make the new browsing interface the default. The old interface can be accessed at /old"
+
+    - title: "Content server: Allow running of the content server as a WSGI application within another server. A tutorial for this has been added to the User Manual."
+
+    - title: "Support for the Pico Life reader, Kobo Wifi and HTC Aria"
+
+    - title: "Content server: Add a new --url-prefix command line option to ease the use of the server with a reverse proxy"
+
+    - title: "New social metadata plugin for Amazon that does not rely on AWS. Since Amazon broke AWS, it is recommended you upgrade to this version if you use metadata from Amazon"
+
+    - title: "Add a tweak to specify the fonts used when generating the default cover"
+
+    - title: "Add an output profile for generic Tablet devices"
+      tickets: [7289]
+
+    - title: "SONY driver: Allow sorting of collections by arbitrary field via a new tweak."
+
+    - title: "Content server: Make /mobile a little prettier"
+
+    - title: "Add a button to 'Library Check' to automatically delete spurious files and folders"
+
+  bug fixes:
+    - title: "FB2 Input: Lots of love. Handle stylesheets and style attributes. Make parsing of malformed FB2 files more robust."
+      tickets: [7219, 7230]
+
+    - title: "Fix auto send of news to device with multiple calibre libraries. The fix means that if you have any pending news to be sent, it will be ignored after the update. Future news downloads will once again be automatically sent to the device."
+
+    - title: "MOBI Output: Conversion of super/sub scripts now handles nested tags."
+      tickets: [7264]
+
+    - title: "Conversion pipeline: Fix parsing of XML encoding declarations."
+      tickets: [7328]
+
+    - title: "Pandigital (Kobo): Upload thumbnails to the correct location"
+      tickets: [7165]
+
+    - title: "Fix auto emailed news with non-ASCII characters in the title not being delivered to Kindle"
+      tickets: [7322]
+
+    - title: "Read metadata only after on-import plugins have run when adding books to the GUI"
+      tickets: [7245]
+
+    - title: "Various fixes for bugs caused by non-ASCII temporary paths on Windows with non UTF-8 filesystem encodings"
+      tickets: [7288]
+
+    - title: "Various fixes/enhancements to SNB Output"
+
+    - title: "Allow the Tag editor in the edit metadata dialog to be used even if tags have been changed"
+      tickets: [7298]
+
+    - title: "Fix crash on some OS X machines when Preferences->Conversion->Output is clicked"
+
+    - title: "MOBI indexing: Fix last entry sometimes missing"
+      tickets: [6595]
+
+    - title: "Fix regression causing books to be deselected after sending to device"
+      tickets: [7271]
+
+    - title: "Conversion pipeline: Fix rescaling of GIF images not working"
+      tickets: [7306]
+
+    - title: "Update PDF metadata/conversion libraries in the Windows build"
+
+    - title: "Fix timezone bug when searching on date fields"
+      tickets: [7300]
+
+    - title: "Fix regression that caused the viewer to crash if the main application is closed"
+      tickets: [7276]
+
+    - title: "Fix bug causing a spurious metadata.opf file to be written at the root of the calibre library when adding books"
+
+    - title: "Use the same title casing algorithm in all places"
+
+    - title: "Fix bulk edit of dual state boolean custom columns"
+
+    - title: "Increase image size for comics in the Kindle DX profile, for better conversion of comics to PDF"
+
+    - title: "Fix restore db to not die when conflicting custom columns are encountered, and report conflicting column errors. Fix exceptions when referencing invalid _index fields."
+
+    - title: "Fix auto merge books not respecting the article sort tweak"
+      tickets: [7147]
+
+    - title: "Linux device drivers: Fix udisks based ejecting for devices with multiple nodes"
+
+    - title: "Linux device mounting: Mount the drive with the lowest kernel name as main memory"
+
+    - title: "Fix use of numeric fields in templates"
+
+    - title: "EPUB Input: Handle EPUB files with multiple OPF files."
+      tickets: [7229]
+
+    - title: "Setting EPUB metadata: Fix date format. Fix language being overwritten by 'und' when unspecified. Fix empty ISBN identifier being created"
+
+    - title: "Fix inability to delete a Series listing from the List view. Also dismiss the fetch metadata dialog when no metadata is found automatically"
+      tickets: [7221, 7220]
+
+    - title: "Content server: Handle switching of libraries in the GUI gracefully"
+
+    - title: "calibre-server: Use the cherrypy implementation of --pidfile and --daemonize"
+
+  new recipes:
+    - title: "Ming Pao"
+      author: "Eddie Lau"
+
+    - title: "lenta.ru"
+      author: "Nikolai Kotchetkov"
+
+    - title: "frazpc.pl"
+      author: "Tomasz Dlugosz"
+
+    - title: "Perfil and The Economic Collapse Blog"
+      author: "Darko Miletic"
+
+    - title: "STNN"
+      author: "Larry Chan"
+
+  improved recipes:
+    - CubaDebate
+    - El Pais
+    - Fox News
+    - New Scientist
+    - The Economic Times of India
+
 - version: 0.7.24
   date: 2010-10-17
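The {prefix} placeholder that appears throughout the template and script hunks below is driven by the two content-server features announced above: the new --url-prefix command line option and the ability to run the server as a WSGI application inside another server. As a minimal, hypothetical sketch of the WSGI embedding (the module path, function name and signature follow the User Manual tutorial mentioned in the changelog, but are assumptions here, not part of this diff):

    # Sketch only: mount the calibre content server inside another WSGI
    # server, under /calibre behind a reverse proxy. With this prefix every
    # {prefix} placeholder in the templates below expands to '/calibre'.
    # create_wsgi_app and its signature are assumed, not taken from this diff.
    from calibre.library.server.main import create_wsgi_app

    application = create_wsgi_app(
        '/path/to/calibre/library',   # hypothetical library path
        prefix='/calibre')            # plays the same role as --url-prefix

The standalone equivalent would be running calibre-server with --url-prefix /calibre, with the reverse proxy forwarding /calibre to the server.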
@@ -208,6 +208,8 @@ h2.library_name {
 }
 
+.toplevel li a { text-decoration: none; }
+
 .toplevel li img {
     vertical-align: middle;
     margin-right: 1em;
@@ -261,9 +263,16 @@ h2.library_name {
 }
 
-.category div.category-item span.href { display: none }
+.category div.category-item a { text-decoration: none; color: inherit; }
 
-#groups span.load_href { display: none }
+#groups a.load_href {
+    text-decoration: none;
+    color: inherit;
+    font-size: medium;
+    font-weight: normal;
+    padding: 0;
+    padding-left: 0.5em;
+}
 
 #groups h3 {
     font-weight: bold;
@@ -8,24 +8,25 @@
     <meta http-equiv="X-UA-Compatible" content="IE=100" />
     <link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
 
-    <link rel="stylesheet" type="text/css" href="/static/browse/browse.css" />
+    <link rel="stylesheet" type="text/css" href="{prefix}/static/browse/browse.css" />
-    <link type="text/css" href="/static/jquery_ui/css/humanity-custom/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
+    <link type="text/css" href="{prefix}/static/jquery_ui/css/humanity-custom/jquery-ui-1.8.5.custom.css" rel="stylesheet" />
-    <link rel="stylesheet" type="text/css" href="/static/jquery.multiselect.css" />
+    <link rel="stylesheet" type="text/css" href="{prefix}/static/jquery.multiselect.css" />
 
-    <script type="text/javascript" src="/static/jquery.js"></script>
+    <script type="text/javascript" src="{prefix}/static/jquery.js"></script>
-    <script type="text/javascript" src="/static/jquery.corner.js"></script>
+    <script type="text/javascript" src="{prefix}/static/jquery.corner.js"></script>
 
     <script type="text/javascript"
-        src="/static/jquery_ui/js/jquery-ui-1.8.5.custom.min.js"></script>
+        src="{prefix}/static/jquery_ui/js/jquery-ui-1.8.5.custom.min.js"></script>
     <script type="text/javascript"
-        src="/static/jquery.multiselect.min.js"></script>
+        src="{prefix}/static/jquery.multiselect.min.js"></script>
 
 
-    <script type="text/javascript" src="/static/browse/browse.js"></script>
+    <script type="text/javascript" src="{prefix}/static/browse/browse.js"></script>
 
     <script type="text/javascript">
         var sort_cookie_name = "{sort_cookie_name}";
         var sort_select_label = "{sort_select_label}";
+        var url_prefix = "{prefix}";
         $(document).ready(function() {{
             init();
             {script}
@@ -39,16 +40,16 @@
     <div id="header">
         <div class="area">
             <div class="bubble">
-                <p><a href="/browse" title="Return to top level"
+                <p><a href="{prefix}/browse" title="Return to top level"
                     >→ home ←</a></p>
             </div>
         </div>
         <div id="nav-container">
             <ul id="primary-nav">
-                <li><a id="nav-mobile" href="/mobile" title="A version of this website suited for mobile browsers">Mobile</a></li>
+                <li><a id="nav-mobile" href="{prefix}/mobile" title="A version of this website suited for mobile browsers">Mobile</a></li>
 
-                <li><a id="nav-demo" href="/old" title="The old version of this webiste">Old</a></li>
+                <li><a id="nav-demo" href="{prefix}/old" title="The old version of this webiste">Old</a></li>
-                <li><a id="nav-download" href="/opds" title="An OPDS feed based version of this website, used in special purpose applications">Feed</a></li>
+                <li><a id="nav-download" href="{prefix}/opds" title="An OPDS feed based version of this website, used in special purpose applications">Feed</a></li>
             </ul>
         </div>
 
@@ -58,7 +59,7 @@
     <input type="hidden" name="cmd" value="_s-xclick"></input>
     <input type="hidden" name="hosted_button_id" value="3028915"></input>
     <input type="image"
-        src="http://calibre-ebook.com/site_media//img/button-donate.png"
+        src="{prefix}/static/button-donate.png"
         name="submit"></input>
     <img alt="" src="https://www.paypal.com/en_US/i/scr/pixel.gif"
         width="1" height="1"></img>
@@ -76,7 +77,7 @@
     </select>
 </div>
 <div id="search_box">
-    <form name="search_form" action="/browse/search" method="get" accept-charset="UTF-8">
+    <form name="search_form" action="{prefix}/browse/search" method="get" accept-charset="UTF-8">
         <input value="{initial_search}" type="text" title="Search" name="query"
             class="search_input" />
         <input type="submit" value="Search" title="Search" alt="Search" />
@@ -109,14 +109,14 @@ function toplevel_layout() {
     var last = $(".toplevel li").last();
     var title = $('.toplevel h3').first();
     var bottom = last.position().top + last.height() - title.position().top;
-    $("#main").height(Math.max(200, bottom));
+    $("#main").height(Math.max(200, bottom+75));
 }
 
 function toplevel() {
     $(".sort_select").hide();
 
     $(".toplevel li").click(function() {
-        var href = $(this).children("span.url").text();
+        var href = $(this).children("a").attr('href');
         window.location = href;
     });
 
@@ -133,7 +133,7 @@ function render_error(msg) {
 // Category feed {{{
 
 function category_clicked() {
-    var href = $(this).find("span.href").html();
+    var href = $(this).find("a").attr('href');
     window.location = href;
 }
 
@@ -151,11 +151,12 @@ function category() {
 
         change: function(event, ui) {
             if (ui.newContent) {
-                var href = ui.newContent.children("span.load_href").html();
+                var href = ui.newContent.prev().children("a.load_href").attr('href');
                 ui.newContent.children(".loading").show();
                 if (href) {
                     $.ajax({
                         url:href,
+                        cache: false,
                         data:{'sort':cookie(sort_cookie_name)},
                         success: function(data) {
                             this.children(".loaded").html(data);
@@ -212,6 +213,7 @@ function load_page(elem) {
         url: href,
         context: elem,
         dataType: "json",
+        cache : false,
         type: 'POST',
         timeout: 600000, //milliseconds (10 minutes)
         data: {'ids': ids},
@@ -255,7 +257,7 @@ function booklist(hide_sort) {
 function show_details(a_dom) {
     var book = $(a_dom).closest('div.summary');
     var bd = $('#book_details_dialog');
-    bd.html('<span class="loading"><img src="/static/loading.gif" alt="Loading" />Loading, please wait…</span>');
+    bd.html('<span class="loading"><img src="'+url_prefix+'/static/loading.gif" alt="Loading" />Loading, please wait…</span>');
     bd.dialog('option', 'width', $(window).width() - 100);
     bd.dialog('option', 'height', $(window).height() - 100);
     bd.dialog('option', 'title', book.find('.title').text());
@@ -263,6 +265,7 @@ function show_details(a_dom) {
     $.ajax({
         url: book.find('.details-href').attr('title'),
         context: bd,
+        cache: false,
         dataType: "json",
         timeout: 600000, //milliseconds (10 minutes)
         error: function(xhr, stat, err) {
@@ -1,6 +1,6 @@
 <div id="details_{id}" class="details">
     <div class="left">
-        <img alt="Cover of {title}" src="/get/cover/{id}" />
+        <img alt="Cover of {title}" src="{prefix}/get/cover/{id}" />
     </div>
     <div class="right">
         <div class="field formats">{formats}</div>
@@ -1,6 +1,6 @@
 <div id="summary_{id}" class="summary">
     <div class="left">
-        <img alt="Cover of {title}" src="/get/thumb_90_120/{id}" />
+        <img alt="Cover of {title}" src="{prefix}/get/thumb_90_120/{id}" />
         {get_button}
     </div>
     <div class="right">
@@ -8,7 +8,7 @@
         <span class="rating_container">{stars}</span>
         <span class="series">{series}</span>
         <a href="#" onclick="show_details(this); return false;" title="{details_tt}">{details}</a>
-        <a href="/browse/book/{id}" title="{permalink_tt}">{permalink}</a>
+        <a href="{prefix}/browse/book/{id}" title="{permalink_tt}">{permalink}</a>
     </div>
     <div class="title"><strong>{title}</strong></div>
     <div class="authors">{authors}</div>
BIN  resources/content_server/button-donate.png (new file, 1.6 KiB; binary file not shown)
@@ -40,7 +40,7 @@ function create_table_headers() {
 
 
 function format_url(format, id, title) {
-    return 'get/'+format.toLowerCase() + '/'+encodeURIComponent(title) + '_' + id+'.'+format.toLowerCase();
+    return url_prefix + '/get/'+format.toLowerCase() + '/'+encodeURIComponent(title) + '_' + id+'.'+format.toLowerCase();
 }
 
 function render_book(book) {
@@ -101,7 +101,7 @@ function render_book(book) {
         }
     }
     title += '</span>'
-    title += '<img style="display:none" alt="" src="get/cover/{0}" /></span>'.format(id);
+    title += '<img style="display:none" alt="" src="{1}/get/cover/{0}" /></span>'.format(id, url_prefix);
     title += '<div class="comments">{0}</div>'.format(comments)
     // Render authors cell
     var _authors = new Array();
@@ -3,26 +3,27 @@
 <html xmlns="http://www.w3.org/1999/xhtml" version="XHTML 1.1" xml:lang="en">
     <head>
         <title>calibre library</title>
-        <link rel="stylesheet" type="text/css" href="/static/gui.css" charset="utf-8" />
-        <script type="text/javascript" src="/static/date.js" charset="utf-8"></script>
-        <script type="text/javascript" src="/static/jquery.js" charset="utf-8"></script>
-        <script type="text/javascript" src="/static/gui.js" charset="utf-8"></script>
+        <script type="text/javascript">var url_prefix='{prefix}';</script>
+        <link rel="stylesheet" type="text/css" href="{prefix}/static/gui.css" charset="utf-8" />
+        <script type="text/javascript" src="{prefix}/static/date.js" charset="utf-8"></script>
+        <script type="text/javascript" src="{prefix}/static/jquery.js" charset="utf-8"></script>
+        <script type="text/javascript" src="{prefix}/static/gui.js" charset="utf-8"></script>
         <link rel="icon" href="http://calibre-ebook.com/favicon.ico" type="image/x-icon" />
     </head>
     <body>
         <div id="banner">
-            <a style="border: 0pt" href="http://calibre-ebook.com" alt="calibre" title="calibre"><img style="border:0pt" src="/static/calibre_banner.png" alt="calibre" /></a>
+            <a style="border: 0pt" href="http://calibre-ebook.com" alt="calibre" title="calibre"><img style="border:0pt" src="{prefix}/static/calibre_banner.png" alt="calibre" /></a>
         </div>
 
         <div id="search_box">
-            <form name="search_form" onsubmit="search();return false;" action="./" method="get" accept-charset="UTF-8">
+            <form name="search_form" onsubmit="search();return false;" action="{prefix}/old" method="get" accept-charset="UTF-8">
                 <input value="" id="s" type="text" />
-                <input type="image" src="/static/btn_search_box.png" width="27" height="24" id="go" alt="Search" title="Search" />
+                <input type="image" src="{prefix}/static/btn_search_box.png" width="27" height="24" id="go" alt="Search" title="Search" />
             </form>
         </div>
 
         <div id="count_bar">
-            <span id="left"><img src="/static/first.png" alt="Show first set of books" title="Show first set of books"/> <img src="/static/previous.png" alt="Show previous set of books" title="Show previous set of books"/> </span><span id="count">&nbsp;</span> <span id="right"><img src="/static/next.png" alt="Show next set of books" title="Show next set of books"/> <img src="/static/last.png" alt="Show last set of books" title="Show last set of books" /></span>
+            <span id="left"><img src="{prefix}/static/first.png" alt="Show first set of books" title="Show first set of books"/> <img src="{prefix}/static/previous.png" alt="Show previous set of books" title="Show previous set of books"/> </span><span id="count">&nbsp;</span> <span id="right"><img src="{prefix}/static/next.png" alt="Show next set of books" title="Show next set of books"/> <img src="{prefix}/static/last.png" alt="Show last set of books" title="Show last set of books" /></span>
         </div>
 
         <div id="main">
@@ -38,7 +39,7 @@
 
         <div id="loading">
             <div>
-                <img align="top" src="/static/loading.gif" alt="Loading..." title="Loading..."/> <span id="loading_msg">Loading…</span>
+                <img align="top" src="{prefix}/static/loading.gif" alt="Loading..." title="Loading..."/> <span id="loading_msg">Loading…</span>
             </div>
         </div>
 
@@ -1,5 +1,9 @@
 /* CSS for the mobile version of the content server webpage */
 
+.body {
+    font-family: sans-serif;
+}
+
 .navigation table.buttons {
     width: 100%;
 }
@@ -53,6 +57,7 @@ div.navigation {
 }
 #listing td {
     padding: 0.25em;
+    vertical-align: middle;
 }
 
 #listing td.thumbnail {
@@ -73,6 +78,7 @@ div.navigation {
     overflow: hidden;
     text-align: center;
     text-decoration: none;
+    vertical-align: middle;
 }
 
 #logo {
@@ -83,4 +89,17 @@ div.navigation {
     clear: both;
 }
 
+.data-container {
+    display: inline-block;
+    vertical-align: middle;
+}
+
+.first-line {
+    font-size: larger;
+    font-weight: bold;
+}
+
+.second-line {
+    margin-top: 0.75ex;
+    display: block;
+}
resources/content_server/read/monocle.js (new file, 3385 lines; file diff suppressed because it is too large)
@@ -106,7 +106,8 @@ title_sort_articles=r'^(A|The|An)\s+'
 auto_connect_to_folder = ''
 
 
-# Specify renaming rules for sony collections. Collections on Sonys are named
+# Specify renaming rules for sony collections. This tweak is only applicable if
+# metadata management is set to automatic. Collections on Sonys are named
 # depending upon whether the field is standard or custom. A collection derived
 # from a standard field is named for the value in that field. For example, if
 # the standard 'series' column contains the name 'Darkover', then the series
@@ -137,6 +138,24 @@ auto_connect_to_folder = ''
 sony_collection_renaming_rules={}
 
 
+# Specify how sony collections are sorted. This tweak is only applicable if
+# metadata management is set to automatic. You can indicate which metadata is to
+# be used to sort on a collection-by-collection basis. The format of the tweak
+# is a list of metadata fields from which collections are made, followed by the
+# name of the metadata field containing the sort value.
+# Example: The following indicates that collections built from pubdate and tags
+# are to be sorted by the value in the custom column '#mydate', that collections
+# built from 'series' are to be sorted by 'series_index', and that all other
+# collections are to be sorted by title. If a collection metadata field is not
+# named, then if it is a series-based collection it is sorted by series order,
+# otherwise it is sorted by title order.
+# [(['pubdate', 'tags'],'#mydate'), (['series'],'series_index'), (['*'], 'title')]
+# Note that the bracketing and parentheses are required. The syntax is
+# [ ( [list of fields], sort field ) , ( [ list of fields ] , sort field ) ]
+# Default: empty (no rules), so no collection attributes are named.
+sony_collection_sorting_rules = []
+
+
 # Create search terms to apply a query across several built-in search terms.
 # Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...}
 # Example: create the term 'myseries' that when used as myseries:foo would
@@ -184,3 +203,11 @@ content_server_wont_display = ['']
 # level sorts, and if you are seeing a slowdown, reduce the value of this tweak.
 maximum_resort_levels = 5
 
+# Absolute path to a TTF font file to use as the font for the title and author
+# when generating a default cover. Useful if the default font (Liberation
+# Serif) does not contain glyphs for the language of the books in your library.
+generate_cover_title_font = None
+
+# Absolute path to a TTF font file to use as the font for the footer in the
+# default cover
+generate_cover_foot_font = None
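Purely for illustration (the values below are examples, not part of the diff), the new tweaks added above might be set like this:

    # Example values for the new tweaks: sort series-based SONY collections
    # by series_index and all other collections by title; use a specific TTF
    # font for generated default covers (the path is a made-up example).
    sony_collection_sorting_rules = [(['series'], 'series_index'), (['*'], 'title')]
    generate_cover_title_font = '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf'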
BIN  resources/images/news/perfil.png (new file, 781 B; binary file not shown)
BIN  resources/images/news/theecocolapse.png (new file, 1.2 KiB; binary file not shown)
@@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
         for poem in soup.findAll('div', attrs={'class':'poem'}):
             title = self.tag_to_string(poem.find('h4'))
             desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
-            url = 'http://www.theatlantic.com'+poem.find('a')['href']
+            url = poem.find('a')['href']
+            if url.startswith('/'):
+                url = 'http://www.theatlantic.com' + url
             self.log('\tFound article:', title, 'at', url)
             self.log('\t\t', desc)
             poems.append({'title':title, 'url':url, 'description':desc,
@@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
         if div is not None:
             self.log('Found section: Advice')
             title = self.tag_to_string(div.find('h4'))
-            url = 'http://www.theatlantic.com'+div.find('a')['href']
+            url = div.find('a')['href']
+            if url.startswith('/'):
+                url = 'http://www.theatlantic.com' + url
             desc = self.tag_to_string(div.find('p'))
             self.log('\tFound article:', title, 'at', url)
             self.log('\t\t', desc)
@@ -1,37 +1,37 @@
 import datetime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1286242553(BasicNewsRecipe):
     title = u'CACM'
     oldest_article = 7
     max_articles_per_feed = 100
     needs_subscription = True
     feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')]
     language = 'en'
     __author__ = 'jonmisurda'
     no_stylesheets = True
     remove_tags = [
         dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn', \
             'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']})
     ]
     cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d'
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
             br.open('https://cacm.acm.org/login')
             br.select_form(nr=1)
             br['current_member[user]'] = self.username
             br['current_member[passwd]'] = self.password
             br.submit()
         return br
 
     def get_cover_url(self):
         now = datetime.datetime.now()
 
         cover_url = None
         soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month))
         cover_item = soup.find('img',attrs={'alt':'magazine cover image'})
         if cover_item:
             cover_url = cover_item['src']
         return cover_url
resources/recipes/calcalist.recipe (new file, 43 lines)

@@ -0,0 +1,43 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import re
+
+class AdvancedUserRecipe1283848012(BasicNewsRecipe):
+    description = 'This is a recipe of Calcalist.co.il. The recipe downloads the article page to not hurt the sites advertising income.'
+    cover_url = 'http://ftp5.bizportal.co.il/web/giflib/news/calcalist.JPG'
+    title = u'Calcalist'
+    language = 'he'
+    __author__ = 'marbs'
+    extra_css='img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
+    simultaneous_downloads = 5
+    remove_javascript = True
+    timefmt = '[%a, %d %b, %Y]'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_attributes = ['width']
+    simultaneous_downloads = 5
+    keep_only_tags = dict(name='div', attrs={'id':'articleContainer'})
+    remove_tags = [dict(name='p', attrs={'text':['&nbsp;']})]
+    max_articles_per_feed = 100
+    preprocess_regexps = [
+        (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: '')
+    ]
+
+
+    feeds = [(u'\u05d3\u05e3 \u05d4\u05d1\u05d9\u05ea', u'http://www.calcalist.co.il/integration/StoryRss8.xml'),
+             (u'24/7', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
+             (u'\u05d1\u05d0\u05d6\u05d6', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
+             (u'\u05de\u05d1\u05d6\u05e7\u05d9\u05dd', u'http://www.calcalist.co.il/integration/StoryRss184.xml'),
+             (u'\u05d4\u05e9\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss2.xml'),
+             (u'\u05d1\u05d0\u05e8\u05e5', u'http://www.calcalist.co.il/integration/StoryRss14.xml'),
+             (u'\u05d4\u05db\u05e1\u05e3', u'http://www.calcalist.co.il/integration/StoryRss9.xml'),
+             (u'\u05e0\u05d3\u05dc"\u05df', u'http://www.calcalist.co.il/integration/StoryRss7.xml'),
+             (u'\u05e2\u05d5\u05dc\u05dd', u'http://www.calcalist.co.il/integration/StoryRss13.xml'),
+             (u'\u05e4\u05e8\u05e1\u05d5\u05dd \u05d5\u05e9\u05d9\u05d5\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss5.xml'),
+             (u'\u05e4\u05e0\u05d0\u05d9', u'http://www.calcalist.co.il/integration/StoryRss3.xml'),
+             (u'\u05d8\u05db\u05e0\u05d5\u05dc\u05d5\u05d2\u05d9', u'http://www.calcalist.co.il/integration/StoryRss4.xml'),
+             (u'\u05e2\u05e1\u05e7\u05d9 \u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.calcalist.co.il/integration/StoryRss18.xml')]
+
+    def print_version(self, url):
+        split1 = url.split("-")
+        print_url = 'http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
+        return print_url
resources/recipes/clic_rbs.recipe (new file, 50 lines)

@@ -0,0 +1,50 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ClicRBS(BasicNewsRecipe):
+    title = u'ClicRBS'
+    language = 'pt'
+    __author__ = 'arvoredo'
+    oldest_article = 3
+    max_articles_per_feed = 9
+    cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']})
+    ]
+
+    remove_tags_before = dict(name='div ', attrs={'class':'descricao'})
+    remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'})
+    remove_tags_before = dict(name='div', attrs={'class':'descricao'})
+    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
+    remove_tags_after = dict(name='div', attrs={'class':'extra'})
+    remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'})
+    remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'})
+    remove_tags_after = dict(name='ul', attrs={'class':'lista'})
+
+    feeds = [
+        (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13')
+        , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67')
+        , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml')
+        , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1')
+        , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13')
+        , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13')
+        , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1')
+        , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1')
+        , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1')
+        , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2')
+        , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1')
+        , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13')
+        , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2')
+        , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18')
+        , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2')
+        , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
+    ]
+
+    extra_css = '''
+        cite{color:#007BB5; font-size:xx-small; font-style:italic;}
+        body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+        h3{font-size:large; color:#082963; font-weight:bold;}
+        #ident{color:#0179B4; font-size:xx-small;}
+        p{color:#000000;font-weight:normal;}
+        .commentario p{color:#007BB5; font-style:italic;}
+    '''
resources/recipes/cm_journal.recipe (new file, 44 lines)

@@ -0,0 +1,44 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CMJornal_pt(BasicNewsRecipe):
+    title = 'Correio da Manha - Portugal'
+    __author__ = 'jmst'
+    description = 'As noticias de Portugal e do Mundo'
+    publisher = 'Cofina Media'
+    category = ''
+    oldest_article = 1
+    no_stylesheets = True
+    encoding = 'utf-8'
+    use_embedded_content = False
+    language = 'pt'
+    extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '
+
+    conversion_options = {
+        'comment'     : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+    }
+
+    keep_only_tags = [
+        dict(name=['h2','h1'])
+        , dict(name='div', attrs={'class': ['news']})
+    ]
+
+    remove_tags = [
+        dict(name=['object','embed','iframe'])
+        ,dict(name='a',attrs={'href':['#']})
+    ]
+
+    feeds = [
+        (u'Actualidade' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009' )
+        ,(u'Portugal'   , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010' )
+        ,(u'Economia'   , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011' )
+        ,(u'Mundo'      , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091' )
+        ,(u'Desporto'   , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012' )
+        ,(u'TV & Media' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092')
+    ]
+
+    def print_version(self, url):
+        return url.replace('noticia.aspx', 'Imprimir.aspx')
@@ -1,9 +1,7 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
-newyorker.com
+cubadebate.cu
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
@@ -13,32 +11,44 @@ class CubaDebate(BasicNewsRecipe):
     __author__ = 'Darko Miletic'
     description = 'Contra el Terorismo Mediatico'
     oldest_article = 15
     language = 'es'
 
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
     publisher = 'Cubadebate'
     category = 'news, politics, Cuba'
     encoding = 'utf-8'
-    extra_css = ' #BlogTitle{font-size: x-large; font-weight: bold} '
+    masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
+    publication_type = 'newsportal'
+    extra_css = """
+        #BlogTitle{font-size: xx-large; font-weight: bold}
+        body{font-family: Verdana, Arial, Tahoma, sans-serif}
+    """
 
     conversion_options = {
         'comments'  : description
         ,'tags'     : category
-        ,'language' : 'es'
+        ,'language' : language
         ,'publisher' : publisher
-        ,'pretty_print': True
     }
 
     keep_only_tags = [dict(name='div', attrs={'id':'Outline'})]
     remove_tags_after = dict(name='div',attrs={'id':'BlogContent'})
-    remove_tags = [dict(name='link')]
+    remove_tags = [
+        dict(name=['link','base','embed','object','meta','iframe'])
+        ,dict(attrs={'id':'addthis_container'})
+    ]
 
     feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
+    remove_attributes=['width','height','lang']
 
     def print_version(self, url):
         return url + 'print/'
 
     def preprocess_html(self, soup):
-        return self.adeify_images(soup)
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
resources/recipes/el_faro.recipe (new file, 77 lines)

@@ -0,0 +1,77 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElFaroDeVigo(BasicNewsRecipe):
+    title = u'El Faro de Vigo'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    __author__ = 'Jefferson Frantz'
+    description = 'Noticias de Vigo'
+    timefmt = ' [%d %b, %Y]'
+    language = 'es'
+    encoding = 'cp1252'
+    no_stylesheets = True
+    remove_javascript = True
+
+    feeds = [
+##        (u'Vigo', u'http://www.farodevigo.es/elementosInt/rss/1'),
+##        (u'Gran Vigo', u'http://www.farodevigo.es/elementosInt/rss/2'),
+        (u'Galicia', u'http://www.farodevigo.es/elementosInt/rss/4'),
+        (u'España', u'http://www.farodevigo.es/elementosInt/rss/6'),
+        (u'Mundo', u'http://www.farodevigo.es/elementosInt/rss/7'),
+##        (u'Opinión', u'http://www.farodevigo.es/elementosInt/rss/5'),
+        (u'Economía', u'http://www.farodevigo.es/elementosInt/rss/10'),
+        (u'Sociedad y Cultura', u'http://www.farodevigo.es/elementosInt/rss/8'),
+        (u'Sucesos', u'http://www.farodevigo.es/elementosInt/rss/9'),
+        (u'Deportes', u'http://www.farodevigo.es/elementosInt/rss/11'),
+        (u'Agenda', u'http://www.farodevigo.es/elementosInt/rss/21'),
+        (u'Gente', u'http://www.farodevigo.es/elementosInt/rss/24'),
+        (u'Televisión', u'http://www.farodevigo.es/elementosInt/rss/25'),
+        (u'Ciencia y Tecnología', u'http://www.farodevigo.es/elementosInt/rss/26')]
+
+    extra_css = '''.noticia_texto{ font-family: sans-serif; font-size: medium; text-align: justify }
+                   h1{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}
+                   h2{font-family: serif; font-size: medium; font-weight: bold; color: #000000; text-align: left}
+                   .enlacenegrita10{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: left}
+                   .noticia_titular{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}'''
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+
+        url = 'http://estaticos00.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
+        fitem = soup.find('img',src=url)
+        if fitem:
+            par = fitem.parent
+            par.extract()
+        url = 'http://estaticos01.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
+        fitem = soup.find('img',src=url)
+        if fitem:
+            par = fitem.parent
+            par.extract()
+        url = 'http://estaticos02.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
+        fitem = soup.find('img',src=url)
+        if fitem:
+            par = fitem.parent
+            par.extract()
+
+        return self.adeify_images(soup)
+
+    def postprocess_html(self, soup, first_fetch):
+        divs = soup.findAll(True, {'class':'enlacenegrita10'})
+        for div in divs:
+            div['align'] = 'left'
+
+        return soup
+
+    keep_only_tags = [dict(name='div', attrs={'class':['noticias']})]
+
+    remove_tags = [
+        dict(name=['object','link','script','ul','iframe','ol'])
+        ,dict(name='div', attrs={'class':['noticiadd2', 'cintillo2', 'noticiadd', 'noticiadd2']})
+        ,dict(name='div', attrs={'class':['imagen_derecha', 'noticiadd3', 'extraHTML']})
+    ]
@@ -2,7 +2,7 @@
 __license__ = 'GPL v3'
 __author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-description = 'Main daily newspaper from Spain - v1.03 (03, September 2010)'
+description = 'Main daily newspaper from Spain - v1.04 (19, October 2010)'
 __docformat__ = 'restructuredtext en'
 
 '''
@@ -32,19 +32,16 @@ class ElPais(BasicNewsRecipe):
     remove_javascript = True
     no_stylesheets = True
 
-    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})]
+    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]
 
-    extra_css = '''
-                    p{style:normal size:12 serif}
-
-            '''
+    extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '
 
     remove_tags = [
         dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
-        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
+        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos estirar','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
         dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}),
         dict(name='p', attrs={'class':'nav_meses'}),
-        dict(attrs={'class':['enlaces_m','miniaturas_m']})
+        dict(attrs={'class':['enlaces_m','miniaturas_m','nav_miniaturas_m']})
     ]
 
     feeds = [
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic <darko.miletic at gmail.com>'
 '''
 elperiodico.cat
 '''
@@ -12,8 +12,8 @@ from calibre.ebooks.BeautifulSoup import Tag
 
 class ElPeriodico_cat(BasicNewsRecipe):
     title = 'El Periodico de Catalunya'
-    __author__ = 'Darko Miletic'
+    __author__ = 'Jordi Balcells/Darko Miletic'
-    description = 'Noticias desde Catalunya'
+    description = 'Noticies des de Catalunya'
     publisher = 'elperiodico.cat'
     category = 'news, politics, Spain, Catalunya'
     oldest_article = 2
@@ -33,15 +33,25 @@ class ElPeriodico_cat(BasicNewsRecipe):
 
     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
 
-    feeds = [(u"Tota l'edició", u'http://www.elperiodico.cat/rss.asp?id=46')]
+    feeds = [(u'Portada', u'http://www.elperiodico.cat/ca/rss/rss_portada.xml'),
+             (u'Internacional', u'http://www.elperiodico.cat/ca/rss/internacional/rss.xml'),
+             (u'Societat', u'http://www.elperiodico.cat/ca/rss/societat/rss.xml'),
+             (u'Ci\xe8ncia i tecnologia', u'http://www.elperiodico.cat/ca/rss/ciencia-i-tecnologia/rss.xml'),
+             (u'Esports', u'http://www.elperiodico.cat/ca/rss/esports/rss.xml'),
+             (u'Gent', u'http://www.elperiodico.cat/ca/rss/gent/rss.xml'),
+             (u'Opini\xf3', u'http://www.elperiodico.cat/ca/rss/opinio/rss.xml'),
+             (u'Pol\xedtica', u'http://www.elperiodico.cat/ca/rss/politica/rss.xml'),
+             (u'Barcelona', u'http://www.elperiodico.cat/ca/rss/barcelona/rss.xml'),
+             (u'Economia', u'http://www.elperiodico.cat/ca/rss/economia/rss.xml'),
+             (u'Cultura i espectacles', u'http://www.elperiodico.cat/ca/rss/cultura-i-espectacles/rss.xml'),
+             (u'Tele', u'http://www.elperiodico.cat/ca/rss/tele/rss.xml')]
 
-    keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
+    keep_only_tags = [dict(name='div', attrs={'class':'titularnoticia'}),
+                      dict(name='div', attrs={'class':'noticia_completa'})]
 
-    remove_tags = [
-        dict(name=['object','link','script'])
-        ,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
-        ,dict(name='div', attrs={'id':'inferiores'})
+    remove_tags = [dict(name='div', attrs={'class':['opcionb','opcionb last','columna_noticia']}),
+                   dict(name='span', attrs={'class':'opcionesnoticia'})
     ]
 
     def print_version(self, url):
@@ -2,17 +2,17 @@
 # -*- coding: utf-8 -*-
 
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic <darko.miletic at gmail.com>'
 '''
-elperiodico.com
+elperiodico.cat
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
 
-class ElPeriodico_esp(BasicNewsRecipe):
+class ElPeriodico_cat(BasicNewsRecipe):
     title = 'El Periodico de Catalunya'
-    __author__ = 'Darko Miletic'
+    __author__ = 'Jordi Balcells/Darko Miletic'
     description = 'Noticias desde Catalunya'
     publisher = 'elperiodico.com'
     category = 'news, politics, Spain, Catalunya'
@ -33,15 +33,25 @@ class ElPeriodico_esp(BasicNewsRecipe):
|
|||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [(u"Toda la edición", u'http://www.elperiodico.com/rss.asp?id=46')]
|
feeds = [(u'Portada', u'http://www.elperiodico.com/es/rss/rss_portada.xml'),
|
||||||
|
(u'Internacional', u'http://elperiodico.com/es/rss/internacional/rss.xml'),
|
||||||
|
(u'Sociedad', u'http://elperiodico.com/es/rss/sociedad/rss.xml'),
|
||||||
|
(u'Ciencia y Tecnolog\xeda', u'http://elperiodico.com/es/rss/ciencia-y-tecnologia/rss.xml'),
|
||||||
|
(u'Deportes', u'http://elperiodico.com/es/rss/deportes/rss.xml'),
|
||||||
|
(u'Gente', u'http://elperiodico.com/es/rss/gente/rss.xml'),
|
||||||
|
(u'Opini\xf3n', u'http://elperiodico.com/es/rss/opinion/rss.xml'),
|
||||||
|
(u'Pol\xedtica', u'http://elperiodico.com/es/rss/politica/rss.xml'),
|
||||||
|
(u'Barcelona', u'http://elperiodico.com/es/rss/barcelona/rss.xml'),
|
||||||
|
(u'Econom\xeda', u'http://elperiodico.com/es/rss/economia/rss.xml'),
|
||||||
|
(u'Cultura y espect\xe1culos', u'http://elperiodico.com/es/rss/cultura-y-espectaculos/rss.xml'),
|
||||||
|
(u'Tele', u'http://elperiodico.com/es/rss/cultura-y-espectaculos/rss.xml')]
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'titularnoticia'}),
|
||||||
|
dict(name='div', attrs={'class':'noticia_completa'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [dict(name='div', attrs={'class':['opcionb','opcionb last','columna_noticia']}),
|
||||||
dict(name=['object','link','script'])
|
dict(name='span', attrs={'class':'opcionesnoticia'})
|
||||||
,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
|
|
||||||
,dict(name='div', attrs={'id':'inferiores'})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
|
@@ -4,7 +4,6 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 foxnews.com
 '''

-import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class FoxNews(BasicNewsRecipe):
@@ -21,11 +20,10 @@ class FoxNews(BasicNewsRecipe):
     language           = 'en'
     publication_type   = 'newsportal'
     remove_empty_feeds = True
-    extra_css          = ' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em} .caption{font-size: x-small} '
-
-    preprocess_regexps = [
-        (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
-        ]
+    extra_css          = """
+        body{font-family: Arial,sans-serif }
+        .caption{font-size: x-small}
+        """

     conversion_options = {
         'comment' : description
@@ -34,27 +32,15 @@ class FoxNews(BasicNewsRecipe):
         , 'language' : language
         }

-    remove_attributes = ['xmlns']
-
-    keep_only_tags = [
-                       dict(name='div', attrs={'id' :['story','browse-story-content']})
-                      ,dict(name='div', attrs={'class':['posts articles','slideshow']})
-                      ,dict(name='h4' , attrs={'class':'storyDate'})
-                      ,dict(name='h1' , attrs={'xmlns:functx':'http://www.functx.com'})
-                      ,dict(name='div', attrs={'class':'authInfo'})
-                      ,dict(name='div', attrs={'id':'articleCont'})
-                     ]
+    remove_attributes = ['xmlns','lang']

     remove_tags = [
-                    dict(name='div', attrs={'class':['share-links','quigo quigo2','share-text','storyControls','socShare','btm-links']})
-                   ,dict(name='div', attrs={'id' :['otherMedia','loomia_display','img-all-path','story-vcmId','story-url','pane-browse-story-comments','story_related']})
-                   ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2','tabs']})
-                   ,dict(name='a'  , attrs={'class':'join-discussion'})
-                   ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2']})
-                   ,dict(name='p'  , attrs={'class':'see_fullarchive'})
-                   ,dict(name=['object','embed','link','script'])
+                    dict(name=['object','embed','link','script','iframe','meta','base'])
+                   ,dict(attrs={'class':['user-control','url-description','ad-context']})
                   ]

+    remove_tags_before=dict(name='h1')
+    remove_tags_after =dict(attrs={'class':'url-description'})
+
     feeds = [
              (u'Latest Headlines', u'http://feeds.foxnews.com/foxnews/latest' )
@@ -67,8 +53,5 @@ class FoxNews(BasicNewsRecipe):
             ,(u'Entertainment' , u'http://feeds.foxnews.com/foxnews/entertainment' )
             ]

-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return self.adeify_images(soup)
+    def print_version(self, url):
+        return url + 'print'
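The rewrite above drops the old preprocess_html/adeify_images pass in favour of fetching the printer-friendly page via a print_version hook. A sketch of the mapping it performs, assuming (as the recipe appears to) that Fox News article URLs end with a slash:

    def print_version(url):
        # 'http://www.foxnews.com/.../story/' -> 'http://www.foxnews.com/.../story/print'
        return url + 'print'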
resources/recipes/gamespot.recipe (new file, 41 lines)
@@ -0,0 +1,41 @@
+__license__ = 'GPL v3'
+__author__ = u'Marc T\xf6nsing'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GamespotCom(BasicNewsRecipe):
+
+    title = u'Gamespot.com Reviews'
+    description = 'review articles from gamespot.com'
+    language = 'en'
+    __author__ = u'Marc T\xf6nsing'
+
+    oldest_article = 7
+    max_articles_per_feed = 40
+    remove_empty_feeds = True
+    no_stylesheets = True
+    no_javascript = True
+
+    feeds = [
+        ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
+        ('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
+        ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
+        ('PlayStation 3 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1028'),
+        ('PlayStation 2 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=7'),
+        ('PlayStation Portable Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1024'),
+        ('Nintendo DS Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1026'),
+        ('iPhone Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1049'),
+        ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':'top_bar'}),
+        dict(name='div', attrs={'class':'video_embed'})
+        ]
+
+    def get_cover_url(self):
+        return 'http://image.gamespotcdn.net/gamespot/shared/gs5/gslogo_bw.gif'
+
+    def get_article_url(self, article):
+        return article.get('link') + '?print=1'
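The eight Gamespot feeds above differ only in the numeric platform parameter, so the same list can be derived from a single table. A standalone sketch (not part of the commit; platform ids copied from the URLs above):

    BASE = 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=%d'

    PLATFORMS = [
        ('PC Reviews', 5),
        ('XBOX 360 Reviews', 1029),
        ('Wii Reviews', 1031),
        ('PlayStation 3 Reviews', 1028),
        ('PlayStation 2 Reviews', 7),
        ('PlayStation Portable Reviews', 1024),
        ('Nintendo DS Reviews', 1026),
        ('iPhone Reviews', 1049),
    ]

    # one (title, url) pair per platform, identical to the list in the recipe
    feeds = [(name, BASE % pid) for name, pid in PLATFORMS]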
resources/recipes/lenta_ru.recipe (new file, 177 lines)
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+
+'''
+Lenta.ru
+'''
+
+from calibre.web.feeds.feedparser import parse
+from calibre.ebooks.BeautifulSoup import Tag
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class LentaRURecipe(BasicNewsRecipe):
+    title = u'Lenta.ru: \u041d\u043e\u0432\u043e\u0441\u0442\u0438'
+    __author__ = 'Nikolai Kotchetkov'
+    publisher = 'lenta.ru'
+    category = 'news, Russia'
+    description = u'''\u0415\u0436\u0435\u0434\u043d\u0435\u0432\u043d\u0430\u044f
+        \u0438\u043d\u0442\u0435\u0440\u043d\u0435\u0442-\u0433\u0430\u0437\u0435\u0442\u0430.
+        \u041d\u043e\u0432\u043e\u0441\u0442\u0438 \u0441\u043e
+        \u0432\u0441\u0435\u0433\u043e \u043c\u0438\u0440\u0430 \u043d\u0430
+        \u0440\u0443\u0441\u0441\u043a\u043e\u043c
+        \u044f\u0437\u044b\u043a\u0435'''
+    description = u'Ежедневная интернет-газета. Новости со всего мира на русском языке'
+    oldest_article = 3
+    max_articles_per_feed = 100
+
+    masthead_url = u'http://img.lenta.ru/i/logowrambler.gif'
+    cover_url = u'http://img.lenta.ru/i/logowrambler.gif'
+
+    #Add feed names if you want them to be sorted (feeds of this list appear first)
+    sortOrder = [u'_default', u'В России', u'б.СССР', u'В мире']
+
+    encoding = 'cp1251'
+    language = 'ru'
+    no_stylesheets = True
+    remove_javascript = True
+    recursions = 0
+
+    conversion_options = {
+        'comment'     : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+        }
+
+    keep_only_tags = [dict(name='td', attrs={'class':['statya','content']})]
+
+    remove_tags_after = [dict(name='p', attrs={'class':'links'}), dict(name='div', attrs={'id':'readers-block'})]
+
+    remove_tags = [dict(name='table', attrs={'class':['vrezka','content']}), dict(name='div', attrs={'class':'b240'}), dict(name='div', attrs={'id':'readers-block'}), dict(name='p', attrs={'class':'links'})]
+
+    feeds = [u'http://lenta.ru/rss/']
+
+    extra_css = 'h1 {font-size: 1.2em; margin: 0em 0em 0em 0em;} h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;} h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'
+
+    def parse_index(self):
+        try:
+            feedData = parse(self.feeds[0])
+            if not feedData:
+                raise NotImplementedError
+            self.log("parse_index: Feed loaded successfully.")
+            if feedData.feed.has_key('title'):
+                self.title = feedData.feed.title
+                self.log("parse_index: Title updated to: ", self.title)
+            if feedData.feed.has_key('image'):
+                self.log("HAS IMAGE!!!!")
+
+            def get_virtual_feed_articles(feed):
+                if feeds.has_key(feed):
+                    return feeds[feed][1]
+                self.log("Adding new feed: ", feed)
+                articles = []
+                feeds[feed] = (feed, articles)
+                return articles
+
+            feeds = {}
+
+            #Iterate feed items and distribute articles using tags
+            for item in feedData.entries:
+                link = item.get('link', '');
+                title = item.get('title', '');
+                if '' == link or '' == title:
+                    continue
+                article = {'title':title, 'url':link, 'description':item.get('description', ''), 'date':item.get('date', ''), 'content':''};
+                if not item.has_key('tags'):
+                    get_virtual_feed_articles('_default').append(article)
+                    continue
+                for tag in item.tags:
+                    addedToDefault = False
+                    term = tag.get('term', '')
+                    if '' == term:
+                        if (not addedToDefault):
+                            get_virtual_feed_articles('_default').append(article)
+                        continue
+                    get_virtual_feed_articles(term).append(article)
+
+            #Get feed list
+            #Select sorted feeds first of all
+            result = []
+            for feedName in self.sortOrder:
+                if (not feeds.has_key(feedName)): continue
+                result.append(feeds[feedName])
+                del feeds[feedName]
+            result = result + feeds.values()
+
+            return result
+
+        except Exception, err:
+            self.log(err)
+            raise NotImplementedError
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
+
+    def postprocess_html(self, soup, first_fetch):
+        #self.log('Original: ', soup.prettify())
+
+        contents = Tag(soup, 'div')
+
+        #Extract tags with given attributes
+        extractElements = {'div' : [{'id' : 'readers-block'}]}
+
+        #Remove all elements that were not extracted before
+        for tag, attrs in extractElements.iteritems():
+            for attr in attrs:
+                garbage = soup.findAll(tag, attr)
+                if garbage:
+                    for pieceOfGarbage in garbage:
+                        pieceOfGarbage.extract()
+
+        #Find article text using header
+        #and add all elements to contents
+        element = soup.find({'h1' : True, 'h2' : True})
+        if (element):
+            element.name = 'h1'
+        while element:
+            nextElement = element.nextSibling
+            element.extract()
+            contents.insert(len(contents.contents), element)
+            element = nextElement
+
+        #Place article date after header
+        dates = soup.findAll(text=re.compile('\d{2}\.\d{2}\.\d{4}, \d{2}:\d{2}:\d{2}'))
+        if dates:
+            for date in dates:
+                for string in date:
+                    parent = date.parent
+                    if (parent and isinstance(parent, Tag) and 'div' == parent.name and 'dt' == parent['class']):
+                        #Date div found
+                        parent.extract()
+                        parent['style'] = 'font-size: 0.5em; color: gray; font-family: monospace;'
+                        contents.insert(1, parent)
+                        break
+
+        #Place article picture after date
+        pic = soup.find('img')
+        if pic:
+            picDiv = Tag(soup, 'div')
+            picDiv['style'] = 'width: 100%; text-align: center;'
+            pic.extract()
+            picDiv.insert(0, pic)
+            title = pic.get('title', None)
+            if title:
+                titleDiv = Tag(soup, 'div')
+                titleDiv['style'] = 'font-size: 0.5em;'
+                titleDiv.insert(0, title)
+                picDiv.insert(1, titleDiv)
+            contents.insert(2, picDiv)
+
+        body = soup.find('td', {'class':['statya','content']})
+        if body:
+            body.replaceWith(contents)
+
+        #self.log('Result: ', soup.prettify())
+        return soup
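The interesting part of the recipe above is parse_index: Lenta.ru publishes one RSS feed, and the recipe fans its entries out into virtual sections keyed by each entry's category tags, with '_default' as the fallback bucket. A rough standalone sketch of the same grouping idea using the feedparser package directly (hypothetical helper, not from the commit):

    import feedparser

    def group_by_tags(feed_url):
        parsed = feedparser.parse(feed_url)
        sections = {}
        for entry in parsed.entries:
            # every non-empty tag term names a section; untagged items
            # fall into the '_default' bucket
            terms = [t.get('term', '') for t in entry.get('tags', [])]
            terms = [t for t in terms if t] or ['_default']
            for term in terms:
                sections.setdefault(term, []).append(
                    {'title': entry.get('title', ''), 'url': entry.get('link', '')})
        return sections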
@@ -1,53 +1,79 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>'
+__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, Louis Gesbert <meta at antislash dot info>'
 '''
 Mediapart
 '''

-import re, string
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.BeautifulSoup import Tag
 from calibre.web.feeds.news import BasicNewsRecipe

 class Mediapart(BasicNewsRecipe):
     title = 'Mediapart'
-    __author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
+    __author__ = 'Mathieu Godlewski'
     description = 'Global news in french from online newspapers'
     oldest_article = 7
     language = 'fr'
+    needs_subscription = True

     max_articles_per_feed = 50
     no_stylesheets = True

-    html2lrf_options = ['--base-font-size', '10']
+    cover_url = 'http://www.mediapart.fr/sites/all/themes/mediapart/mediapart/images/annonce.jpg'

     feeds = [
         ('Les articles', 'http://www.mediapart.fr/articles/feed'),
     ]

-    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
-        [
-            (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
-            (r'<p>Mediapart\.fr</p>', lambda match : ''),
-            (r'<p[^>]*>[\s]*</p>', lambda match : ''),
-            (r'<p><a href="[^\.]+\.pdf">[^>]*</a></p>', lambda match : ''),
-        ]
-    ]
-
-    remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}),
-                    dict(name='div', attrs={'class':'print-links'}),
-                    dict(name='img', attrs={'src':'entete_article.png'}),
-                  ]
-
-    def print_version(self, url):
-        raw = self.browser.open(url).read()
-        soup = BeautifulSoup(raw.decode('utf8', 'replace'))
-        div = soup.find('div', {'class':'node node-type-article'})
-        if div is None:
-            return None
-        article_id = string.replace(div['id'], 'node-', '')
-        if article_id is None:
-            return None
-        return 'http://www.mediapart.fr/print/'+article_id
+    # -- print-version has poor quality on this website, better do the conversion ourselves
+    #
+    # preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
+    #     [
+    #         (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
+    #         (r'<span class=\'auteur_staff\'>[^>]+<a title=\'[^\']*\'[^>]*>([^<]*)</a>[^<]*</span>',
+    #             lambda match : '<i>'+match.group(1)+'</i>'),
+    #         (r'\'', lambda match: '’'),
+    #     ]
+    # ]
+    #
+    # remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}),
+    #                 dict(name='div', attrs={'class':'print-links'}),
+    #                 dict(name='img', attrs={'src':'entete_article.png'}),
+    #                 dict(name='br') ]
+    #
+    # def print_version(self, url):
+    #     raw = self.browser.open(url).read()
+    #     soup = BeautifulSoup(raw.decode('utf8', 'replace'))
+    #     div = soup.find('div', {'id':re.compile('node-\d+')})
+    #     if div is None:
+    #         return None
+    #     article_id = string.replace(div['id'], 'node-', '')
+    #     if article_id is None:
+    #         return None
+    #     return 'http://www.mediapart.fr/print/'+article_id

+    # -- Non-print version [dict(name='div', attrs={'class':'advert'})]
+
+    keep_only_tags = [
+        dict(name='h1', attrs={'class':'title'}),
+        dict(name='div', attrs={'class':'page_papier_detail'}),
+    ]
+
+    def preprocess_html(self,soup):
+        for title in soup.findAll('div', {'class':'titre'}):
+            tag = Tag(soup, 'h3')
+            title.replaceWith(tag)
+            tag.insert(0,title)
+        return soup
+
+    # -- Handle login
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('http://www.mediapart.fr/')
+            br.select_form(nr=1)
+            br['name'] = self.username
+            br['pass'] = self.password
+            br.submit()
+        return br
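The new get_browser above logs in by filling the second form on the Mediapart front page. A rough equivalent with plain mechanize, outside a recipe (the form index and the 'name'/'pass' field names are taken from the diff and are site-specific assumptions; the credentials are placeholders):

    import mechanize

    br = mechanize.Browser()
    br.open('http://www.mediapart.fr/')
    br.select_form(nr=1)              # second form on the page
    br['name'] = 'user@example.com'   # login field
    br['pass'] = 'secret'             # password field
    br.submit()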
resources/recipes/ming_pao.recipe (new file, 61 lines)
@@ -0,0 +1,61 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Eddie Lau'
+'''
+modified from Singtao Toronto calibre recipe by rty
+Change Log:
+2010/10/31: skip repeated articles in section pages
+'''
+
+import datetime
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1278063072(BasicNewsRecipe):
+    title = 'Ming Pao - Hong Kong'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    __author__ = 'Eddie Lau'
+    description = 'Hong Kong Chinese Newspaper'
+    publisher = 'news.mingpao.com'
+    category = 'Chinese, News, Hong Kong'
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+    language = 'zh'
+    encoding = 'Big5-HKSCS'
+    recursions = 0
+    conversion_options = {'linearize_tables':True}
+    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
+    keep_only_tags = [dict(name='h1'),
+                      dict(attrs={'id':['newscontent01','newscontent02']})]
+
+    def get_fetchdate(self):
+        dt_utc = datetime.datetime.utcnow()
+        # convert UTC to local hk time - at around HKT 5.30am, all news are available
+        dt_local = dt_utc - datetime.timedelta(-2.5/24)
+        return dt_local.strftime("%Y%m%d")
+
+    def parse_index(self):
+        feeds = []
+        dateStr = self.get_fetchdate()
+        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
+            articles = self.parse_section(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def parse_section(self, url):
+        dateStr = self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        divs = soup.findAll(attrs={'class': ['bullet']})
+        current_articles = []
+        included_urls = []
+        for i in divs:
+            a = i.find('a', href = True)
+            title = self.tag_to_string(a)
+            url = a.get('href', False)
+            url = 'http://news.mingpao.com/' + dateStr + '/' +url
+            if url not in included_urls:
+                current_articles.append({'title': title, 'url': url, 'description':''})
+                included_urls.append(url)
+        return current_articles
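A note on get_fetchdate above: subtracting datetime.timedelta(-2.5/24) shifts UTC forward by 2.5 hours, so the computed date only rolls over to a new day once it is 5:30am in Hong Kong (UTC+8), which the comment says is when all of the day's pages are online. A sketch of the same computation:

    import datetime

    def fetch_date(now_utc=None):
        now_utc = now_utc or datetime.datetime.utcnow()
        shifted = now_utc + datetime.timedelta(hours=2.5)  # same as -timedelta(-2.5/24)
        return shifted.strftime('%Y%m%d')

    # At 2010-10-30 21:00 UTC (05:00 HKT on Oct 31) this still returns
    # '20101030'; from 21:30 UTC (05:30 HKT) onward it returns '20101031'.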
@@ -8,11 +8,11 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class NewScientist(BasicNewsRecipe):
-    title = 'New Scientist - Online News'
+    title = 'New Scientist - Online News w. subscription'
     __author__ = 'Darko Miletic'
     description = 'Science news and science articles from New Scientist.'
     language = 'en'
-    publisher = 'New Scientist'
+    publisher = 'Reed Business Information Ltd.'
     category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
     oldest_article = 7
     max_articles_per_feed = 100
@@ -21,7 +21,12 @@ class NewScientist(BasicNewsRecipe):
     cover_url = 'http://www.newscientist.com/currentcover.jpg'
     masthead_url = 'http://www.newscientist.com/img/misc/ns_logo.jpg'
     encoding = 'utf-8'
-    extra_css = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} '
+    needs_subscription = 'optional'
+    extra_css = """
+        body{font-family: Arial,sans-serif}
+        img{margin-bottom: 0.8em}
+        .quotebx{font-size: x-large; font-weight: bold; margin-right: 2em; margin-left: 2em}
+        """

     conversion_options = {
         'comment' : description
@@ -33,15 +38,27 @@ class NewScientist(BasicNewsRecipe):

     keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]

+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        br.open('http://www.newscientist.com/')
+        if self.username is not None and self.password is not None:
+            br.open('https://www.newscientist.com/user/login?redirectURL=')
+            br.select_form(nr=2)
+            br['loginId' ] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
     remove_tags = [
         dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
        ,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools','comments','blgsocial','sharebtns']})
        ,dict(name='p'   , attrs={'class':['marker','infotext' ]})
        ,dict(name='meta', attrs={'name' :'description' })
        ,dict(name='a'   , attrs={'rel'  :'tag' })
+       ,dict(name=['link','base','meta','iframe','object','embed'])
         ]
     remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
-    remove_attributes = ['height','width']
+    remove_attributes = ['height','width','lang']

     feeds = [
         (u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' )
@@ -62,6 +79,8 @@ class NewScientist(BasicNewsRecipe):
         return url + '?full=true&print=true'

     def preprocess_html(self, soup):
+        for item in soup.findAll(['quote','quotetext']):
+            item.name='p'
         for tg in soup.findAll('a'):
             if tg.string == 'Home':
                 tg.parent.extract()
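The preprocess_html addition above renames New Scientist's nonstandard <quote> and <quotetext> elements to plain <p> so the downstream conversion treats them as ordinary paragraphs. A standalone sketch of the same transformation with the BeautifulSoup bundled in calibre:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    def normalize_quotes(html):
        soup = BeautifulSoup(html)
        for item in soup.findAll(['quote', 'quotetext']):
            item.name = 'p'   # keep the contents, change only the tag name
        return soup

    # normalize_quotes('<quote>Space is big.</quote>') -> <p>Space is big.</p>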
@@ -5,62 +5,59 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 nytimes.com
 '''
-import re
-import time
-from calibre import entity_to_unicode
+import re, string, time
+from calibre import entity_to_unicode, strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
-    Comment, BeautifulStoneSoup
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

 class NYTimes(BasicNewsRecipe):

-    title = 'New York Times Top Stories'
-    __author__ = 'GRiker'
-    language = 'en'
-    requires_version = (0, 7, 5)
-    description = 'Top Stories from the New York Times'
-
-    # List of sections typically included in Top Stories. Use a keyword from the
-    # right column in the excludeSectionKeywords[] list to skip downloading that section
-    sections = {
-         'arts'             :   'Arts',
-         'business'         :   'Business',
-         'diningwine'       :   'Dining & Wine',
-         'editorials'       :   'Editorials',
-         'health'           :   'Health',
-         'magazine'         :   'Magazine',
-         'mediaadvertising' :   'Media & Advertising',
-         'newyorkregion'    :   'New York/Region',
-         'oped'             :   'Op-Ed',
-         'politics'         :   'Politics',
-         'science'          :   'Science',
-         'sports'           :   'Sports',
-         'technology'       :   'Technology',
-         'topstories'       :   'Top Stories',
-         'travel'           :   'Travel',
-         'us'               :   'U.S.',
-         'world'            :   'World'
-    }
-
-    # Add section keywords from the right column above to skip that section
-    # For example, to skip sections containing the word 'Sports' or 'Dining', use:
-    # excludeSectionKeywords = ['Sports', 'Dining']
-    # Fetch only Business and Technology
-    # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
-    # Fetch only Top Stories
-    # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
-    # By default, no sections are skipped.
-    excludeSectionKeywords = []
+    # set headlinesOnly to True for the headlines-only version
+    headlinesOnly = True
+
+    # includeSections: List of sections to include. If empty, all sections found will be included.
+    # Otherwise, only the sections named will be included. For example,
+    #
+    #    includeSections = ['Politics','Sports']
+    #
+    # would cause only the Politics and Sports sections to be included.
+
+    includeSections = []  # by default, all sections included
+
+    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
+    # Otherwise, the sections named will be excluded. For example,
+    #
+    #    excludeSections = ['Politics','Sports']
+    #
+    # would cause the Politics and Sports sections to be excluded. This parameter can be used
+    # in conjunction with includeSections although in most cases using one or the other, but
+    # not both, is sufficient.
+
+    excludeSections = []

     # one_picture_per_article specifies that calibre should only use the first image
     # from an article (if one exists). If one_picture_per_article = True, the image
     # will be moved to a location between the headline and the byline.
     # If one_picture_per_article = False, all images from the article will be included
     # and shown in their original location.
     one_picture_per_article = True

     # The maximum number of articles that will be downloaded
-    max_articles_per_feed = 40
+    max_articles_per_feed = 100
+
+    if headlinesOnly:
+        title='New York Times Headlines'
+        description = 'Headlines from the New York Times'
+    else:
+        title='New York Times'
+        description = 'Today\'s New York Times'
+
+    __author__  = 'GRiker/Kovid Goyal/Nick Redding'
+    language = 'en'
+    requires_version = (0, 7, 5)

     timefmt = ''
     needs_subscription = True
@@ -82,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
             'entry-response module',
             'icon enlargeThis',
             'leftNavTabs',
+            'metaFootnote',
             'module box nav',
             'nextArticleLink',
             'nextArticleLink clearfix',
@@ -89,12 +87,13 @@ class NYTimes(BasicNewsRecipe):
             'relatedSearchesModule',
             'side_tool',
             'singleAd',
-            'subNavigation clearfix',
-            'subNavigation tabContent active',
-            'subNavigation tabContent active clearfix',
+            re.compile('^subNavigation'),
+            re.compile('^leaderboard'),
+            re.compile('^module'),
             ]}),
         dict(id=[
             'adxLeaderboard',
+            'adxSponLink',
             'archive',
             'articleExtras',
             'articleInline',
@@ -105,87 +104,98 @@ class NYTimes(BasicNewsRecipe):
             'footer',
             'header',
             'header_search',
+            'inlineBox',
             'login',
             'masthead',
             'masthead-nav',
             'memberTools',
             'navigation',
             'portfolioInline',
+            'readerReviews',
+            'readerReviewsCount',
             'relatedArticles',
+            'relatedTopics',
             'respond',
             'side_search',
             'side_index',
             'side_tool',
             'toolsRight',
             ]),
-        dict(name=['script', 'noscript', 'style'])]
+        dict(name=['script', 'noscript', 'style','form','hr'])]

     no_stylesheets = True
-    extra_css = '.headline     {text-align: left;}\n \
-                 .byline       {font-family: monospace; \
-                                text-align: left; \
-                                margin-top: 0px; \
-                                margin-bottom: 0px;}\n \
-                 .dateline     {font-size: small; \
-                                margin-top: 0px; \
-                                margin-bottom: 0px;}\n \
-                 .timestamp    {font-size: small; \
-                                margin-top: 0px; \
-                                margin-bottom: 0px;}\n \
-                 .source       {text-align: left;}\n \
-                 .image        {text-align: center;}\n \
-                 .credit       {text-align: right; \
-                                font-size: small; \
-                                margin-top: 0px; \
-                                margin-bottom: 0px;}\n \
-                 .articleBody  {text-align: left;}\n \
-                 .authorId     {text-align: left; \
-                                font-style: italic;}\n '
+    extra_css = '''
+        .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
+        .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .timestamp { text-align: left; font-size: small; }
+        .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        a:link {text-decoration: none; }
+        .articleBody { }
+        .authorId {text-align: left; }
+        .image {text-align: center;}
+        .source {text-align: left; }'''

-    def dump_ans(self, ans) :
+    def filter_ans(self, ans) :
         total_article_count = 0
-        for section in ans :
+        idx = 0
+        idx_max = len(ans)-1
+        while idx <= idx_max:
+            if self.includeSections != []:
+                if ans[idx][0] not in self.includeSections:
+                    print "SECTION NOT INCLUDED: ",ans[idx][0]
+                    del ans[idx]
+                    idx_max = idx_max-1
+                    continue
+            if ans[idx][0] in self.excludeSections:
+                print "SECTION EXCLUDED: ",ans[idx][0]
+                del ans[idx]
+                idx_max = idx_max-1
+                continue
             if self.verbose:
-                self.log("section %s: %d articles" % (section[0], len(section[1])) )
-            for article in section[1]:
+                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
+            for article in ans[idx][1]:
                 total_article_count += 1
                 if self.verbose:
                     self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                               article['url'].encode('cp1252','replace')))
+            idx = idx+1
+
         self.log( "Queued %d articles" % total_article_count )
+        return ans

     def fixChars(self,string):
         # Replace lsquo (\x91)
         fixed = re.sub("\x91","‘",string)
         # Replace rsquo (\x92)
         fixed = re.sub("\x92","’",fixed)
         # Replace ldquo (\x93)
         fixed = re.sub("\x93","“",fixed)
         # Replace rdquo (\x94)
         fixed = re.sub("\x94","”",fixed)
         # Replace ndash (\x96)
         fixed = re.sub("\x96","–",fixed)
         # Replace mdash (\x97)
         fixed = re.sub("\x97","—",fixed)
         return fixed

     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
-            try:
-                br.open('http://www.nytimes.com/auth/login')
-                br.select_form(name='login')
-                br['USERID']   = self.username
-                br['PASSWORD'] = self.password
-                br.submit()
-            except:
-                self.log("\nFailed to login")
+            br.open('http://www.nytimes.com/auth/login')
+            br.select_form(name='login')
+            br['USERID']   = self.username
+            br['PASSWORD'] = self.password
+            raw = br.submit().read()
+            if 'Please try again' in raw:
+                raise Exception('Your username and password are incorrect')
         return br

     def skip_ad_pages(self, soup):
@@ -213,6 +223,9 @@ class NYTimes(BasicNewsRecipe):
             cover = None
         return cover

+    def short_title(self):
+        return self.title
+
     def index_to_soup(self, url_or_raw, raw=False):
         '''
         OVERRIDE of class method
@@ -255,157 +268,184 @@ class NYTimes(BasicNewsRecipe):
         # Kindle TOC descriptions won't render certain characters
         if description:
             massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
             # Replace '&amp;' with '&'
             massaged = re.sub("&amp;","&", massaged)
             return self.fixChars(massaged)
         else:
             return description

-    def parse_index(self):
-        articles = {}
-        ans = []
-        feed = key = 'All Top Stories'
-        articles[key] = []
-        ans.append(key)
-        self.log("Scanning 1 section ...")
-
-        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
-
-        # Fetch the outer table
-        table = soup.find('table')
-        previousTable = table
-
-        # Find the deepest table containing the stories
-        while True :
-            table = table.find('table')
-            if table.find(text=re.compile('top stories start')) :
-                previousTable = table
-                continue
-            else :
-                table = previousTable
-                break
-
-        # There are multiple subtables, find the one containing the stories
-        for block in table.findAll('table') :
-            if block.find(text=re.compile('top stories start')) :
-                table = block
-                break
-            else :
-                continue
-
-        # Again there are multiple subtables, find the one containing the stories
-        for storyblock in table.findAll('table') :
-            if storyblock.find(text=re.compile('top stories start')) :
-                break
-            else :
-                continue
-
-        skipThisSection = False
-        todays_article_count = 0
-        # Within this table are <font face="times new roman, times, san serif"> entries
-        self.log("Fetching feed Top Stories")
-        for tr in storyblock.findAllNext('tr'):
-            if tr.find('span') is not None :
-
-                sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif',
-                                                            'times new roman,times, sans serif',
-                                                            'times new roman, times, sans serif']})
-                section = None
-                bylines = []
-                descriptions = []
-                pubdate = None
-
-                # Get the Section title
-                for (x,i) in enumerate(sectionblock.contents) :
-                    skipThisSection = False
-                    # Extract the section title
-                    if ('Comment' in str(i.__class__)) :
-                        if 'start(name=' in i :
-                            section = i[i.find('=')+1:-2]
-
-                            if not self.sections.has_key(section) :
-                                skipThisSection = True
-                                break
-
-                            # Check for excluded section
-                            if len(self.excludeSectionKeywords):
-                                key = self.sections[section]
-                                excluded = re.compile('|'.join(self.excludeSectionKeywords))
-                                if excluded.search(key) or articles.has_key(key):
-                                    skipThisSection = True
-                                    break
-
-                # Get the bylines and descriptions
-                if not skipThisSection :
-                    lines = sectionblock.contents
-                    contentStrings = []
-
-                    for line in lines:
-                        if not isinstance(line, Comment) and line.strip and line.strip() > "":
-                            contentStrings.append(line.strip())
-
-                    # Gather the byline/description pairs
-                    bylines = []
-                    descriptions = []
-                    for contentString in contentStrings:
-                        if contentString[0:3] == 'By ' and contentString[3].isupper() :
-                            bylines.append(contentString)
-                        else:
-                            descriptions.append(contentString)
-
-                    # Fetch the article titles and URLs
-                    articleCount = len(sectionblock.findAll('span'))
-                    todays_article_count += articleCount
-                    for (i,span) in enumerate(sectionblock.findAll(attrs={'class':'headlineWrapper'})) :
-                        a = span.find('a', href=True)
-                        url = re.sub(r'\?.*', '', a['href'])
-                        url += '?pagewanted=all'
-
-                        title = self.tag_to_string(a, use_alt=True)
-                        # prepend the section name
-                        title = self.sections[section] + " · " + title
-
-                        if not isinstance(title, unicode):
-                            title = title.decode('utf-8', 'replace')
-
-                        # Allow for unattributed, undescribed entries "Editor's Note"
-                        if i >= len(descriptions) :
-                            description = None
-                        else :
-                            description = descriptions[i]
-
-                        if len(bylines) == articleCount :
-                            author = bylines[i]
-                        else :
-                            author = None
-
-                        # Check for duplicates
-                        duplicateFound = False
-                        if len(articles[feed]) > 1:
-                            for article in articles[feed] :
-                                if url == article['url'] :
-                                    duplicateFound = True
-                                    break
-
-                            if duplicateFound:
-                                # Continue fetching, don't add this article
-                                todays_article_count -= 1
-                                continue
-
-                        if not articles.has_key(feed):
-                            articles[feed] = []
-                        articles[feed].append(
-                            dict(title=title, url=url, date=pubdate,
-                                 description=description, author=author, content=''))
-        # self.log("Queuing %d articles from %s" % (todays_article_count, "Top Stories"))
-
-        ans = self.sort_index_by(ans, {'Top Stories':-1})
-        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        self.dump_ans(ans)
-        return ans
+    def parse_todays_index(self):
+
+        def feed_title(div):
+            return ''.join(div.findAll(text=True, recursive=True)).strip()
+
+        articles = {}
+        key = None
+        ans = []
+        url_list = []
+
+        def handle_article(div):
+            a = div.find('a', href=True)
+            if not a:
+                return
+            url = re.sub(r'\?.*', '', a['href'])
+            if not url.startswith("http"):
+                return
+            if not url.endswith(".html"):
+                return
+            if 'podcast' in url:
+                return
+            if '/video/' in url:
+                return
+            url += '?pagewanted=all'
+            if url in url_list:
+                return
+            url_list.append(url)
+            title = self.tag_to_string(a, use_alt=True).strip()
+            description = ''
+            pubdate = strftime('%a, %d %b')
+            summary = div.find(True, attrs={'class':'summary'})
+            if summary:
+                description = self.tag_to_string(summary, use_alt=False)
+            author = ''
+            authorAttribution = div.find(True, attrs={'class':'byline'})
+            if authorAttribution:
+                author = self.tag_to_string(authorAttribution, use_alt=False)
+            else:
+                authorAttribution = div.find(True, attrs={'class':'byline'})
+                if authorAttribution:
+                    author = self.tag_to_string(authorAttribution, use_alt=False)
+            feed = key if key is not None else 'Uncategorized'
+            if not articles.has_key(feed):
+                ans.append(feed)
+                articles[feed] = []
+            articles[feed].append(
+                dict(title=title, url=url, date=pubdate,
+                     description=description, author=author,
+                     content=''))
+
+        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
+
+        # Find each article
+        for div in soup.findAll(True,
+            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
+
+            if div['class'] in ['section-headline','sectionHeader']:
+                key = string.capwords(feed_title(div))
+                key = key.replace('Op-ed','Op-Ed')
+                key = key.replace('U.s.','U.S.')
+            elif div['class'] in ['story', 'story headline'] :
+                handle_article(div)
+            elif div['class'] == 'headlinesOnly multiline flush':
+                for lidiv in div.findAll('li'):
+                    handle_article(lidiv)
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+
+    def parse_headline_index(self):
+
+        articles = {}
+        ans = []
+        url_list = []
+
+        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
+
+        # Fetch the content table
+        content_table = soup.find('table',{'id':'content'})
+        if content_table is None:
+            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
+            return None
+
+        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
+        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
+            for div_sec in td_col.findAll('div',recursive=False):
+                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
+                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+                    section_name = re.sub(r'^ *$','',section_name)
+                    if section_name == '':
+                        continue
+                    section_name=string.capwords(section_name)
+                    if section_name == 'U.s.':
+                        section_name = 'U.S.'
+                    elif section_name == 'Op-ed':
+                        section_name = 'Op-Ed'
+                    pubdate = strftime('%a, %d %b')
+
+                    search_div = div_sec
+                    for next_tag in h6_sec_name.findNextSiblings(True):
+                        if next_tag.__class__.__name__ == 'Tag':
+                            if next_tag.name == 'div':
+                                search_div = next_tag
+                            break
+
+                    # Get the articles
+                    for h3_item in search_div.findAll('h3'):
+                        byline = h3_item.h6
+                        if byline is not None:
+                            author = self.tag_to_string(byline,use_alt=False)
+                        else:
+                            author = ''
+                        a = h3_item.find('a', href=True)
+                        if not a:
+                            continue
+                        url = re.sub(r'\?.*', '', a['href'])
+                        if not url.startswith("http"):
+                            continue
+                        if not url.endswith(".html"):
+                            continue
+                        if 'podcast' in url:
+                            continue
+                        if 'video' in url:
+                            continue
+                        url += '?pagewanted=all'
+                        if url in url_list:
+                            continue
+                        url_list.append(url)
+                        self.log("URL %s" % url)
+                        title = self.tag_to_string(a, use_alt=True).strip()
+                        desc = h3_item.find('p')
+                        if desc is not None:
+                            description = self.tag_to_string(desc,use_alt=False)
+                        else:
+                            description = ''
+                        if not articles.has_key(section_name):
+                            ans.append(section_name)
+                            articles[section_name] = []
+                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+
+    def parse_index(self):
+        if self.headlinesOnly:
+            return self.parse_headline_index()
+        else:
+            return self.parse_todays_index()
+
+    def strip_anchors(self,soup):
+        paras = soup.findAll(True)
+        for para in paras:
+            aTags = para.findAll('a')
+            for a in aTags:
+                if a.img is None:
+                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
+        return soup

     def preprocess_html(self, soup):
+
+        kicker_tag = soup.find(attrs={'class':'kicker'})
+        if kicker_tag: # remove Op_Ed author head shots
+            tagline = self.tag_to_string(kicker_tag)
+            if tagline=='Op-Ed Columnist':
+                img_div = soup.find('div','inlineImage module')
+                if img_div:
+                    img_div.extract()
         return self.strip_anchors(soup)

     def postprocess_html(self,soup, True):
@@ -422,8 +462,9 @@ class NYTimes(BasicNewsRecipe):
                 firstImg = inlineImgs[0]
                 for inlineImg in inlineImgs[1:]:
                     inlineImg.extract()
-                # Move firstImg after headline
-                cgFirst = soup.find(True, {'class':'columnGroup first'})
+                # Move firstImg before article body
+                #article_body = soup.find(True, {'id':'articleBody'})
+                cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
                 if cgFirst:
                     # Strip all sibling NavigableStrings: noise
                     navstrings = cgFirst.findAll(text=True, recursive=False)
@@ -443,30 +484,18 @@ class NYTimes(BasicNewsRecipe):
             if headline_found:
                 cgFirst.insert(insertLoc,firstImg)
             else:
                 self.log(">>> No class:'columnGroup first' found <<<")
-        # Change class="kicker" to <h3>
-        kicker = soup.find(True, {'class':'kicker'})
-        if kicker and kicker.contents[0]:
-            h3Tag = Tag(soup, "h3")
-            h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
-                             use_alt=False)))
-            kicker.replaceWith(h3Tag)

-        # Change captions to italic -1
+        # Change captions to italic
         for caption in soup.findAll(True, {'class':'caption'}) :
             if caption and caption.contents[0]:
-                emTag = Tag(soup, "em")
+                cTag = Tag(soup, "p", [("class", "caption")])
                 c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                 mp_off = c.find("More Photos")
                 if mp_off >= 0:
                     c = c[:mp_off]
-                emTag.insert(0, c)
-                #hrTag = Tag(soup, 'hr')
-                #hrTag['class'] = 'caption_divider'
-                hrTag = Tag(soup, 'div')
-                hrTag['class'] = 'divider'
-                emTag.insert(1, hrTag)
-                caption.replaceWith(emTag)
+                cTag.insert(0, c)
+                caption.replaceWith(cTag)

         # Change <nyt_headline> to <h2>
         h1 = soup.find('h1')
@@ -506,17 +535,6 @@ class NYTimes(BasicNewsRecipe):
             bTag.insert(0, subhead.contents[0])
             subhead.replaceWith(bTag)

-        # Synthesize a section header
-        dsk = soup.find('meta', attrs={'name':'dsk'})
-        if dsk and dsk.has_key('content'):
-            hTag = Tag(soup,'h3')
-            hTag['class'] = 'section'
-            hTag.insert(0,NavigableString(dsk['content']))
-            articleTag = soup.find(True, attrs={'id':'article'})
-            if articleTag:
-                articleTag.insert(0,hTag)
-
-        # Add class="articleBody" to <div> so we can format with CSS
         divTag = soup.find('div',attrs={'id':'articleBody'})
         if divTag:
             divTag['class'] = divTag['id']
@@ -532,11 +550,3 @@ class NYTimes(BasicNewsRecipe):

         return soup
-
-    def strip_anchors(self,soup):
-        paras = soup.findAll(True)
-        for para in paras:
-            aTags = para.findAll('a')
-            for a in aTags:
-                if a.img is None:
-                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
-        return soup
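filter_ans above walks the section list with a manual index so it can log and delete entries as it scans. A functionally equivalent sketch of the include/exclude rule (a hypothetical helper, not from the commit), which may be easier to read:

    def filter_sections(ans, include=(), exclude=()):
        # ans is a list of (section_name, articles) tuples
        def keep(name):
            if include:                 # a non-empty include list wins
                return name in include
            return name not in exclude
        return [(name, arts) for name, arts in ans if keep(name)]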
@ -4,56 +4,66 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 nytimes.com
-V5 - One picture per article, moved to top:
-    Headline
-    Image
-    Byline
-    Story
 '''
 import re, string, time
-from calibre import strftime
+from calibre import entity_to_unicode, strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

 class NYTimes(BasicNewsRecipe):

-    title = 'The New York Times'
-    __author__ = 'GRiker'
-    language = 'en'
-    requires_version = (0, 7, 5)
+    # set headlinesOnly to True for the headlines-only version
+    headlinesOnly = False

-    description = 'Daily news from the New York Times (subscription version)'
-    allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials',
-                          'New York','Business Day','Science Times','Sports','Dining','Arts',
-                          'Home','Styles','Sunday Business','Week In Review','Travel','Magazine',
-                          'Book Review','Weddings','Real Estate','Automobiles',"T Men's Fashion",
-                          "T Women's Fashion"]
+    # includeSections: List of sections to include. If empty, all sections found will be included.
+    # Otherwise, only the sections named will be included. For example,
+    #
+    # includeSections = ['Politics','Sports']
+    #
+    # would cause only the Politics and Sports sections to be included.
+    includeSections = []  # by default, all sections included

-    # List of sections to exclude
-    # To add a section, copy the section name from the allSectionKeywords list above
-    # For example, to exclude 'Dining' and 'Weddings':
-    #excludeSectionKeywords = ['Dining','Weddings']
-    excludeSectionKeywords = []

-    # List of sections to include (test and debug only)
-    # By default, any sections in today's paper that are not listed in excludeSectionKeywords
-    # are downloaded. fetch_only specifies that only certain sections are to be downloaded.
-    # This should only be used for testing and debugging.
-    # For example, to download only 'The Front Page' section:
-    # fetch_only = set(['The Front Page'])
-    fetch_only = set([])
-    if fetch_only:
-        excludeSectionKeywords = list(set(allSectionKeywords) ^ fetch_only)
+    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
+    # Otherwise, the sections named will be excluded. For example,
+    #
+    # excludeSections = ['Politics','Sports']
+    #
+    # would cause the Politics and Sports sections to be excluded. This parameter can be used
+    # in conjunction with includeSections although in most cases using one or the other, but
+    # not both, is sufficient.
+    excludeSections = []

     # one_picture_per_article specifies that calibre should only use the first image
     # from an article (if one exists). If one_picture_per_article = True, the image
     # will be moved to a location between the headline and the byline.
     # If one_picture_per_article = False, all images from the article will be included
     # and shown in their original location.
     one_picture_per_article = True

+    # The maximum number of articles that will be downloaded
+    max_articles_per_feed = 100

+    if headlinesOnly:
+        title='New York Times Headlines'
+        description = 'Headlines from the New York Times'
+    else:
+        title='New York Times'
+        description = 'Today\'s New York Times'

+    __author__  = 'GRiker/Kovid Goyal/Nick Redding'
+    language = 'en'
+    requires_version = (0, 7, 5)

     timefmt = ''
     needs_subscription = True
+    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
+    cover_margins = (18,18,'grey99')

     remove_tags_before = dict(id='article')
     remove_tags_after  = dict(id='article')
     remove_tags = [dict(attrs={'class':[
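A toy demonstration (not part of the diff) of the include/exclude semantics these two new knobs carry; the actual filtering is implemented in filter_ans() further down, and the section names here are invented:

    includeSections = []          # empty list means every section is eligible
    excludeSections = ['Sports']  # sections named here are dropped

    sections = ['The Front Page', 'Sports', 'Obituaries']
    kept = [s for s in sections
            if (not includeSections or s in includeSections)
            and s not in excludeSections]
    print kept  # -> ['The Front Page', 'Obituaries']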
@ -69,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
             'entry-response module',
             'icon enlargeThis',
             'leftNavTabs',
+            'metaFootnote',
             'module box nav',
             'nextArticleLink',
             'nextArticleLink clearfix',
@ -76,12 +87,13 @@ class NYTimes(BasicNewsRecipe):
             'relatedSearchesModule',
             'side_tool',
             'singleAd',
-            'subNavigation clearfix',
-            'subNavigation tabContent active',
-            'subNavigation tabContent active clearfix',
+            re.compile('^subNavigation'),
+            re.compile('^leaderboard'),
+            re.compile('^module'),
             ]}),
         dict(id=[
             'adxLeaderboard',
+            'adxSponLink',
             'archive',
             'articleExtras',
             'articleInline',
@ -92,61 +104,110 @@ class NYTimes(BasicNewsRecipe):
             'footer',
             'header',
             'header_search',
+            'inlineBox',
             'login',
             'masthead',
             'masthead-nav',
             'memberTools',
             'navigation',
             'portfolioInline',
+            'readerReviews',
+            'readerReviewsCount',
             'relatedArticles',
+            'relatedTopics',
             'respond',
             'side_search',
             'side_index',
             'side_tool',
             'toolsRight',
             ]),
-        dict(name=['script', 'noscript', 'style'])]
-    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
-    cover_margins = (18,18,'grey99')
+        dict(name=['script', 'noscript', 'style','form','hr'])]

     no_stylesheets = True

-    extra_css = '.headline {text-align: left;}\n \
-                 .byline {font-family: monospace; \
-                          text-align: left; \
-                          margin-top: 0px; \
-                          margin-bottom: 0px;}\n \
-                 .dateline {font-size: small; \
-                            margin-top: 0px; \
-                            margin-bottom: 0px;}\n \
-                 .timestamp {font-size: small; \
-                             margin-top: 0px; \
-                             margin-bottom: 0px;}\n \
-                 .source {text-align: left;}\n \
-                 .image {text-align: center;}\n \
-                 .credit {text-align: right; \
-                          font-size: small; \
-                          margin-top: 0px; \
-                          margin-bottom: 0px;}\n \
-                 .articleBody {text-align: left;}\n \
-                 .authorId {text-align: left; \
-                            font-style: italic;}\n '
+    extra_css = '''
+        .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
+        .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .timestamp { text-align: left; font-size: small; }
+        .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        a:link {text-decoration: none; }
+        .articleBody { }
+        .authorId {text-align: left; }
+        .image {text-align: center;}
+        .source {text-align: left; }'''

+    def filter_ans(self, ans) :
+        total_article_count = 0
+        idx = 0
+        idx_max = len(ans)-1
+        while idx <= idx_max:
+            if self.includeSections != []:
+                if ans[idx][0] not in self.includeSections:
+                    print "SECTION NOT INCLUDED: ",ans[idx][0]
+                    del ans[idx]
+                    idx_max = idx_max-1
+                    continue
+            if ans[idx][0] in self.excludeSections:
+                print "SECTION EXCLUDED: ",ans[idx][0]
+                del ans[idx]
+                idx_max = idx_max-1
+                continue
+            if self.verbose:
+                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
+            for article in ans[idx][1]:
+                total_article_count += 1
+                if self.verbose:
+                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
+                              article['url'].encode('cp1252','replace')))
+            idx = idx+1

+        self.log( "Queued %d articles" % total_article_count )
+        return ans

+    def fixChars(self,string):
+        # Replace lsquo (\x91)
+        fixed = re.sub("\x91","‘",string)
+        # Replace rsquo (\x92)
+        fixed = re.sub("\x92","’",fixed)
+        # Replace ldquo (\x93)
+        fixed = re.sub("\x93","“",fixed)
+        # Replace rdquo (\x94)
+        fixed = re.sub("\x94","”",fixed)
+        # Replace ndash (\x96)
+        fixed = re.sub("\x96","–",fixed)
+        # Replace mdash (\x97)
+        fixed = re.sub("\x97","—",fixed)
+        return fixed
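The fixChars() helper added above applies the cp1252 smart-punctuation mapping one re.sub() at a time; the same mapping can be written as a single table. A sketch, not part of the diff (the names are illustrative):

    # Equivalent one-pass version of fixChars().
    CP1252_PUNCT = {
        u'\x91': u'‘', u'\x92': u'’',   # single quotes
        u'\x93': u'“', u'\x94': u'”',   # double quotes
        u'\x96': u'–', u'\x97': u'—',   # en dash, em dash
    }

    def fix_chars(text):
        for raw, repl in CP1252_PUNCT.items():
            text = text.replace(raw, repl)
        return text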
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
-            try:
-                br.open('http://www.nytimes.com/auth/login')
-                br.select_form(name='login')
-                br['USERID']   = self.username
-                br['PASSWORD'] = self.password
-                raw = br.submit().read()
-                if 'Sorry, we could not find the combination you entered. Please try again.' in raw:
-                    raise Exception('Your username and password are incorrect')
-                #open('/t/log.html', 'wb').write(raw)
-            except:
-                self.log("\nFailed to login")
+            br.open('http://www.nytimes.com/auth/login')
+            br.select_form(name='login')
+            br['USERID']   = self.username
+            br['PASSWORD'] = self.password
+            raw = br.submit().read()
+            if 'Please try again' in raw:
+                raise Exception('Your username and password are incorrect')
         return br

+    def skip_ad_pages(self, soup):
+        # Skip ad pages served before actual article
+        skip_tag = soup.find(True, {'name':'skip'})
+        if skip_tag is not None:
+            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
+            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
+            url += '?pagewanted=all'
+            self.log.warn("Skipping ad to article at '%s'" % url)
+            return self.index_to_soup(url, raw=True)

     def get_cover_url(self):
         cover = None
         st = time.localtime()
|
|||||||
cover = None
|
cover = None
|
||||||
return cover
|
return cover
|
||||||
|
|
||||||
def get_masthead_title(self):
|
def short_title(self):
|
||||||
return self.title
|
return self.title
|
||||||
|
|
||||||
def dump_ans(self, ans):
|
def index_to_soup(self, url_or_raw, raw=False):
|
||||||
total_article_count = 0
|
'''
|
||||||
for section in ans :
|
OVERRIDE of class method
|
||||||
if self.verbose:
|
deals with various page encodings between index and articles
|
||||||
self.log("section %s: %d articles" % (section[0], len(section[1])) )
|
'''
|
||||||
for article in section[1]:
|
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
||||||
total_article_count += 1
|
if re.match(r'\w+://', url_or_raw):
|
||||||
if self.verbose:
|
f = self.browser.open(url_or_raw)
|
||||||
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('mac-roman','replace'),
|
_raw = f.read()
|
||||||
article['url'].encode('mac-roman','replace')))
|
f.close()
|
||||||
self.log( "Queued %d articles" % total_article_count )
|
if not _raw:
|
||||||
|
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
|
||||||
|
else:
|
||||||
|
_raw = url_or_raw
|
||||||
|
if raw:
|
||||||
|
return _raw
|
||||||
|
|
||||||
def dump_hex(self, src, length=16):
|
if not isinstance(_raw, unicode) and self.encoding:
|
||||||
''' Diagnostic '''
|
_raw = _raw.decode(docEncoding, 'replace')
|
||||||
FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
|
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||||
N=0; result=''
|
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
|
||||||
while src:
|
return BeautifulSoup(_raw, markupMassage=massage)
|
||||||
s,src = src[:length],src[length:]
|
|
||||||
hexa = ' '.join(["%02X"%ord(x) for x in s])
|
|
||||||
s = s.translate(FILTER)
|
|
||||||
result += "%04X %-*s %s\n" % (N, length*3, hexa, s)
|
|
||||||
N+=length
|
|
||||||
print result
|
|
||||||
|
|
||||||
def fixChars(self,string):
|
# Entry point
|
||||||
# Replace lsquo (\x91)
|
print "index_to_soup()"
|
||||||
fixed = re.sub("\x91","‘",string)
|
soup = get_the_soup( self.encoding, url_or_raw )
|
||||||
|
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
||||||
|
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
|
||||||
|
if docEncoding == '' :
|
||||||
|
docEncoding = self.encoding
|
||||||
|
|
||||||
# Replace rsquo (\x92)
|
if self.verbose > 2:
|
||||||
fixed = re.sub("\x92","’",fixed)
|
self.log( " document encoding: '%s'" % docEncoding)
|
||||||
|
if docEncoding != self.encoding :
|
||||||
|
soup = get_the_soup(docEncoding, url_or_raw)
|
||||||
|
|
||||||
# Replace ldquo (\x93)
|
return soup
|
||||||
fixed = re.sub("\x93","“",fixed)
|
|
||||||
|
|
||||||
# Replace rdquo (\x94)
|
|
||||||
fixed = re.sub("\x94","”",fixed)
|
|
||||||
|
|
||||||
# Replace ndash (\x96)
|
|
||||||
fixed = re.sub("\x96","–",fixed)
|
|
||||||
|
|
||||||
# Replace mdash (\x97)
|
|
||||||
fixed = re.sub("\x97","—",fixed)
|
|
||||||
|
|
||||||
return fixed
|
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
def massageNCXText(self, description):
|
||||||
# Kindle TOC descriptions won't render certain characters
|
# Kindle TOC descriptions won't render certain characters
|
||||||
if description:
|
if description:
|
||||||
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
||||||
# Replace '&' with '&'
|
# Replace '&' with '&'
|
||||||
massaged = re.sub("&","&", massaged)
|
massaged = re.sub("&","&", massaged)
|
||||||
return self.fixChars(massaged)
|
return self.fixChars(massaged)
|
||||||
else:
|
else:
|
||||||
return description
|
return description
|
||||||
|
|
||||||
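The index_to_soup() override above re-fetches the page whenever the document's declared charset differs from the recipe default. The sniffing step in isolation, as a sketch (the helper name is invented, the meta-tag lookup mirrors the code above):

    def sniff_encoding(soup, default):
        # Pull charset= out of <meta http-equiv="Content-Type" content="...">
        meta = soup.find(True, attrs={'http-equiv': 'Content-Type'})
        if meta is None:
            return default
        content = meta.get('content', '')
        charset = content.partition('charset=')[2].strip('"\' ')
        return charset or default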
-    def parse_index(self):
-        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
+    def parse_todays_index(self):

         def feed_title(div):
-            return ''.join(div.findAll(text=True, recursive=False)).strip()
+            return ''.join(div.findAll(text=True, recursive=True)).strip()

         articles = {}
         key = None
         ans = []
+        url_list = []

-        # Find each instance of class="section-headline", class="story", class="story headline"
-        for div in soup.findAll(True,
-            attrs={'class':['section-headline', 'story', 'story headline']}):
-
-            if div['class'] == 'section-headline':
-                key = string.capwords(feed_title(div))
-                if self.excludeSectionKeywords:
-                    excluded = re.compile('|'.join(self.excludeSectionKeywords))
-                    if excluded.search(key):
-                        self.log("Skipping section %s" % key)
-                        continue
-                articles[key] = []
-                ans.append(key)
-
-            elif div['class'] in ['story', 'story headline'] :
-                a = div.find('a', href=True)
-                if not a:
-                    continue
-                url = re.sub(r'\?.*', '', a['href'])
-                url += '?pagewanted=all'
-                title = self.massageNCXText(self.tag_to_string(a, use_alt=True).strip())
-                description = ''
-                pubdate = strftime('%a, %d %b')
-                summary = div.find(True, attrs={'class':'summary'})
-                if summary:
-                    description = self.massageNCXText(self.tag_to_string(summary, use_alt=False))
-                author = ''
-                authorAttribution = div.find(True, attrs={'class':'storyheadline-author'})
-                if authorAttribution:
-                    author = self.tag_to_string(authorAttribution, use_alt=False)
-                else:
-                    authorAttribution = div.find(True, attrs={'class':'byline'})
-                    if authorAttribution:
-                        author = self.tag_to_string(authorAttribution, use_alt=False)
-                # Kill commas - Kindle switches to '&'
-                author = re.sub(',','',author)
-                feed = key if key is not None else 'Uncategorized'
-                if not articles.has_key(feed):
-                    articles[feed] = []
-                if not 'podcasts' in url:
-                    articles[feed].append(
-                        dict(title=title, url=url, date=pubdate,
-                            description=description, author=author,
-                            content=''))
-        ans = self.sort_index_by(ans, {'The Front Page':-1,
-            'Dining In, Dining Out':1,
-            'Obituaries':2})
+        def handle_article(div):
+            a = div.find('a', href=True)
+            if not a:
+                return
+            url = re.sub(r'\?.*', '', a['href'])
+            if not url.startswith("http"):
+                return
+            if not url.endswith(".html"):
+                return
+            if 'podcast' in url:
+                return
+            if '/video/' in url:
+                return
+            url += '?pagewanted=all'
+            if url in url_list:
+                return
+            url_list.append(url)
+            title = self.tag_to_string(a, use_alt=True).strip()
+            description = ''
+            pubdate = strftime('%a, %d %b')
+            summary = div.find(True, attrs={'class':'summary'})
+            if summary:
+                description = self.tag_to_string(summary, use_alt=False)
+            author = ''
+            authorAttribution = div.find(True, attrs={'class':'byline'})
+            if authorAttribution:
+                author = self.tag_to_string(authorAttribution, use_alt=False)
+            else:
+                authorAttribution = div.find(True, attrs={'class':'byline'})
+                if authorAttribution:
+                    author = self.tag_to_string(authorAttribution, use_alt=False)
+            feed = key if key is not None else 'Uncategorized'
+            if not articles.has_key(feed):
+                ans.append(feed)
+                articles[feed] = []
+            articles[feed].append(
+                dict(title=title, url=url, date=pubdate,
+                    description=description, author=author,
+                    content=''))

+        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')

+        # Find each article
+        for div in soup.findAll(True,
+            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

+            if div['class'] in ['section-headline','sectionHeader']:
+                key = string.capwords(feed_title(div))
+                key = key.replace('Op-ed','Op-Ed')
+                key = key.replace('U.s.','U.S.')
+            elif div['class'] in ['story', 'story headline'] :
+                handle_article(div)
+            elif div['class'] == 'headlinesOnly multiline flush':
+                for lidiv in div.findAll('li'):
+                    handle_article(lidiv)

         ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        self.dump_ans(ans)
-        return ans
+        return self.filter_ans(ans)
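handle_article() above rejects non-article URLs before queueing anything. The same rules as a standalone predicate, for clarity (the function name is an assumption, the checks mirror the code above):

    def wanted_article(url, seen):
        # Articles only, no repeats: mirrors the early returns in handle_article().
        if not url.startswith('http') or not url.endswith('.html'):
            return False
        if 'podcast' in url or '/video/' in url:
            return False
        return url not in seen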
+    def parse_headline_index(self):

+        articles = {}
+        ans = []
+        url_list = []

+        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

+        # Fetch the content table
+        content_table = soup.find('table',{'id':'content'})
+        if content_table is None:
+            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
+            return None

+        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections

+        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
+            for div_sec in td_col.findAll('div',recursive=False):
+                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
+                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+                    section_name = re.sub(r'^ *$','',section_name)
+                    if section_name == '':
+                        continue
+                    section_name=string.capwords(section_name)
+                    if section_name == 'U.s.':
+                        section_name = 'U.S.'
+                    elif section_name == 'Op-ed':
+                        section_name = 'Op-Ed'
+                    pubdate = strftime('%a, %d %b')

+                    search_div = div_sec
+                    for next_tag in h6_sec_name.findNextSiblings(True):
+                        if next_tag.__class__.__name__ == 'Tag':
+                            if next_tag.name == 'div':
+                                search_div = next_tag
+                            break

+                    # Get the articles
+                    for h3_item in search_div.findAll('h3'):
+                        byline = h3_item.h6
+                        if byline is not None:
+                            author = self.tag_to_string(byline,use_alt=False)
+                        else:
+                            author = ''
+                        a = h3_item.find('a', href=True)
+                        if not a:
+                            continue
+                        url = re.sub(r'\?.*', '', a['href'])
+                        if not url.startswith("http"):
+                            continue
+                        if not url.endswith(".html"):
+                            continue
+                        if 'podcast' in url:
+                            continue
+                        if 'video' in url:
+                            continue
+                        url += '?pagewanted=all'
+                        if url in url_list:
+                            continue
+                        url_list.append(url)
+                        self.log("URL %s" % url)
+                        title = self.tag_to_string(a, use_alt=True).strip()
+                        desc = h3_item.find('p')
+                        if desc is not None:
+                            description = self.tag_to_string(desc,use_alt=False)
+                        else:
+                            description = ''
+                        if not articles.has_key(section_name):
+                            ans.append(section_name)
+                            articles[section_name] = []
+                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)

+    def parse_index(self):
+        if self.headlinesOnly:
+            return self.parse_headline_index()
+        else:
+            return self.parse_todays_index()

+    def strip_anchors(self,soup):
+        paras = soup.findAll(True)
+        for para in paras:
+            aTags = para.findAll('a')
+            for a in aTags:
+                if a.img is None:
+                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
+        return soup

-    def skip_ad_pages(self, soup):
-        # Skip ad pages served before actual article
-        skip_tag = soup.find(True, {'name':'skip'})
-        if skip_tag is not None:
-            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
-            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
-            url += '?pagewanted=all'
-            self.log.warn("Skipping ad to article at '%s'" % url)
-            return self.index_to_soup(url, raw=True)
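Both index parsers normalize the scraped section names the same way. Pulled out as a helper for clarity (the function name is an assumption; the two special cases are the ones the diff handles):

    import string

    def normalize_section(name):
        name = string.capwords(name.strip())
        # capwords() lowercases the acronym and the 'Ed' suffix; restore them
        return {'U.s.': 'U.S.', 'Op-ed': 'Op-Ed'}.get(name, name)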
     def preprocess_html(self, soup):
+        kicker_tag = soup.find(attrs={'class':'kicker'})
+        if kicker_tag: # remove Op_Ed author head shots
+            tagline = self.tag_to_string(kicker_tag)
+            if tagline=='Op-Ed Columnist':
+                img_div = soup.find('div','inlineImage module')
+                if img_div:
+                    img_div.extract()
         return self.strip_anchors(soup)

     def postprocess_html(self,soup, True):
-        print "\npostprocess_html()\n"

         if self.one_picture_per_article:
             # Remove all images after first
@ -312,8 +462,9 @@ class NYTimes(BasicNewsRecipe):
                 firstImg = inlineImgs[0]
                 for inlineImg in inlineImgs[1:]:
                     inlineImg.extract()
-                # Move firstImg after headline
-                cgFirst = soup.find(True, {'class':'columnGroup first'})
+                # Move firstImg before article body
+                #article_body = soup.find(True, {'id':'articleBody'})
+                cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
                 if cgFirst:
                     # Strip all sibling NavigableStrings: noise
                     navstrings = cgFirst.findAll(text=True, recursive=False)
@ -333,30 +484,18 @@ class NYTimes(BasicNewsRecipe):
                     if headline_found:
                         cgFirst.insert(insertLoc,firstImg)
                 else:
                     self.log(">>> No class:'columnGroup first' found <<<")

-        # Change class="kicker" to <h3>
-        kicker = soup.find(True, {'class':'kicker'})
-        if kicker and kicker.contents and kicker.contents[0]:
-            h3Tag = Tag(soup, "h3")
-            h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
-                         use_alt=False)))
-            kicker.replaceWith(h3Tag)

-        # Change captions to italic -1
+        # Change captions to italic
         for caption in soup.findAll(True, {'class':'caption'}) :
             if caption and caption.contents[0]:
-                emTag = Tag(soup, "em")
+                cTag = Tag(soup, "p", [("class", "caption")])
                 c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                 mp_off = c.find("More Photos")
                 if mp_off >= 0:
                     c = c[:mp_off]
-                emTag.insert(0, c)
-                #hrTag = Tag(soup, 'hr')
-                #hrTag['class'] = 'caption_divider'
-                hrTag = Tag(soup, 'div')
-                hrTag['class'] = 'divider'
-                emTag.insert(1, hrTag)
-                caption.replaceWith(emTag)
+                cTag.insert(0, c)
+                caption.replaceWith(cTag)

         # Change <nyt_headline> to <h2>
         h1 = soup.find('h1')
@ -396,17 +535,6 @@ class NYTimes(BasicNewsRecipe):
             bTag.insert(0, subhead.contents[0])
             subhead.replaceWith(bTag)

-        # Synthesize a section header
-        dsk = soup.find('meta', attrs={'name':'dsk'})
-        if dsk and dsk.has_key('content'):
-            hTag = Tag(soup,'h3')
-            hTag['class'] = 'section'
-            hTag.insert(0,NavigableString(dsk['content']))
-            articleTag = soup.find(True, attrs={'id':'article'})
-            if articleTag:
-                articleTag.insert(0,hTag)

-        # Add class="articleBody" to <div> so we can format with CSS
         divTag = soup.find('div',attrs={'id':'articleBody'})
         if divTag:
             divTag['class'] = divTag['id']
@ -422,56 +550,3 @@ class NYTimes(BasicNewsRecipe):

         return soup

-    def populate_article_metadata(self,article,soup,first):
-        '''
-        Extract author and description from article, add to article metadata
-        '''
-        def extract_author(soup):
-            byline = soup.find('meta',attrs={'name':['byl','CLMST']})
-            if byline :
-                author = byline['content']
-            else :
-                # Try for <div class="byline">
-                byline = soup.find('div', attrs={'class':'byline'})
-                if byline:
-                    author = byline.renderContents()
-                else:
-                    print soup.prettify()
-                    return None
-            return author
-
-        def extract_description(soup):
-            description = soup.find('meta',attrs={'name':['description','description ']})
-            if description :
-                return self.massageNCXText(description['content'])
-            else:
-                # Take first paragraph of article
-                articlebody = soup.find('div',attrs={'id':'articlebody'})
-                if not articlebody:
-                    # Try again with class instead of id
-                    articlebody = soup.find('div',attrs={'class':'articlebody'})
-                    if not articlebody:
-                        print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
-                        print soup.prettify()
-                        return None
-                paras = articlebody.findAll('p')
-                for p in paras:
-                    if p.renderContents() > '' :
-                        return self.massageNCXText(self.tag_to_string(p,use_alt=False))
-                return None
-
-        if not article.author:
-            article.author = extract_author(soup)
-        if not article.summary:
-            article.summary = article.text_summary = extract_description(soup)
-
-    def strip_anchors(self,soup):
-        paras = soup.findAll(True)
-        for para in paras:
-            aTags = para.findAll('a')
-            for a in aTags:
-                if a.img is None:
-                    a.replaceWith(a.renderContents().decode('utf-8','replace'))
-                    #a.replaceWith(a.renderContents().decode('cp1252','replace'))
-        return soup
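The populate_article_metadata() removed above fell back to the first non-empty paragraph of the article body when no description meta tag was present. That rule in isolation, as a sketch (helper name invented):

    def first_nonempty_paragraph(articlebody):
        # articlebody is a BeautifulSoup <div>; return the first <p> with content.
        for p in articlebody.findAll('p'):
            if p.renderContents().strip():
                return p
        return None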
@ -1,74 +1,43 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
+import re

 class NewZealandHerald(BasicNewsRecipe):

     title       = 'New Zealand Herald'
-    __author__   = 'Krittika Goyal'
+    __author__   = 'Kovid Goyal'
     description = 'Daily news'
     timefmt = ' [%d %b, %Y]'
     language = 'en_NZ'
+    oldest_article = 2.5

-    no_stylesheets = True
-    remove_tags_before = dict(name='div', attrs={'class':'contentContainer left eight'})
-    remove_tags_after  = dict(name='div', attrs={'class':'callToAction'})
-    remove_tags = [
-        dict(name='iframe'),
-        dict(name='div', attrs={'class':['sectionHeader', 'tools','callToAction', 'contentContainer right two nopad relatedColumn']}),
-        #dict(name='div', attrs={'id':['shareContainer']}),
-        #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or #author')"}),
-        #dict(name='table', attrs={'cellspacing':'0'}),
+    feeds = [
+        ('Business',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000003.xml'),
+        ('World',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000002.xml'),
+        ('National',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000001.xml'),
+        ('Entertainment',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_001501119.xml'),
+        ('Travel',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000007.xml'),
+        ('Opinion',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000466.xml'),
+        ('Life & Style',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000006.xml'),
+        ('Technology',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000005.xml'),
+        ('Sport',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000004.xml'),
+        ('Motoring',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000009.xml'),
+        ('Property',
+         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000008.xml'),
     ]

-    def preprocess_html(self, soup):
-        table = soup.find('table')
-        if table is not None:
-            table.extract()
-        return soup
+    def print_version(self, url):
+        m = re.search(r'objectid=(\d+)', url)
+        if m is None:
+            return url
+        return 'http://www.nzherald.co.nz/news/print.cfm?pnum=1&objectid=' + m.group(1)

-    #TO GET ARTICLES IN SECTION
-    def nz_parse_section(self, url):
-        soup = self.index_to_soup(url)
-        div = soup.find(attrs={'class':'col-300 categoryList'})
-        date = div.find(attrs={'class':'link-list-heading'})
-
-        current_articles = []
-        for x in date.findAllNext(attrs={'class':['linkList', 'link-list-heading']}):
-            if x.get('class') == 'link-list-heading': break
-            for li in x.findAll('li'):
-                a = li.find('a', href=True)
-                if a is None:
-                    continue
-                title = self.tag_to_string(a)
-                url = a.get('href', False)
-                if not url or not title:
-                    continue
-                if url.startswith('/'):
-                    url = 'http://www.nzherald.co.nz'+url
-                self.log('\t\tFound article:', title)
-                self.log('\t\t\t', url)
-                current_articles.append({'title': title, 'url':url,
-                    'description':'', 'date':''})
-
-        return current_articles
-
-    # To GET SECTIONS
-    def parse_index(self):
-        feeds = []
-        for title, url in [
-            ('National',
-             'http://www.nzherald.co.nz/nz/news/headlines.cfm?c_id=1'),
-            ('World',
-             'http://www.nzherald.co.nz/world/news/headlines.cfm?c_id=2'),
-            ('Politics',
-             'http://www.nzherald.co.nz/politics/news/headlines.cfm?c_id=280'),
-            ('Crime',
-             'http://www.nzherald.co.nz/crime/news/headlines.cfm?c_id=30'),
-            ('Environment',
-             'http://www.nzherald.co.nz/environment/news/headlines.cfm?c_id=39'),
-        ]:
-            articles = self.nz_parse_section(url)
-            if articles:
-                feeds.append((title, articles))
-        return feeds
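What the new print_version() rewrite produces, on an assumed article URL (the objectid here is made up for illustration):

    import re

    def print_version(url):
        m = re.search(r'objectid=(\d+)', url)
        if m is None:
            return url
        return 'http://www.nzherald.co.nz/news/print.cfm?pnum=1&objectid=' + m.group(1)

    print print_version('http://www.nzherald.co.nz/world/news/article.cfm?c_id=2&objectid=10682731')
    # -> http://www.nzherald.co.nz/news/print.cfm?pnum=1&objectid=10682731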
 66  resources/recipes/perfil.recipe  Normal file
@ -0,0 +1,66 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
perfil.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Perfil(BasicNewsRecipe):
    title = 'Perfil'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Argentina y el resto del mundo'
    publisher = 'perfil.com'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    masthead_url = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        .seccion{border-bottom: 1px dotted #666666; text-transform: uppercase; font-size: x-large}
        .foto1 h1{font-size: x-small}
        h1{font-family: Georgia,"Times New Roman",serif}
        img{margin-bottom: 0.4em}
    """

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    remove_tags = [
        dict(name=['iframe','embed','object','base','meta','link'])
        ,dict(name='a', attrs={'href':'#comentarios'})
        ,dict(name='div', attrs={'class':'foto3'})
        ,dict(name='img', attrs={'alt':'ampliar'})
    ]
    keep_only_tags=[dict(attrs={'class':['bd468a','cuerpoSuperior']})]
    remove_attributes=['onload','lang','width','height','border']

    feeds = [
        (u'Ultimo momento' , u'http://www.perfil.com/rss/ultimomomento.xml')
        ,(u'Politica' , u'http://www.perfil.com/rss/politica.xml')
        ,(u'Policia' , u'http://www.perfil.com/rss/policia.xml')
        ,(u'Internacionales', u'http://www.perfil.com/rss/internacional.xml')
        ,(u'Economia' , u'http://www.perfil.com/rss/economia.xml')
        ,(u'Deportes' , u'http://www.perfil.com/rss/deportes.xml')
        ,(u'Opinion' , u'http://www.perfil.com/rss/columnistas.xml')
        ,(u'Sociedad' , u'http://www.perfil.com/rss/sociedad.xml')
        ,(u'Cultura' , u'http://www.perfil.com/rss/cultura.xml')
        ,(u'Espectaculos' , u'http://www.perfil.com/rss/espectaculos.xml')
        ,(u'Ciencia' , u'http://www.perfil.com/rss/ciencia.xml')
        ,(u'Salud' , u'http://www.perfil.com/rss/salud.xml')
        ,(u'Tecnologia' , u'http://www.perfil.com/rss/tecnologia.xml')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
 53  resources/recipes/rue89.recipe  Normal file
@ -0,0 +1,53 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Louis Gesbert <meta at antislash dot info>'
'''
Rue89
'''

__author__ = '2010, Louis Gesbert <meta at antislash dot info>'

import re
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe

class Rue89(BasicNewsRecipe):
    title = 'Rue89'
    __author__ = 'Louis Gesbert'
    description = 'Popular free french news website'
    title = u'Rue89'
    language = 'fr'
    oldest_article = 7
    max_articles_per_feed = 50

    feeds = [(u'La Une', u'http://www.rue89.com/homepage/feed')]

    no_stylesheets = True

    preprocess_regexps = [
        (re.compile(r'<(/?)h2>', re.IGNORECASE|re.DOTALL),
         lambda match : '<'+match.group(1)+'h3>'),
        (re.compile(r'<div class="print-title">([^>]+)</div>', re.IGNORECASE|re.DOTALL),
         lambda match : '<h2>'+match.group(1)+'</h2>'),
        (re.compile(r'<img[^>]+src="[^"]*/numeros/(\d+)[^0-9.">]*.gif"[^>]*/>', re.IGNORECASE|re.DOTALL),
         lambda match : '<span style="font-family: Sans-serif; color: red; font-size:24pt; padding=2pt;">'+match.group(1)+'</span>'),
        (re.compile(r'\''), lambda match: '&rsquo;'),
    ]

    def preprocess_html(self,soup):
        body = Tag(soup, 'body')
        title = soup.find('h1', {'class':'title'})
        content = soup.find('div', {'class':'content'})
        soup.body.replaceWith(body)
        body.insert(0, title)
        body.insert(1, content)
        return soup

    remove_tags = [ #dict(name='div', attrs={'class':'print-source_url'}),
        #dict(name='div', attrs={'class':'print-links'}),
        #dict(name='img', attrs={'class':'print-logo'}),
        dict(name='div', attrs={'class':'content_top'}),
        dict(name='div', attrs={'id':'sidebar-left'}), ]

    # -- print-version has poor quality on this website, better do the conversion ourselves
    # def print_version(self, url):
    #    return re.sub('^.*-([0-9]+)$', 'http://www.rue89.com/print/\\1',url)
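A quick check of the first two preprocess_regexps in this recipe (the input markup is invented):

    import re

    demote_h2 = (re.compile(r'<(/?)h2>', re.IGNORECASE | re.DOTALL),
                 lambda m: '<' + m.group(1) + 'h3>')
    promote_title = (re.compile(r'<div class="print-title">([^>]+)</div>', re.IGNORECASE | re.DOTALL),
                     lambda m: '<h2>' + m.group(1) + '</h2>')

    html = '<div class="print-title">Une</div><h2>Sous-titre</h2>'
    for pat, repl in (demote_h2, promote_title):
        html = pat.sub(repl, html)
    print html  # -> <h2>Une</h2><h3>Sous-titre</h3>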
 60  resources/recipes/stnn.recipe  Normal file
@ -0,0 +1,60 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Larry Chan <larry1chan at gmail.com>'
'''
Singtao STNN
'''
from calibre.web.feeds.recipes import BasicNewsRecipe

class SingtaoSTNN(BasicNewsRecipe):
    title = 'Singtao STNN'
    __author__ = 'Larry Chan, larry1chan'
    description = 'Chinese News'
    oldest_article = 2
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'gb2312'
    publisher = 'Singtao STNN'
    category = 'news, China, world'
    language = 'zh'
    publication_type = 'newsportal'
    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    masthead_url = 'http://www.stnn.cc/images/0806/logo_080728.gif'
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }

    remove_tags_before = dict(name='div', attrs={'class':['page_box']})
    remove_tags_after = dict(name='div', attrs={'class':['pagelist']})

    keep_only_tags = [
        dict(name='div', attrs={'class':['font_title clearfix']}),
        dict(name='div', attrs={'id':['content_zoom']})
    ]

    remove_attributes = ['width','height','href']

    # for a full list of rss check out [url]http://www.stnn.cc/rss/[/url]

    feeds = [ (u'Headline News', u'http://www.stnn.cc/rss/news/index.xml'),
              (u'Breaking News', u'http://www.stnn.cc/rss/tufa/index.xml'),
              (u'Finance', u'http://www.stnn.cc/rss/fin/index.xml'),
              (u'Entertainment', u'http://www.stnn.cc/rss/ent/index.xml'),
              (u'International', u'http://www.stnn.cc/rss/guoji/index.xml'),
              (u'China', u'http://www.stnn.cc/rss/china/index.xml'),
              (u'Opinion', u'http://www.stnn.cc/rss/fin_op/index.xml'),
              (u'Blog', u'http://blog.stnn.cc/uploadfile/rssblogtypehotlog.xml'),
              (u'Hong Kong', u'http://www.stnn.cc/rss/hongkong/index.xml')
            ]
 24  resources/recipes/taggeschau_de.recipe  Normal file
@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Tagesschau(BasicNewsRecipe):
    title = 'Tagesschau'
    description = 'Nachrichten der ARD'
    publisher = 'ARD'
    language = 'de_DE'

    __author__ = 'Florian Andreas Pfaff'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    feeds = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')]

    remove_tags = [
        dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio']}),
        dict(name='div',
             attrs={'id':['socialBookmarks','seitenanfang']}),
        dict(name='ul',
             attrs={'class':['directLinks','directLinks weltatlas']}),
        dict(name='strong', attrs={'class':['boxTitle inv','inv']})
    ]
    keep_only_tags = [dict(name='div', attrs={'id':'centerCol'})]
 46  resources/recipes/theecocolapse.recipe  Normal file
@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
theeconomiccollapseblog.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class TheEconomicCollapse(BasicNewsRecipe):
    title = 'The Economic Collapse'
    __author__ = 'Darko Miletic'
    description = 'Are You Prepared For The Coming Economic Collapse And The Next Great Depression?'
    publisher = 'The Economic Collapse'
    category = 'news, politics, USA, economy'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    extra_css = """
        body{font-family: Tahoma,Arial,sans-serif }
        img{margin-bottom: 0.4em}
    """

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    remove_tags = [
        dict(attrs={'class':'sociable'})
        ,dict(name=['iframe','object','embed','meta','link','base'])
    ]
    remove_attributes=['lang','onclick','width','height']
    keep_only_tags=[dict(attrs={'class':['post-headline','post-bodycopy clearfix','']})]

    feeds = [(u'Posts', u'http://theeconomiccollapseblog.com/feed')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
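Several of the new recipes above (Perfil, The Economic Collapse) share the same preprocess_html idiom: delete inline style attributes so the recipe's extra_css controls presentation. In isolation (the function name is an assumption):

    def strip_inline_styles(soup):
        # Remove style="..." everywhere; extra_css then wins.
        for tag in soup.findAll(style=True):
            del tag['style']
        return soup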
@ -19,20 +19,22 @@ class TheEconomicTimes(BasicNewsRecipe):
     simultaneous_downloads = 1
     encoding = 'utf-8'
     language = 'en_IN'
     publication_type = 'newspaper'
     masthead_url = 'http://economictimes.indiatimes.com/photo/2676871.cms'
-    extra_css = """ body{font-family: Arial,Helvetica,sans-serif}
-                    .heading1{font-size: xx-large; font-weight: bold} """
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif}
+    """

     conversion_options = {
         'comment' : description
         , 'tags' : category
         , 'publisher' : publisher
         , 'language' : language
     }

-    keep_only_tags = [dict(attrs={'class':['heading1','headingnext','Normal']})]
+    keep_only_tags = [dict(attrs={'class':'printdiv'})]
     remove_tags = [dict(name=['object','link','embed','iframe','base','table','meta'])]
+    remove_attributes = ['name']

     feeds = [(u'All articles', u'http://economictimes.indiatimes.com/rssfeedsdefault.cms')]

@ -48,5 +50,5 @@ class TheEconomicTimes(BasicNewsRecipe):

     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
         return self.adeify_images(soup)
@ -30,23 +30,40 @@
     <title>
         <xsl:value-of select="fb:description/fb:title-info/fb:book-title"/>
     </title>
-    <style type="text/x-oeb1-css">
-        A { color : #0002CC }
-        A:HOVER { color : #BF0000 }
-        BODY { background-color : #FEFEFE; color : #000000; font-family : Verdana, Geneva, Arial, Helvetica, sans-serif; text-align : justify }
-        H1{ font-size : 160%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Black; background-color : #E7E7E7; margin-left : 0px; page-break-before : always; }
-        H2{ font-size : 130%; font-style : normal; font-weight : bold; text-align : left; background-color : #EEEEEE; border : 1px solid Gray; page-break-before : always; }
-        H3{ font-size : 110%; font-style : normal; font-weight : bold; text-align : left; background-color : #F1F1F1; border : 1px solid Silver;}
-        H4{ font-size : 100%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
-        H5{ font-size : 100%; font-style : italic; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
-        H6{ font-size : 100%; font-style : italic; font-weight : normal; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
-        SMALL{ font-size : 80% }
-        BLOCKQUOTE{ margin-left :4em; margin-top:1em; margin-right:0.2em;}
-        HR{ color : Black }
-        DIV{font-family : "Times New Roman", Times, serif; text-align : justify}
-        UL{margin-left: 0}
-        .epigraph{width:50%; margin-left : 35%;}
+    <style type="text/css">
+        a { color : #0002CC }
+        a:hover { color : #BF0000 }
+        body { background-color : #FEFEFE; color : #000000; font-family : Verdana, Geneva, Arial, Helvetica, sans-serif; text-align : justify }
+        h1{ font-size : 160%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Black; background-color : #E7E7E7; margin-left : 0px; page-break-before : always; }
+        h2{ font-size : 130%; font-style : normal; font-weight : bold; text-align : left; background-color : #EEEEEE; border : 1px solid Gray; page-break-before : always; }
+        h3{ font-size : 110%; font-style : normal; font-weight : bold; text-align : left; background-color : #F1F1F1; border : 1px solid Silver;}
+        h4{ font-size : 100%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
+        h5{ font-size : 100%; font-style : italic; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
+        h6{ font-size : 100%; font-style : italic; font-weight : normal; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
+        small { font-size : 80% }
+        blockquote { margin-left :4em; margin-top:1em; margin-right:0.2em;}
+        hr { color : Black }
+        div {font-family : "Times New Roman", Times, serif; text-align : justify}
+        ul {margin-left: 0}
+        .epigraph{width:50%; margin-left : 35%;}
+        div.paragraph { text-align: justify; text-indent: 2em; }
     </style>
+    <link rel="stylesheet" type="text/css" href="inline-styles.css" />
 </head>
 <body>
     <xsl:for-each select="fb:description/fb:title-info/fb:annotation">
@ -136,12 +153,13 @@
     </xsl:choose>
     </xsl:variable>
     <xsl:if test="$section_has_title = 'None'">
-        <a name="TOC_{generate-id()}" />
+        <div id="TOC_{generate-id()}">
         <xsl:if test="@id">
             <xsl:element name="a">
-                <xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
+                <xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
             </xsl:element>
         </xsl:if>
+        </div>
     </xsl:if>
     <xsl:apply-templates>
         <xsl:with-param name="section_toc_id" select="$section_has_title" />
@ -160,13 +178,13 @@
     </xsl:if>
     <xsl:if test="$section_toc_id != 'None'">
         <xsl:element name="a">
-            <xsl:attribute name="name">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
+            <xsl:attribute name="id">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
         </xsl:element>
     </xsl:if>
     <a name="TOC_{generate-id()}"></a>
     <xsl:if test="@id">
         <xsl:element name="a">
-            <xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
+            <xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
         </xsl:element>
     </xsl:if>
     <xsl:apply-templates/>
@ -176,7 +194,7 @@
     <xsl:element name="h6">
         <xsl:if test="@id">
             <xsl:element name="a">
-                <xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
+                <xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
             </xsl:element>
         </xsl:if>
         <xsl:apply-templates/>
@ -207,11 +225,18 @@
     </xsl:template>
     <!-- p -->
     <xsl:template match="fb:p">
-        <div align="justify"><xsl:if test="@id">
+        <xsl:element name="div">
+            <xsl:attribute name="class">paragraph</xsl:attribute>
+            <xsl:if test="@id">
             <xsl:element name="a">
                 <xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
             </xsl:element>
-        </xsl:if>    <xsl:apply-templates/></div>
+            </xsl:if>
+            <xsl:if test="@style">
+                <xsl:attribute name="style"><xsl:value-of select="@style"/></xsl:attribute>
+            </xsl:if>
+            <xsl:apply-templates/>
+        </xsl:element>
     </xsl:template>
     <!-- strong -->
     <xsl:template match="fb:strong">
@ -20,20 +20,4 @@ function setup_image_scaling_handlers() {
     });
 }

-function extract_svged_images() {
-    $("svg").each(function() {
-        var children = $(this).children("img");
-        if (children.length == 1) {
-            var img = $(children[0]);
-            var href = img.attr('xlink:href');
-            if (href != undefined) {
-                $(this).replaceWith('<div style="text-align:center; margin: 0; padding: 0"><img style="height: 98%" alt="SVG Image" src="' + href +'"></img></div>');
-            }
-        }
-    });
-}
-
-$(document).ready(function() {
-    //extract_svged_images();
-});
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 
 __all__ = [
         'pot', 'translations', 'get_translations', 'iso639',
-        'build', 'build_pdf2xml',
+        'build', 'build_pdf2xml', 'server',
        'gui',
        'develop', 'install',
        'resources',
@@ -35,6 +35,9 @@ from setup.extensions import Build, BuildPDF2XML
 build = Build()
 build_pdf2xml = BuildPDF2XML()
 
+from setup.server import Server
+server = Server()
+
 from setup.install import Develop, Install, Sdist
 develop = Develop()
 install = Install()
@@ -54,7 +54,7 @@ reflow_error = poppler_error if poppler_error else magick_error
 
 pdfreflow_libs = []
 if iswindows:
-    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']
+    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32', 'zlib']
 
 extensions = [
 
@@ -28,7 +28,9 @@ If there are no windows binaries already compiled for the version of python you
 
 Run the following command to install python dependencies::
 
-    easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython
+    easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython
 
+Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)
+
 Qt
 --------
@@ -213,7 +215,7 @@ It contains correct fonts.conf etc.
 poppler
 -------------
 
-In Cmake: disable GTK, Qt, OPenjpeg, zlib, lcms, gtk_tests, qt_tests. Enable qt4, jpeg, png and zlib
+In Cmake: disable GTK, Qt, OPenjpeg, cpp, lcms, gtk_tests, qt_tests. Enable qt4, jpeg, png and zlib
 
 NOTE: poppler must be built as a static library, unless you build the qt4 bindings
 
128 setup/server.py Normal file
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import subprocess, tempfile, os, time, sys, telnetlib
+from threading import RLock
+
+from setup import Command
+
+try:
+    from pyinotify import WatchManager, ThreadedNotifier, EventsCodes, ProcessEvent
+except:
+    wm = None
+else:
+    wm = WatchManager()
+    flags = EventsCodes.ALL_FLAGS
+    mask = flags['IN_MODIFY']
+
+class ProcessEvents(ProcessEvent):
+
+    def __init__(self, command):
+        ProcessEvent.__init__(self)
+        self.command = command
+
+    def process_default(self, event):
+        name = getattr(event,
+                'name', None)
+        if not name:
+            return
+        ext = os.path.splitext(name)[1]
+        reload = False
+        if ext == '.py':
+            reload = True
+            print
+            print name, 'changed'
+            self.command.kill_server()
+            self.command.launch_server()
+            print self.command.prompt,
+            sys.stdout.flush()
+
+        if reload:
+            self.command.reload_browser(delay=1)
+
+
+class Server(Command):
+
+    description = 'Run the calibre server in development mode conveniently'
+
+    MONOCLE_PATH = '../monocle'
+
+    def rebuild_monocole(self):
+        subprocess.check_call(['sprocketize', '-C', self.MONOCLE_PATH,
+            '-I', 'src', 'src/monocle.js'],
+            stdout=open('resources/content_server/read/monocle.js', 'wb'))
+
+    def launch_server(self):
+        print 'Starting server...\n'
+        with self.lock:
+            self.rebuild_monocole()
+            self.server_proc = p = subprocess.Popen(['calibre-server', '--develop'],
+                    stderr=subprocess.STDOUT, stdout=self.server_log)
+            time.sleep(0.2)
+            if p.poll() is not None:
+                print 'Starting server failed'
+                raise SystemExit(1)
+            return p
+
+    def kill_server(self):
+        print 'Killing server...\n'
+        if self.server_proc is not None:
+            with self.lock:
+                if self.server_proc.poll() is None:
+                    self.server_proc.terminate()
+                while self.server_proc.poll() is None:
+                    time.sleep(0.1)
+
+    def watch(self):
+        if wm is not None:
+            self.notifier = ThreadedNotifier(wm, ProcessEvents(self))
+            self.notifier.start()
+            self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)
+
+    def reload_browser(self, delay=0.1):
+        time.sleep(delay)
+        try:
+            t = telnetlib.Telnet('localhost', 4242)
+            t.read_until("repl>")
+            t.write('BrowserReload();')
+            t.read_until("repl>")
+            t.close()
+        except:
+            print 'Failed to reload browser'
+            import traceback
+            traceback.print_exc()
+
+    def run(self, opts):
+        self.lock = RLock()
+        tdir = tempfile.gettempdir()
+        logf = os.path.join(tdir, 'calibre-server.log')
+        self.server_log = open(logf, 'ab')
+        self.prompt = 'Press Enter to kill/restart server. Ctrl+C to quit: '
+        print 'Server log available at:', logf
+        print
+        self.watch()
+
+        first = True
+        while True:
+            self.launch_server()
+            if not first:
+                self.reload_browser()
+            first = False
+
+            try:
+                raw_input(self.prompt)
+            except:
+                print
+                self.kill_server()
+                break
+            else:
+                self.kill_server()
+            print
+
+        if hasattr(self, 'notifier'):
+            self.notifier.stop()
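For orientation: Server plugs into calibre's setup.py command framework. A hedged sketch of how it is driven (assumes a calibre source checkout; pyinotify and a MozRepl listener on port 4242 are optional extras the command degrades without):

    # Normally invoked from the source root as: python setup.py server
    # which is roughly equivalent to:
    from setup.server import Server

    srv = Server()
    srv.run(None)   # opts is not used by Server.run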
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.7.24'
+__version__   = '0.7.26'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 
 import re
@@ -105,7 +105,9 @@ else:
             os.makedirs(config_dir, mode=CONFIG_DIR_MODE)
         except:
             pass
-    if not os.access(config_dir, os.W_OK) or not os.access(config_dir, os.X_OK):
+    if not os.path.exists(config_dir) or \
+            not os.access(config_dir, os.W_OK) or not \
+            os.access(config_dir, os.X_OK):
         print 'No write acces to', config_dir, 'using a temporary dir instead'
         import tempfile, atexit
         config_dir = tempfile.mkdtemp(prefix='calibre-config-')
@@ -472,10 +472,11 @@ from calibre.devices.iriver.driver import IRIVER_STORY
 from calibre.devices.binatone.driver import README
 from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
 from calibre.devices.edge.driver import EDGE
-from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS
+from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
+        SOVOS, PICO
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
-        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
+        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 
@@ -574,6 +575,7 @@ plugins += [
         ELONEX,
         TECLAST_K3,
         NEWSMY,
+        PICO,
         IPAPYRUS,
         SOVOS,
         EDGE,
@@ -586,6 +588,7 @@ plugins += [
         AVANT,
         MENTOR,
         SWEEX,
+        Q600,
         KOGAN,
         PDNOVEL,
         SPECTRA,
@@ -294,3 +294,8 @@ class OutputFormatPlugin(Plugin):
         '''
         raise NotImplementedError
 
+    @property
+    def is_periodical(self):
+        return self.oeb.metadata.publication_type and \
+                unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')
+
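The new is_periodical property keys off the dc:publication_type value that calibre's news download pipeline records in the OEB metadata. A hedged illustration of the string being tested (the exact value is illustrative):

    pub_type = u'periodical:newspaper:calibre'   # e.g. set for a downloaded newspaper
    print pub_type.startswith('periodical:')     # -> True, so is_periodical holds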
@@ -4,6 +4,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+import sys
 from itertools import izip
 from xml.sax.saxutils import escape
 
@@ -258,6 +259,9 @@ class OutputProfile(Plugin):
     #: Number of ems that the left margin of a blockquote is rendered as
     mobi_ems_per_blockquote = 1.0
 
+    #: Special periodical formatting needed in EPUB
+    epub_periodical_format = None
+
     @classmethod
     def tags_to_string(cls, tags):
         return escape(', '.join(tags))
@@ -417,6 +421,13 @@ class iPadOutput(OutputProfile):
     '''
     # }}}
 
+class TabletOutput(iPadOutput):
+    name = 'Tablet'
+    short_name = 'tablet'
+    description = _('Intended for generic tablet devices, does no resizing of images')
+
+    screen_size = (sys.maxint, sys.maxint)
+    comic_screen_size = (sys.maxint, sys.maxint)
+
 class SonyReaderOutput(OutputProfile):
 
@@ -431,6 +442,9 @@ class SonyReaderOutput(OutputProfile):
     fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
     unsupported_unicode_chars = [u'\u201f', u'\u201b']
 
+    epub_periodical_format = 'sony'
+    #periodical_date_in_title = False
+
 
 class KoboReaderOutput(OutputProfile):
 
@@ -553,6 +567,8 @@ class CybookOpusOutput(SonyReaderOutput):
     fbase = 16
     fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
 
+    epub_periodical_format = None
+
 class KindleOutput(OutputProfile):
 
     name = 'Kindle'
@@ -583,7 +599,8 @@ class KindleDXOutput(OutputProfile):
     # Screen size is a best guess
     screen_size = (744, 1022)
     dpi = 150.0
-    comic_screen_size = (741, 1022)
+    comic_screen_size = (771, 1116)
+    #comic_screen_size = (741, 1022)
     supports_mobi_indexing = True
     periodical_date_in_title = False
     mobi_ems_per_blockquote = 2.0
@@ -649,13 +666,14 @@ class NookOutput(OutputProfile):
 
 class BambookOutput(OutputProfile):
 
+    author = 'Li Fanxi'
     name = 'Sanda Bambook'
     short_name = 'bambook'
     description = _('This profile is intended for the Sanda Bambook.')
 
     # Screen size is a best guess
-    screen_size = (800, 600)
-    comic_screen_size = (700, 540)
+    screen_size = (600, 800)
+    comic_screen_size = (540, 700)
     dpi = 168.451
     fbase = 12
     fsizes = [10, 12, 14, 16]
@@ -663,7 +681,7 @@ class BambookOutput(OutputProfile):
 output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
         SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
         HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
-        iPadOutput, KoboReaderOutput,
+        iPadOutput, KoboReaderOutput, TabletOutput,
         SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
         IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
         BambookOutput, ]
@@ -20,7 +20,8 @@ class ANDROID(USBMS):
     VENDOR_ID   = {
             # HTC
             0x0bb4 : { 0x0c02 : [0x100, 0x0227], 0x0c01 : [0x100, 0x0227], 0x0ff9
-                : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226]},
+                : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
+                0xc92 : [0x100]},
 
             # Motorola
             0x22b8 : { 0x41d9 : [0x216], 0x2d67 : [0x100], 0x41db : [0x216],
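For readers unfamiliar with the table: VENDOR_ID maps a USB vendor id to product ids, each listing the BCD device revisions the driver should match. A hedged reading of the new entry (presumably the HTC Aria this release adds support for):

    VENDOR_ID = {
        0x0bb4 : {             # HTC's USB vendor id
            0xc92 : [0x100],   # new product id, matched at revision 0x100
        },
    }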
@@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.epub import set_metadata
 from calibre.library.server.utils import strftime
 from calibre.utils.config import config_dir, prefs
-from calibre.utils.date import isoformat, now, parse_date
+from calibre.utils.date import now, parse_date
 from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile
 
@@ -2521,11 +2521,11 @@ class ITUNES(DriverBase):
                 metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                                              old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
             else:
-                metadata.timestamp = isoformat(now())
+                metadata.timestamp = now()
                 if DEBUG:
                     self.log.info("   add timestamp: %s" % metadata.timestamp)
         else:
-            metadata.timestamp = isoformat(now())
+            metadata.timestamp = now()
             if DEBUG:
                 self.log.warning("   missing <metadata> block in OPF file")
                 self.log.info("   add timestamp: %s" % metadata.timestamp)
@@ -42,7 +42,7 @@ class CYBOOK(USBMS):
     DELETE_EXTS = ['.mbp', '.dat', '.bin', '_6090.t2b', '.thn']
     SUPPORTS_SUB_DIRS = True
 
-    def upload_cover(self, path, filename, metadata):
+    def upload_cover(self, path, filename, metadata, filepath):
         coverdata = getattr(metadata, 'thumbnail', None)
         if coverdata and coverdata[2]:
             coverdata = coverdata[2]
@@ -77,7 +77,7 @@ class ALEX(N516):
         name = os.path.splitext(os.path.basename(file_abspath))[0] + '.png'
         return os.path.join(base, 'covers', name)
 
-    def upload_cover(self, path, filename, metadata):
+    def upload_cover(self, path, filename, metadata, filepath):
         from calibre.ebooks import calibre_cover
         from calibre.utils.magick.draw import thumbnail
         coverdata = getattr(metadata, 'thumbnail', None)
@@ -129,7 +129,7 @@ class AZBOOKA(ALEX):
     def can_handle(self, device_info, debug=False):
         return not is_alex(device_info)
 
-    def upload_cover(self, path, filename, metadata):
+    def upload_cover(self, path, filename, metadata, filepath):
         pass
 
 class EB511(USBMS):
@@ -22,7 +22,9 @@ class KOBO(USBMS):
     gui_name = 'Kobo Reader'
     description = _('Communicate with the Kobo Reader')
     author = 'Timothy Legge and Kovid Goyal'
-    version = (1, 0, 6)
+    version = (1, 0, 7)
+
+    dbversion = 0
 
     supported_platforms = ['windows', 'osx', 'linux']
 
@@ -92,7 +94,7 @@ class KOBO(USBMS):
                 if lpath.startswith(os.sep):
                     lpath = lpath[len(os.sep):]
                 lpath = lpath.replace('\\', '/')
-                # print "LPATH: " + lpath
+                # debug_print("LPATH: ", lpath, "  - Title: " , title)
 
                 playlist_map = {}
 
@@ -112,7 +114,7 @@ class KOBO(USBMS):
                     #print "Image name Normalized: " + imagename
                     if imagename is not None:
                         bl[idx].thumbnail = ImageWrapper(imagename)
-                    if ContentType != '6':
+                    if (ContentType != '6'and self.dbversion < 8) or (self.dbversion >= 8):
                         if self.update_metadata_item(bl[idx]):
                             # print 'update_metadata_item returned true'
                             changed = True
@@ -120,10 +122,16 @@ class KOBO(USBMS):
                             playlist_map[lpath] not in bl[idx].device_collections:
                         bl[idx].device_collections.append(playlist_map[lpath])
                 else:
-                    if ContentType == '6':
+                    if ContentType == '6' and self.dbversion < 8:
                         book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=1048576)
                     else:
-                        book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
+                        try:
+                            book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
+                        except:
+                            debug_print("prefix: ", prefix, "lpath: ", lpath, "title: ", title, "authors: ", authors, \
+                                    "mime: ", mime, "date: ", date, "ContentType: ", ContentType, "ImageID: ", ImageID)
+                            raise
 
                     # print 'Update booklist'
                     book.device_collections = [playlist_map[lpath]] if lpath in playlist_map else []
 
@@ -143,6 +151,13 @@ class KOBO(USBMS):
         # numrows = row[0]
         #cursor.close()
 
+        # Determine the database version
+        # 4 - Bluetooth Kobo Rev 2 (1.4)
+        # 8 - WIFI KOBO Rev 1
+        cursor.execute('select version from dbversion')
+        result = cursor.fetchone()
+        self.dbversion = result[0]
+
         query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
             'ImageID, ReadStatus from content where BookID is Null'
 
@@ -153,7 +168,8 @@ class KOBO(USBMS):
             # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
 
             path = self.path_from_contentid(row[3], row[5], oncard)
-            mime = mime_type_ext(path_to_ext(row[3]))
+            mime = mime_type_ext(path_to_ext(path)) if path.find('kepub') == -1 else 'application/epub+zip'
+            # debug_print("mime:", mime)
 
             if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
                 changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7])
@@ -206,7 +222,7 @@ class KOBO(USBMS):
         cursor.close()
 
         cursor = connection.cursor()
-        if ContentType == 6:
+        if ContentType == 6 and self.dbversion < 8:
             # Delete the shortcover_pages first
             cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)
 
@@ -249,7 +265,7 @@ class KOBO(USBMS):
         path = self.normalize_path(path)
         # print "Delete file normalized path: " + path
         extension = os.path.splitext(path)[1]
-        ContentType = self.get_content_type_from_extension(extension)
+        ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(path)
 
         ContentID = self.contentid_from_path(path, ContentType)
 
@@ -332,9 +348,14 @@ class KOBO(USBMS):
 
     def contentid_from_path(self, path, ContentType):
         if ContentType == 6:
-            ContentID = os.path.splitext(path)[0]
-            # Remove the prefix on the file.  it could be either
-            ContentID = ContentID.replace(self._main_prefix, '')
+            if self.dbversion < 8:
+                ContentID = os.path.splitext(path)[0]
+                # Remove the prefix on the file.  it could be either
+                ContentID = ContentID.replace(self._main_prefix, '')
+            else:
+                ContentID = path
+                ContentID = ContentID.replace(self._main_prefix + '.kobo/kepub/', '')
+
             if self._card_a_prefix is not None:
                 ContentID = ContentID.replace(self._card_a_prefix, '')
         elif ContentType == 999: # HTML Files
@@ -350,6 +371,13 @@ class KOBO(USBMS):
         ContentID = ContentID.replace("\\", '/')
         return ContentID
 
+    def get_content_type_from_path(self, path):
+        # Strictly speaking the ContentType could be 6 or 10
+        # however newspapers have the same storage format
+        if path.find('kepub') >= 0:
+            ContentType = 6
+        return ContentType
+
     def get_content_type_from_extension(self, extension):
         if extension == '.kobo':
             # Kobo books do not have book files.  They do have some images though
@@ -369,19 +397,22 @@ class KOBO(USBMS):
             print 'path from_contentid cardb'
         elif oncard == 'carda':
             path = path.replace("file:///mnt/sd/", self._card_a_prefix)
-            # print "SD Card: " + filename
+            # print "SD Card: " + path
         else:
-            if ContentType == "6":
+            if ContentType == "6" and self.dbversion < 8:
                 # This is a hack as the kobo files do not exist
                 # but the path is required to make a unique id
                 # for calibre's reference
                 path = self._main_prefix + path + '.kobo'
                 # print "Path: " + path
+            elif (ContentType == "6" or ContentType == "10") and self.dbversion >= 8:
+                path = self._main_prefix + '.kobo/kepub/' + path
+                # print "Internal: " + path
             else:
                 # if path.startswith("file:///mnt/onboard/"):
                 path = path.replace("file:///mnt/onboard/", self._main_prefix)
                 path = path.replace("/mnt/onboard/", self._main_prefix)
-                # print "Internal: " + filename
+                # print "Internal: " + path
 
         return path
 
@@ -469,7 +500,7 @@ class KOBO(USBMS):
             book.device_collections = ['Im_Reading']
 
             extension =  os.path.splitext(book.path)[1]
-            ContentType = self.get_content_type_from_extension(extension)
+            ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
 
             ContentID = self.contentid_from_path(book.path, ContentType)
             datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
@@ -505,7 +536,7 @@ class KOBO(USBMS):
             book.device_collections = ['Read']
 
            extension =  os.path.splitext(book.path)[1]
-            ContentType = self.get_content_type_from_extension(extension)
+            ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
 
             ContentID = self.contentid_from_path(book.path, ContentType)
             # datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
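The recurring dbversion checks all encode one distinction: firmware before the Kobo WiFi models (dbversion < 8) tracks store books as virtual .kobo entries that never exist on disk, while dbversion >= 8 keeps real kepub files under .kobo/kepub/. A hedged sketch of the resulting path mapping (prefix illustrative):

    def kepub_path(prefix, content_id, dbversion):
        # prefix is the device mount point, e.g. '/media/KOBOeReader/'
        if dbversion < 8:
            return prefix + content_id + '.kobo'       # virtual, never on disk
        return prefix + '.kobo/kepub/' + content_id    # real kepub file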
@@ -72,6 +72,15 @@ class SWEEX(USBMS):
     EBOOK_DIR_MAIN = ''
     SUPPORTS_SUB_DIRS = True
 
+class Q600(SWEEX):
+
+    name = 'Digma Q600 Device interface'
+    gui_name = 'Q600'
+    description = _('Communicate with the Digma Q600')
+
+    BCD = [0x325]
+    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
+
 class KOGAN(SWEEX):
 
     name = 'Kogan Device Interface'
@@ -102,7 +111,7 @@ class PDNOVEL(USBMS):
     DELETE_EXTS = ['.jpg', '.jpeg', '.png']
 
 
-    def upload_cover(self, path, filename, metadata):
+    def upload_cover(self, path, filename, metadata, filepath):
         coverdata = getattr(metadata, 'thumbnail', None)
         if coverdata and coverdata[2]:
             with open('%s.jpg' % os.path.join(path, filename), 'wb') as coverfile:
@@ -117,6 +126,12 @@ class PDNOVEL_KOBO(PDNOVEL):
 
     EBOOK_DIR_MAIN = 'eBooks/Kobo'
 
+    def upload_cover(self, path, filename, metadata, filepath):
+        coverdata = getattr(metadata, 'thumbnail', None)
+        if coverdata and coverdata[2]:
+            with open(os.path.join(path, '.thumbnail', filename+'.jpg'), 'wb') as coverfile:
+                coverfile.write(coverdata[2])
+
 
 class VELOCITYMICRO(USBMS):
     name = 'VelocityMicro device interface'
@@ -36,15 +36,15 @@ class N770(USBMS):
 
 class N810(N770):
     name = 'Nokia 810 Device Interface'
-    gui_name = 'Nokia 810'
-    description = _('Communicate with the Nokia 810 internet tablet.')
+    gui_name = 'Nokia 810/900'
+    description = _('Communicate with the Nokia 810/900 internet tablet.')
 
-    PRODUCT_ID = [0x96]
+    PRODUCT_ID = [0x96, 0x1c7]
     BCD = [0x316]
 
-    WINDOWS_MAIN_MEM = 'N810'
+    WINDOWS_MAIN_MEM = ['N810', 'N900']
 
-    MAIN_MEMORY_VOLUME_LABEL = 'N810 Main Memory'
+    MAIN_MEMORY_VOLUME_LABEL = 'Nokia Tablet Main Memory'
 
 class E71X(USBMS):
 
@@ -45,7 +45,7 @@ class NOOK(USBMS):
     DELETE_EXTS = ['.jpg']
     SUPPORTS_SUB_DIRS = True
 
-    def upload_cover(self, path, filename, metadata):
+    def upload_cover(self, path, filename, metadata, filepath):
         try:
             from PIL import Image, ImageDraw
             Image, ImageDraw
@@ -2,5 +2,11 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
 MEDIA_XML = 'database/cache/media.xml'
+MEDIA_EXT = 'database/cache/cacheExt.xml'
+
 CACHE_XML = 'Sony Reader/database/cache.xml'
+CACHE_EXT = 'Sony Reader/database/cacheExt.xml'
+
+MEDIA_THUMBNAIL = 'database/thumbnail'
+CACHE_THUMBNAIL = 'Sony Reader/database/thumbnail'
+
@@ -9,10 +9,10 @@ Device driver for the SONY devices
 import os, time, re
 
 from calibre.devices.usbms.driver import USBMS, debug_print
-from calibre.devices.prs505 import MEDIA_XML
-from calibre.devices.prs505 import CACHE_XML
+from calibre.devices.prs505 import MEDIA_XML, MEDIA_EXT, CACHE_XML, CACHE_EXT, \
+            MEDIA_THUMBNAIL, CACHE_THUMBNAIL
 from calibre.devices.prs505.sony_cache import XMLCache
-from calibre import __appname__
+from calibre import __appname__, prints
 from calibre.devices.usbms.books import CollectionsBookList
 
 class PRS505(USBMS):
@@ -66,6 +66,8 @@ class PRS505(USBMS):
     plugboard = None
     plugboard_func = None
 
+    THUMBNAIL_HEIGHT = 200
+
     def windows_filter_pnp_id(self, pnp_id):
         return '_LAUNCHER' in pnp_id
 
@@ -116,20 +118,21 @@ class PRS505(USBMS):
         return fname
 
     def initialize_XML_cache(self):
-        paths, prefixes = {}, {}
-        for prefix, path, source_id in [
-                ('main', MEDIA_XML, 0),
-                ('card_a', CACHE_XML, 1),
-                ('card_b', CACHE_XML, 2)
+        paths, prefixes, ext_paths = {}, {}, {}
+        for prefix, path, ext_path, source_id in [
+                ('main', MEDIA_XML, MEDIA_EXT, 0),
+                ('card_a', CACHE_XML, CACHE_EXT, 1),
+                ('card_b', CACHE_XML, CACHE_EXT, 2)
                 ]:
             prefix = getattr(self, '_%s_prefix'%prefix)
             if prefix is not None and os.path.exists(prefix):
                 paths[source_id] = os.path.join(prefix, *(path.split('/')))
+                ext_paths[source_id] = os.path.join(prefix, *(ext_path.split('/')))
                 prefixes[source_id] = prefix
                 d = os.path.dirname(paths[source_id])
                 if not os.path.exists(d):
                     os.makedirs(d)
-        return XMLCache(paths, prefixes, self.settings().use_author_sort)
+        return XMLCache(paths, ext_paths, prefixes, self.settings().use_author_sort)
 
     def books(self, oncard=None, end_session=True):
         debug_print('PRS505: starting fetching books for card', oncard)
@@ -174,3 +177,31 @@ class PRS505(USBMS):
     def set_plugboards(self, plugboards, pb_func):
         self.plugboards = plugboards
         self.plugboard_func = pb_func
+
+    def upload_cover(self, path, filename, metadata, filepath):
+        if metadata.thumbnail and metadata.thumbnail[-1]:
+            path = path.replace('/', os.sep)
+            is_main = path.startswith(self._main_prefix)
+            thumbnail_dir = MEDIA_THUMBNAIL if is_main else CACHE_THUMBNAIL
+            prefix = None
+            if is_main:
+                prefix = self._main_prefix
+            else:
+                if self._card_a_prefix and \
+                        path.startswith(self._card_a_prefix):
+                    prefix = self._card_a_prefix
+                elif self._card_b_prefix and \
+                        path.startswith(self._card_b_prefix):
+                    prefix = self._card_b_prefix
+            if prefix is None:
+                prints('WARNING: Failed to find prefix for:', filepath)
+                return
+            thumbnail_dir = os.path.join(prefix, *thumbnail_dir.split('/'))
+
+            relpath = os.path.relpath(filepath, prefix)
+            thumbnail_dir = os.path.join(thumbnail_dir, relpath)
+            if not os.path.exists(thumbnail_dir):
+                os.makedirs(thumbnail_dir)
+            with open(os.path.join(thumbnail_dir, 'main_thumbnail.jpg'), 'wb') as f:
+                f.write(metadata.thumbnail[-1])
+
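To make the layout concrete: the new PRS505.upload_cover mirrors the book's on-device path under the thumbnail directory and writes the cover there as main_thumbnail.jpg. A hedged walk-through (paths illustrative):

    import os

    prefix = '/media/READER/'    # assumed mount point
    filepath = prefix + 'database/media/books/News.epub'
    relpath = os.path.relpath(filepath, prefix)
    # -> <prefix>database/thumbnail/database/media/books/News.epub/main_thumbnail.jpg
    print os.path.join(prefix, 'database', 'thumbnail', relpath,
            'main_thumbnail.jpg')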
@@ -9,6 +9,7 @@ import os, time
 from base64 import b64decode
 from uuid import uuid4
 from lxml import etree
+from datetime import date
 
 from calibre import prints, guess_type, isbytestring
 from calibre.devices.errors import DeviceError
@@ -18,6 +19,20 @@ from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata import authors_to_string, title_sort, \
         authors_to_sort_string
 
+'''
+cahceExt.xml
+
+Periodical identifier sample from a PRS-650:
+
+<?xml version="1.0" encoding="UTF-8"?>
+<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
+    <text conformsTo="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0" periodicalName="The Atlantic" description="Current affairs and politics focussed on the US" publicationDate="Tue, 19 Oct 2010 00:00:00 GMT" path="database/media/books/calibre/Atlantic [Mon, 18 Oct 2010], The - calibre_1701.epub">
+        <thumbnail width="167" height="217">main_thumbnail.jpg</thumbnail>
+    </text>
+</cacheExt>
+
+'''
+
 # Utility functions {{{
 EMPTY_CARD_CACHE = '''\
 <?xml version="1.0" encoding="UTF-8"?>
@@ -25,6 +40,12 @@ EMPTY_CARD_CACHE = '''\
 </cache>
 '''
 
+EMPTY_EXT_CACHE = '''\
+<?xml version="1.0" encoding="UTF-8"?>
+<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
+</cacheExt>
+'''
+
 MIME_MAP   = {
         "lrf" : "application/x-sony-bbeb",
         'lrx' : 'application/x-sony-bbeb',
@@ -63,7 +84,7 @@ def uuid():
 
 class XMLCache(object):
 
-    def __init__(self, paths, prefixes, use_author_sort):
+    def __init__(self, paths, ext_paths, prefixes, use_author_sort):
         if DEBUG:
             debug_print('Building XMLCache...', paths)
         self.paths = paths
@@ -76,8 +97,8 @@ class XMLCache(object):
         for source_id, path in paths.items():
             if source_id == 0:
                 if not os.path.exists(path):
-                    raise DeviceError('The SONY XML cache media.xml does not exist. Try'
-                        ' disconnecting and reconnecting your reader.')
+                    raise DeviceError(('The SONY XML cache %r does not exist. Try'
+                        ' disconnecting and reconnecting your reader.')%repr(path))
                 with open(path, 'rb') as f:
                     raw = f.read()
             else:
@@ -85,14 +106,34 @@ class XMLCache(object):
                 if os.access(path, os.R_OK):
                     with open(path, 'rb') as f:
                         raw = f.read()
+
             self.roots[source_id] = etree.fromstring(xml_to_unicode(
                         raw, strip_encoding_pats=True, assume_utf8=True,
                         verbose=DEBUG)[0],
                         parser=parser)
             if self.roots[source_id] is None:
-                raise Exception(('The SONY database at %s is corrupted. Try '
+                raise Exception(('The SONY database at %r is corrupted. Try '
                         ' disconnecting and reconnecting your reader.')%path)
 
+        self.ext_paths, self.ext_roots = {}, {}
+        for source_id, path in ext_paths.items():
+            if not os.path.exists(path):
+                try:
+                    with open(path, 'wb') as f:
+                        f.write(EMPTY_EXT_CACHE)
+                except:
+                    pass
+            if os.access(path, os.W_OK):
+                try:
+                    with open(path, 'rb') as f:
+                        self.ext_roots[source_id] = etree.fromstring(
+                                xml_to_unicode(f.read(),
+                                    strip_encoding_pats=True, assume_utf8=True,
+                                    verbose=DEBUG)[0], parser=parser)
+                        self.ext_paths[source_id] = path
+                except:
+                    pass
+
         # }}}
 
         recs = self.roots[0].xpath('//*[local-name()="records"]')
@@ -352,12 +393,18 @@ class XMLCache(object):
             debug_print('Updating XML Cache:', i)
             root = self.record_roots[i]
             lpath_map = self.build_lpath_map(root)
+            ext_root = self.ext_roots[i] if i in self.ext_roots else None
+            ext_lpath_map = None
+            if ext_root is not None:
+                ext_lpath_map = self.build_lpath_map(ext_root)
             gtz_count = ltz_count = 0
             use_tz_var = False
             for book in booklist:
                 path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
                 record = lpath_map.get(book.lpath, None)
+                created = False
                 if record is None:
+                    created = True
                     record = self.create_text_record(root, i, book.lpath)
                 if plugboard is not None:
                     newmi = book.deepcopy_metadata()
@@ -373,6 +420,13 @@ class XMLCache(object):
                 if book.device_collections is None:
                     book.device_collections = []
                 book.device_collections = playlist_map.get(book.lpath, [])
+
+                if created and ext_root is not None and \
+                        ext_lpath_map.get(book.lpath, None) is None:
+                    ext_record = self.create_ext_text_record(ext_root, i,
+                            book.lpath, book.thumbnail)
+                    self.periodicalize_book(book, ext_record)
+
             debug_print('Timezone votes: %d GMT, %d LTZ, use_tz_var=%s'%
                     (gtz_count, ltz_count, use_tz_var))
             self.update_playlists(i, root, booklist, collections_attributes)
@@ -386,6 +440,47 @@ class XMLCache(object):
         self.fix_ids()
         debug_print('Finished update')
 
+    def is_sony_periodical(self, book):
+        if _('News') not in book.tags:
+            return False
+        if not book.lpath.lower().endswith('.epub'):
+            return False
+        if book.pubdate.date() < date(2010, 10, 17):
+            return False
+        return True
+
+    def periodicalize_book(self, book, record):
+        if not self.is_sony_periodical(book):
+            return
+        record.set('conformsTo',
+            "http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0")
+
+        record.set('description', '')
+
+        name = None
+        if '[' in book.title:
+            name = book.title.split('[')[0].strip()
+            if len(name) < 4:
+                name = None
+        if not name:
+            try:
+                name = [t for t in book.tags if t != _('News')][0]
+            except:
+                name = None
+
+        if not name:
+            name = book.title
+
+        record.set('periodicalName', name)
+
+        try:
+            pubdate = strftime(book.pubdate.utctimetuple(),
+                    zone=lambda x : x)
+            record.set('publicationDate', pubdate)
+        except:
+            pass
+
     def rebuild_collections(self, booklist, bl_index):
         if bl_index not in self.record_roots:
             return
@@ -472,6 +567,28 @@ class XMLCache(object):
             root.append(ans)
         return ans
 
+    def create_ext_text_record(self, root, bl_id, lpath, thumbnail):
+        namespace = root.nsmap[None]
+        attrib = { 'path': lpath }
+        ans = root.makeelement('{%s}text'%namespace, attrib=attrib,
+                nsmap=root.nsmap)
+        ans.tail = '\n'
+        if len(root) > 0:
+            root[-1].tail = '\n\t'
+        else:
+            root.text = '\n\t'
+        root.append(ans)
+        if thumbnail and thumbnail[-1]:
+            ans.text = '\n' + '\t\t'
+            t = root.makeelement('{%s}thumbnail'%namespace,
+                attrib={'width':str(thumbnail[0]), 'height':str(thumbnail[1])},
+                nsmap=root.nsmap)
+            t.text = 'main_thumbnail.jpg'
+            ans.append(t)
+            t.tail = '\n\t'
+        return ans
+
     def update_text_record(self, record, book, path, bl_index,
             gtz_count, ltz_count, use_tz_var):
         '''
@@ -589,6 +706,18 @@ class XMLCache(object):
                     '<?xml version="1.0" encoding="UTF-8"?>')
             with open(path, 'wb') as f:
                 f.write(raw)
+
+        for i, path in self.ext_paths.items():
+            try:
+                raw = etree.tostring(self.ext_roots[i], encoding='UTF-8',
+                        xml_declaration=True)
+            except:
+                continue
+            raw = raw.replace("<?xml version='1.0' encoding='UTF-8'?>",
+                    '<?xml version="1.0" encoding="UTF-8"?>')
+            with open(path, 'wb') as f:
+                f.write(raw)
+
 # }}}
 
     # Utility methods {{{
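The periodicalName heuristic in periodicalize_book strips the date suffix calibre appends to downloaded news titles, falling back to the first non-News tag and finally the full title. A worked example of the common case (title illustrative, matching the sample in the docstring above):

    title = 'The Atlantic [Mon, 18 Oct 2010]'   # typical downloaded-news title
    name = title.split('[')[0].strip()          # -> 'The Atlantic'
    print name   # becomes the periodicalName attribute in cacheExt.xml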
@@ -41,6 +41,15 @@ class NEWSMY(TECLAST_K3):
     WINDOWS_MAIN_MEM = 'NEWSMY'
     WINDOWS_CARD_A_MEM = 'USBDISK____SD'
 
+class PICO(NEWSMY):
+    name = 'Pico device interface'
+    gui_name = 'Pico'
+    description = _('Communicate with the Pico reader.')
+
+    WINDOWS_MAIN_MEM = 'USBDISK__USER'
+    EBOOK_DIR_MAIN = 'Books'
+    FORMATS = ['EPUB', 'FB2', 'TXT', 'LRC', 'PDB', 'PDF', 'HTML', 'WTXT']
+
 class IPAPYRUS(TECLAST_K3):
 
     name = 'iPapyrus device interface'
@@ -5,8 +5,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import dbus
-import os
+import dbus, os
 
 def node_mountpoint(node):
 
@@ -56,15 +55,6 @@ class UDisks(object):
         parent = device_node_path
         while parent[-1] in '0123456789':
             parent = parent[:-1]
-        devices = [str(x) for x in self.main.EnumerateDeviceFiles()]
-        for d in devices:
-            if d.startswith(parent) and d != parent:
-                try:
-                    self.unmount(d)
-                except:
-                    import traceback
-                    print 'Failed to unmount:', d
-                    traceback.print_exc()
         d = self.device(parent)
         d.DriveEject([])
 
@@ -76,13 +66,19 @@ def eject(node_path):
     u = UDisks()
     u.eject(node_path)
 
+def umount(node_path):
+    u = UDisks()
+    u.unmount(node_path)
+
 if __name__ == '__main__':
     import sys
     dev = sys.argv[1]
     print 'Testing with node', dev
     u = UDisks()
     print 'Mounted at:', u.mount(dev)
-    print 'Ejecting'
+    print 'Unmounting'
+    u.unmount(dev)
+    print 'Ejecting:'
     u.eject(dev)
 
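The module now exposes umount alongside eject, so callers can unmount a filesystem before ejecting the drive. A hedged usage sketch (device node illustrative; requires a running UDisks D-Bus service):

    from calibre.devices.udisks import umount, eject

    umount('/dev/sdb1')   # unmount the partition first
    eject('/dev/sdb1')    # then eject the underlying drive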
@@ -99,6 +99,13 @@ class CollectionsBookList(BookList):
     def supports_collections(self):
         return True
 
+    def in_category_sort_rules(self, attr):
+        sorts = tweaks['sony_collection_sorting_rules']
+        for attrs,sortattr in sorts:
+            if attr in attrs or '*' in attrs:
+                return sortattr
+        return None
+
     def compute_category_name(self, attr, category, field_meta):
         renames = tweaks['sony_collection_renaming_rules']
         attr_name = renames.get(attr, None)
@@ -116,6 +123,7 @@ class CollectionsBookList(BookList):
         from calibre.devices.usbms.driver import debug_print
         debug_print('Starting get_collections:', prefs['manage_device_metadata'])
         debug_print('Renaming rules:', tweaks['sony_collection_renaming_rules'])
+        debug_print('Sorting rules:', tweaks['sony_collection_sorting_rules'])
 
         # Complexity: we can use renaming rules only when using automatic
         # management. Otherwise we don't always have the metadata to make the
@@ -171,6 +179,7 @@ class CollectionsBookList(BookList):
                 else:
                     val = [val]
 
+                sort_attr = self.in_category_sort_rules(attr)
                 for category in val:
                     is_series = False
                     if doing_dc:
@@ -199,22 +208,41 @@ class CollectionsBookList(BookList):
 
                     if cat_name not in collections:
                         collections[cat_name] = {}
-                    if is_series:
+                    if use_renaming_rules and sort_attr:
+                        sort_val = book.get(sort_attr, None)
+                        collections[cat_name][lpath] = \
+                            (book, sort_val, book.get('title_sort', 'zzzz'))
+                    elif is_series:
                         if doing_dc:
                             collections[cat_name][lpath] = \
-                                (book, book.get('series_index', sys.maxint))
+                                (book, book.get('series_index', sys.maxint), '')
                         else:
                             collections[cat_name][lpath] = \
-                                (book, book.get(attr+'_index', sys.maxint))
+                                (book, book.get(attr+'_index', sys.maxint), '')
                     else:
                         if lpath not in collections[cat_name]:
                             collections[cat_name][lpath] = \
-                                (book, book.get('title_sort', 'zzzz'))
+                                (book, book.get('title_sort', 'zzzz'), '')
         # Sort collections
         result = {}
+
+        def none_cmp(xx, yy):
+            x = xx[1]
+            y = yy[1]
+            if x is None and y is None:
+                return cmp(xx[2], yy[2])
+            if x is None:
+                return 1
+            if y is None:
+                return -1
+            c = cmp(x, y)
+            if c != 0:
+                return c
+            return cmp(xx[2], yy[2])
+
         for category, lpaths in collections.items():
             books = lpaths.values()
-            books.sort(cmp=lambda x,y:cmp(x[1], y[1]))
+            books.sort(cmp=none_cmp)
             result[category] = [x[0] for x in books]
         return result
 
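none_cmp orders the (book, sort_value, title_sort) tuples built above so that entries missing a sort value land last, with title_sort breaking ties. A small demonstration (book values illustrative):

    books = [('b1', None, 'Zebra'), ('b2', 2, 'Apple'), ('b3', None, 'Acorn')]
    books.sort(cmp=none_cmp)
    print [b[0] for b in books]   # -> ['b2', 'b3', 'b1']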
@@ -523,7 +523,8 @@ class Device(DeviceConfig, DevicePlugin):
                     devnodes.append(node)
 
         devnodes += list(repeat(None, 3))
-        ans = tuple(['/dev/'+x if ok.get(x, False) else None for x in devnodes[:3]])
+        ans = ['/dev/'+x if ok.get(x, False) else None for x in devnodes[:3]]
+        ans.sort(key=lambda x: x[5:] if x else 'zzzzz')
        return self.linux_swap_drives(ans)
 
     def linux_swap_drives(self, drives):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def eject_linux(self):
|
def eject_linux(self):
|
||||||
try:
|
from calibre.devices.udisks import eject, umount
|
||||||
from calibre.devices.udisks import eject
|
drives = [d for d in self.find_device_nodes() if d]
|
||||||
return eject(self._linux_main_device_node)
|
for d in drives:
|
||||||
except:
|
try:
|
||||||
pass
|
umount(d)
|
||||||
drives = self.find_device_nodes()
|
except:
|
||||||
|
pass
|
||||||
|
failures = False
|
||||||
|
for d in drives:
|
||||||
|
try:
|
||||||
|
eject(d)
|
||||||
|
except Exception, e:
|
||||||
|
print 'Udisks eject call for:', d, 'failed:'
|
||||||
|
print '\t', e
|
||||||
|
failures = True
|
||||||
|
|
||||||
|
if not failures:
|
||||||
|
return
|
||||||
|
|
||||||
for drive in drives:
|
for drive in drives:
|
||||||
if drive:
|
cmd = 'calibre-mount-helper'
|
||||||
cmd = 'calibre-mount-helper'
|
if getattr(sys, 'frozen_path', False):
|
||||||
if getattr(sys, 'frozen_path', False):
|
cmd = os.path.join(sys.frozen_path, cmd)
|
||||||
cmd = os.path.join(sys.frozen_path, cmd)
|
cmd = [cmd, 'eject']
|
||||||
cmd = [cmd, 'eject']
|
mp = getattr(self, "_linux_mount_map", {}).get(drive,
|
||||||
mp = getattr(self, "_linux_mount_map", {}).get(drive,
|
'dummy/')[:-1]
|
||||||
'dummy/')[:-1]
|
try:
|
||||||
try:
|
subprocess.Popen(cmd + [drive, mp]).wait()
|
||||||
subprocess.Popen(cmd + [drive, mp]).wait()
|
except:
|
||||||
except:
|
pass
|
||||||
pass
|
|
||||||
|
|
||||||
def eject(self):
|
def eject(self):
|
||||||
if islinux:
|
if islinux:
|
||||||
|
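The new sort key strips the '/dev/' prefix for comparison and pushes missing nodes to the end; a minimal sketch:

    ans = ['/dev/sdc', None, '/dev/sda']
    ans.sort(key=lambda x: x[5:] if x else 'zzzzz')
    print ans  # ['/dev/sda', '/dev/sdc', None]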
@@ -186,7 +186,8 @@ class USBMS(CLI, Device):
                 self.put_file(infile, filepath, replace_file=True)
                 try:
                     self.upload_cover(os.path.dirname(filepath),
-                            os.path.splitext(os.path.basename(filepath))[0], mdata)
+                            os.path.splitext(os.path.basename(filepath))[0],
+                            mdata, filepath)
                 except: # Failure to upload cover is not catastrophic
                     import traceback
                     traceback.print_exc()
@@ -197,14 +198,15 @@ class USBMS(CLI, Device):
         debug_print('USBMS: finished uploading %d books'%(len(files)))
         return zip(paths, cycle([on_card]))

-    def upload_cover(self, path, filename, metadata):
+    def upload_cover(self, path, filename, metadata, filepath):
         '''
         Upload book cover to the device. Default implementation does nothing.

-        :param path: the full path were the associated book is located.
-        :param filename: the name of the book file without the extension.
+        :param path: The full path to the directory where the associated book is located.
+        :param filename: The name of the book file without the extension.
         :param metadata: metadata belonging to the book. Use metadata.thumbnail
                          for cover
+        :param filepath: The full path to the ebook file

         '''
         pass
@@ -30,9 +30,9 @@ def detect(aBuf):

# Added by Kovid
ENCODING_PATS = [
-        re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>',
+        re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
            re.IGNORECASE),
-        re.compile(r'''<meta\s+?[^<>]+?content=['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
+        re.compile(r'''<meta\s+?[^<>]+?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
            re.IGNORECASE)
]
ENTITY_PATTERN = re.compile(r'&(\S+?);')
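The loosened patterns now tolerate whitespace around '='. A quick standalone check of the first one (Python 2):

    import re
    pat = re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
            re.IGNORECASE)
    print pat.search('<?xml version="1.0" encoding = "utf-8"?>').group(1)
    # utf-8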
|
@ -14,7 +14,7 @@ from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
|||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
from calibre.utils.date import parse_date
|
from calibre.utils.date import parse_date
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
from calibre import extract, walk
|
from calibre import extract, walk, isbytestring, filesystem_encoding
|
||||||
from calibre.constants import __version__
|
from calibre.constants import __version__
|
||||||
|
|
||||||
DEBUG_README=u'''
|
DEBUG_README=u'''
|
||||||
@ -77,6 +77,10 @@ class Plumber(object):
|
|||||||
:param input: Path to input file.
|
:param input: Path to input file.
|
||||||
:param output: Path to output file/directory
|
:param output: Path to output file/directory
|
||||||
'''
|
'''
|
||||||
|
if isbytestring(input):
|
||||||
|
input = input.decode(filesystem_encoding)
|
||||||
|
if isbytestring(output):
|
||||||
|
output = output.decode(filesystem_encoding)
|
||||||
self.original_input_arg = input
|
self.original_input_arg = input
|
||||||
self.input = os.path.abspath(input)
|
self.input = os.path.abspath(input)
|
||||||
self.output = os.path.abspath(output)
|
self.output = os.path.abspath(output)
|
||||||
|
@@ -15,22 +15,30 @@ def rules(stylesheets):
         if r.type == r.STYLE_RULE:
             yield r

-def initialize_container(path_to_container, opf_name='metadata.opf'):
+def initialize_container(path_to_container, opf_name='metadata.opf',
+        extra_entries=[]):
    '''
    Create an empty EPUB document, with a default skeleton.
    '''
-    CONTAINER='''\
+    rootfiles = ''
+    for path, mimetype, _ in extra_entries:
+        rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
+                path, mimetype)
+    CONTAINER = u'''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
-      <rootfile full-path="%s" media-type="application/oebps-package+xml"/>
+      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
+      {extra_entries}
   </rootfiles>
</container>
-    '''%opf_name
+    '''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', '', 0700)
    zf.writestr('META-INF/container.xml', CONTAINER)
+    for path, _, data in extra_entries:
+        zf.writestr(path, data)
    return zf
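A hedged usage sketch of the new extra_entries parameter (file names and data are illustrative): each entry is a (path, mimetype, data) triple; the first two become an extra <rootfile> in container.xml, and the data is written into the zip.

    from calibre.ebooks.epub import initialize_container

    atom = '<feed xmlns="http://www.w3.org/2005/Atom"/>'
    zf = initialize_container('out.epub', 'content.opf',
            extra_entries=[('atom.xml', 'application/atom+xml', atom)])
    zf.close()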
@@ -108,6 +108,27 @@ class EPUBInput(InputFormatPlugin):
                    open('calibre_raster_cover.jpg', 'wb').write(
                        renderer)

+    def find_opf(self):
+        def attr(n, attr):
+            for k, v in n.attrib.items():
+                if k.endswith(attr):
+                    return v
+        try:
+            with open('META-INF/container.xml') as f:
+                root = etree.fromstring(f.read())
+                for r in root.xpath('//*[local-name()="rootfile"]'):
+                    if attr(r, 'media-type') != "application/oebps-package+xml":
+                        continue
+                    path = attr(r, 'full-path')
+                    if not path:
+                        continue
+                    path = os.path.join(os.getcwdu(), *path.split('/'))
+                    if os.path.exists(path):
+                        return path
+        except:
+            import traceback
+            traceback.print_exc()
+
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
@@ -116,12 +137,13 @@ class EPUBInput(InputFormatPlugin):
        zf = ZipFile(stream)
        zf.extractall(os.getcwd())
        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
-        opf = None
-        for f in walk(u'.'):
-            if f.lower().endswith('.opf') and '__MACOSX' not in f and \
-                    not os.path.basename(f).startswith('.'):
-                opf = os.path.abspath(f)
-                break
+        opf = self.find_opf()
+        if opf is None:
+            for f in walk(u'.'):
+                if f.lower().endswith('.opf') and '__MACOSX' not in f and \
+                        not os.path.basename(f).startswith('.'):
+                    opf = os.path.abspath(f)
+                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
@@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin):
    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])

+

    def workaround_webkit_quirks(self): # {{{
        from calibre.ebooks.oeb.base import XPath
        for x in self.oeb.spine:
@@ -183,6 +184,13 @@ class EPUBOutput(OutputFormatPlugin):

        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
+            metadata_xml = None
+            extra_entries = []
+            if self.is_periodical:
+                if self.opts.output_profile.epub_periodical_format == 'sony':
+                    from calibre.ebooks.epub.periodical import sony_metadata
+                    metadata_xml, atom_xml = sony_metadata(oeb)
+                    extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
@@ -194,10 +202,14 @@ class EPUBOutput(OutputFormatPlugin):
                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

            from calibre.ebooks.epub import initialize_container
-            epub = initialize_container(output_path, os.path.basename(opf))
+            epub = initialize_container(output_path, os.path.basename(opf),
+                    extra_entries=extra_entries)
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml', encryption)
+            if metadata_xml is not None:
+                epub.writestr('META-INF/metadata.xml',
+                        metadata_xml.encode('utf-8'))
            if opts.extract_to is not None:
                if os.path.exists(opts.extract_to):
                    shutil.rmtree(opts.extract_to)
src/calibre/ebooks/epub/periodical.py (new file, 173 lines)
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from uuid import uuid4
+
+from calibre.constants import __appname__, __version__
+from calibre import strftime, prepare_string_for_xml as xml
+
+SONY_METADATA = u'''\
+<?xml version="1.0" encoding="utf-8"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+        xmlns:dcterms="http://purl.org/dc/terms/"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:prs="http://xmlns.sony.net/e-book/prs/">
+<rdf:Description rdf:about="">
+<dc:title>{title}</dc:title>
+<dc:publisher>{publisher}</dc:publisher>
+<dcterms:alternative>{short_title}</dcterms:alternative>
+<dcterms:issued>{issue_date}</dcterms:issued>
+<dc:language>{language}</dc:language>
+<dcterms:conformsTo rdf:resource="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0"/>
+<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/newspaper"/>
+<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/periodical"/>
+</rdf:Description>
+</rdf:RDF>
+'''
+
+SONY_ATOM = u'''\
+<?xml version="1.0" encoding="utf-8" ?>
+<feed xmlns="http://www.w3.org/2005/Atom"
+      xmlns:dc="http://purl.org/dc/elements/1.1/"
+      xmlns:dcterms="http://purl.org/dc/terms/"
+      xmlns:prs="http://xmlns.sony.net/e-book/prs/"
+      xmlns:media="http://video.search.yahoo.com/mrss"
+      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+<title>{short_title}</title>
+<updated>{updated}</updated>
+<id>{id}</id>
+{entries}
+</feed>
+'''
+
+SONY_ATOM_SECTION = u'''\
+<entry rdf:ID="{title}">
+  <title>{title}</title>
+  <link href="{href}"/>
+  <id>{id}</id>
+  <updated>{updated}</updated>
+  <summary>{desc}</summary>
+  <category term="{short_title}/{title}"
+      scheme="http://xmlns.sony.net/e-book/terms/" label="{title}"/>
+  <dc:type xsi:type="prs:datatype">newspaper/section</dc:type>
+  <dcterms:isReferencedBy rdf:resource=""/>
+</entry>
+'''
+
+SONY_ATOM_ENTRY = u'''\
+<entry>
+  <title>{title}</title>
+  <author><name>{author}</name></author>
+  <link href="{href}"/>
+  <id>{id}</id>
+  <updated>{updated}</updated>
+  <summary>{desc}</summary>
+  <category term="{short_title}/{section_title}"
+      scheme="http://xmlns.sony.net/e-book/terms/" label="{section_title}"/>
+  <dcterms:extent xsi:type="prs:word-count">{word_count}</dcterms:extent>
+  <dc:type xsi:type="prs:datatype">newspaper/article</dc:type>
+  <dcterms:isReferencedBy rdf:resource="#{section_title}"/>
+</entry>
+'''
+
+def sony_metadata(oeb):
+    m = oeb.metadata
+    title = short_title = unicode(m.title[0])
+    publisher = __appname__ + ' ' + __version__
+    try:
+        pt = unicode(oeb.metadata.publication_type[0])
+        short_title = u':'.join(pt.split(':')[2:])
+    except:
+        pass
+
+    try:
+        date = unicode(m.date[0]).split('T')[0]
+    except:
+        date = strftime('%Y-%m-%d')
+    try:
+        language = unicode(m.language[0]).replace('_', '-')
+    except:
+        language = 'en'
+    short_title = xml(short_title, True)
+
+    metadata = SONY_METADATA.format(title=xml(title),
+            short_title=short_title,
+            publisher=xml(publisher), issue_date=xml(date),
+            language=xml(language))
+
+    updated = strftime('%Y-%m-%dT%H:%M:%SZ')
+
+    def cal_id(x):
+        for k, v in x.attrib.items():
+            if k.endswith('scheme') and v == 'uuid':
+                return True
+
+    try:
+        base_id = unicode(list(filter(cal_id, m.identifier))[0])
+    except:
+        base_id = str(uuid4())
+
+    entries = []
+    seen_titles = set([])
+    for i, section in enumerate(oeb.toc):
+        if not section.href:
+            continue
+        secid = 'section%d'%i
+        sectitle = section.title
+        if not sectitle:
+            sectitle = _('Unknown')
+        d = 1
+        bsectitle = sectitle
+        while sectitle in seen_titles:
+            sectitle = bsectitle + ' ' + str(d)
+            d += 1
+        seen_titles.add(sectitle)
+        sectitle = xml(sectitle, True)
+        secdesc = section.description
+        if not secdesc:
+            secdesc = ''
+        secdesc = xml(secdesc)
+        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
+            href=section.href, id=xml(base_id)+'/'+secid,
+            short_title=short_title, desc=secdesc, updated=updated))
+
+        for j, article in enumerate(section):
+            if not article.href:
+                continue
+            atitle = article.title
+            btitle = atitle
+            d = 1
+            while atitle in seen_titles:
+                atitle = btitle + ' ' + str(d)
+                d += 1
+
+            auth = article.author if article.author else ''
+            desc = section.description
+            if not desc:
+                desc = ''
+            aid = 'article%d'%j
+
+            entries.append(SONY_ATOM_ENTRY.format(
+                title=xml(atitle),
+                author=xml(auth),
+                updated=updated,
+                desc=desc,
+                short_title=short_title,
+                section_title=sectitle,
+                href=article.href,
+                word_count=str(1),
+                id=xml(base_id)+'/'+secid+'/'+aid
+                ))
+
+    atom = SONY_ATOM.format(short_title=short_title,
+            entries='\n\n'.join(entries), updated=updated,
+            id=xml(base_id)).encode('utf-8')
+
+    return metadata, atom
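The short title in sony_metadata() relies on calibre's 'periodical:<type>:<title>' publication_type convention; a minimal sketch of that parsing step (the value is illustrative):

    pt = u'periodical:newspaper:The Daily Example'
    short_title = u':'.join(pt.split(':')[2:])
    print short_title  # The Daily Example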
@@ -40,14 +40,35 @@ class FB2Input(InputFormatPlugin):
            accelerators):
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
-        from calibre.ebooks.oeb.base import XLINK_NS
+        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
        NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
        log.debug('Parsing XML...')
-        raw = stream.read()
+        raw = stream.read().replace('\0', '')
        try:
            doc = etree.fromstring(raw)
        except etree.XMLSyntaxError:
-            doc = etree.fromstring(raw.replace('& ', '&amp;'))
+            try:
+                doc = etree.fromstring(raw, parser=RECOVER_PARSER)
+            except:
+                doc = etree.fromstring(raw.replace('& ', '&amp;'),
+                        parser=RECOVER_PARSER)
+        stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
+        css = ''
+        for s in stylesheets:
+            css += etree.tostring(s, encoding=unicode, method='text',
+                    with_tail=False) + '\n\n'
+        if css:
+            import cssutils, logging
+            parser = cssutils.CSSParser(fetcher=None,
+                    log=logging.getLogger('calibre.css'))
+
+            XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
+            text = XHTML_CSS_NAMESPACE + css
+            log.debug('Parsing stylesheet...')
+            stylesheet = parser.parseString(text)
+            stylesheet.namespaces['h'] = XHTML_NS
+            css = unicode(stylesheet.cssText).replace('h|style', 'h|span')
+            css = re.sub(r'name\s*=\s*', 'class=', css)
        self.extract_embedded_content(doc)
        log.debug('Converting XML to HTML...')
        ss = open(P('templates/fb2.xsl'), 'rb').read()
@@ -63,7 +84,9 @@ class FB2Input(InputFormatPlugin):
        for img in result.xpath('//img[@src]'):
            src = img.get('src')
            img.set('src', self.binary_map.get(src, src))
-        open('index.xhtml', 'wb').write(transform.tostring(result))
+        index = transform.tostring(result)
+        open('index.xhtml', 'wb').write(index)
+        open('inline-styles.css', 'wb').write(css)
        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
@@ -282,15 +282,22 @@ class HTMLInput(InputFormatPlugin):
        basedir = os.getcwd()
        self.opts = opts

+        fname = None
        if hasattr(stream, 'name'):
            basedir = os.path.dirname(stream.name)
+            fname = os.path.basename(stream.name)
+
        if file_ext != 'opf':
            if opts.dont_package:
                raise ValueError('The --dont-package option is not supported for an HTML input file')
            from calibre.ebooks.metadata.html import get_metadata
-            oeb = self.create_oebbook(stream.name, basedir, opts, log,
-                    get_metadata(stream))
+            mi = get_metadata(stream)
+            if fname:
+                from calibre.ebooks.metadata.meta import metadata_from_filename
+                fmi = metadata_from_filename(fname)
+                fmi.smart_update(mi)
+                mi = fmi
+            oeb = self.create_oebbook(stream.name, basedir, opts, log, mi)
            return oeb

        from calibre.ebooks.conversion.plumber import create_oebbook
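A sketch of the merge order above (minimal example, assuming the in-tree Metadata class): metadata parsed from the filename is the base object, and anything found inside the HTML wins via smart_update().

    from calibre.ebooks.metadata.book.base import Metadata

    fmi = Metadata(u'Title From Filename')
    mi = Metadata(u'Title From HTML', [u'Real Author'])
    fmi.smart_update(mi)
    print fmi.title  # Title From HTML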
@@ -33,6 +33,10 @@ class HTMLOutput(OutputFormatPlugin):

        OptionRecommendation(name='template_html',
            help=_('Template used for the generation of the html contents of the book instead of the default file')),
+
+        OptionRecommendation(name='extracted', recommended_value=False,
+            help=_('Extract the contents of the generated ZIP file to the directory of the generated ZIP file')
+        ),
    ])

    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
@@ -168,10 +172,14 @@ class HTMLOutput(OutputFormatPlugin):
            item.unload_data_from_memory(memory=path)

        zfile = zipfile.ZipFile(output_path, "w")
-        zfile.write(output_file, os.path.basename(output_file), zipfile.ZIP_DEFLATED)
+        zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)
        self.add_folder_to_zipfile(zfile, output_dir)
        zfile.close()

+        if (self.opts.extracted):
+            shutil.copy(output_file, dirname(output_path))
+            shutil.copytree(output_dir, os.path.join(dirname(output_path), basename(output_dir)))
+
        # cleanup temp dir
        shutil.rmtree(tempdir)
@@ -8,88 +8,122 @@ Fetch metadata using Amazon AWS
'''
import sys, re

-from lxml import etree
+from lxml import html
+from lxml.html import soupparser

from calibre import browser
-from calibre.utils.date import parse_date, utcnow
-from calibre.ebooks.metadata import MetaInformation, string_to_authors
+from calibre.ebooks.metadata import check_isbn
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html

-AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'
-
-def AWS(tag):
-    return '{%s}%s'%(AWS_NS, tag)
-
-class ISBNNotFound(ValueError):
-    pass
-
-def check_for_errors(root, isbn):
-    err = root.find('.//'+AWS('Error'))
-    if err is not None:
-        text = etree.tostring(err, method='text', pretty_print=True,
-                encoding=unicode)
-        if 'AWS.InvalidParameterValue'+isbn in text:
-            raise ISBNNotFound(isbn)
-        raise Exception('Failed to get metadata with error: '\
-                + text)
+def find_asin(br, isbn):
+    q = 'http://www.amazon.com/s?field-keywords='+isbn
+    raw = br.open_novisit(q).read()
+    raw = xml_to_unicode(raw, strip_encoding_pats=True,
+            resolve_entities=True)[0]
+    root = html.fromstring(raw)
+    revs = root.xpath('//*[@class="asinReviewsSummary" and @name]')
+    revs = [x.get('name') for x in revs]
+    if revs:
+        return revs[0]
+
+def to_asin(br, isbn):
+    if len(isbn) == 13:
+        try:
+            asin = find_asin(br, isbn)
+        except:
+            import traceback
+            traceback.print_exc()
+            asin = None
+    else:
+        asin = isbn
+    return asin

def get_social_metadata(title, authors, publisher, isbn):
-    mi = MetaInformation(title, authors)
-    if isbn:
-        br = browser()
-        response_xml = br.open('http://status.calibre-ebook.com/aws/metadata/'+isbn).read()
-        root = etree.fromstring(response_xml)
-        try:
-            check_for_errors(root, isbn)
-        except ISBNNotFound:
-            return mi
-        mi.title = root.findtext('.//'+AWS('Title'))
-        authors = [x.text for x in root.findall('.//'+AWS('Author'))]
-        if authors:
-            mi.authors = []
-            for x in authors:
-                mi.authors.extend(string_to_authors(x))
-        mi.publisher = root.findtext('.//'+AWS('Publisher'))
-        try:
-            d = root.findtext('.//'+AWS('PublicationDate'))
-            if d:
-                default = utcnow().replace(day=15)
-                d = parse_date(d[0].text, assume_utc=True, default=default)
-                mi.pubdate = d
-        except:
-            pass
-        try:
-            rating = float(root.findtext('.//'+AWS('AverageRating')))
-            num_of_reviews = int(root.findtext('.//'+AWS('TotalReviews')))
-            if num_of_reviews > 4 and rating > 0 and rating < 5:
-                mi.rating = rating
-        except:
-            pass
-        tags = [x.text for x in root.findall('.//%s/%s'%(AWS('Subjects'),
-            AWS('Subject')))]
-        if tags:
-            mi.tags = []
-            for x in tags:
-                mi.tags.extend([y.strip() for y in x.split('/')])
-            mi.tags = [x.replace(',', ';') for x in mi.tags]
-        comments = root.find('.//%s/%s'%(AWS('EditorialReview'),
-            AWS('Content')))
-        if comments is not None:
-            mi.comments = etree.tostring(comments,
-                    method='text', encoding=unicode)
-            mi.comments = re.sub('<([pP]|DIV)>', '\n\n', mi.comments)
-            mi.comments = re.sub('</?[iI]>', '*', mi.comments)
-            mi.comments = re.sub('</?[bB]>', '**', mi.comments)
-            mi.comments = re.sub('<BR>', '\n\n', mi.comments)
-            mi.comments = re.sub('<[^>]+>', '', mi.comments)
-            mi.comments = mi.comments.strip()
-            mi.comments = _('EDITORIAL REVIEW')+':\n\n'+mi.comments
-
+    mi = Metadata(title, authors)
+    if not isbn:
+        return mi
+    isbn = check_isbn(isbn)
+    if not isbn:
+        return mi
+    br = browser()
+    asin = to_asin(br, isbn)
+    if asin and get_metadata(br, asin, mi):
+        return mi
+    from calibre.ebooks.metadata.xisbn import xisbn
+    for i in xisbn.get_associated_isbns(isbn):
+        asin = to_asin(br, i)
+        if asin and get_metadata(br, asin, mi):
+            return mi
    return mi

+def get_metadata(br, asin, mi):
+    q = 'http://amzn.com/'+asin
+    try:
+        raw = br.open_novisit(q).read()
+    except Exception, e:
+        if callable(getattr(e, 'getcode', None)) and \
+                e.getcode() == 404:
+            return False
+        raise
+    if '<title>404 - ' in raw:
+        return False
+    raw = xml_to_unicode(raw, strip_encoding_pats=True,
+            resolve_entities=True)[0]
+    try:
+        root = soupparser.fromstring(raw)
+    except:
+        return False
+    ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
+    if ratings:
+        pat = re.compile(r'([0-9.]+) out of (\d+) stars')
+        r = ratings[0]
+        for elem in r.xpath('descendant::*[@title]'):
+            t = elem.get('title')
+            m = pat.match(t)
+            if m is not None:
+                try:
+                    mi.rating = float(m.group(1))/float(m.group(2)) * 5
+                    break
+                except:
+                    pass
+
+    desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
+    if desc:
+        desc = desc[0]
+        for c in desc.xpath('descendant::*[@class="seeAll" or'
+                ' @class="emptyClear" or @href]'):
+            c.getparent().remove(c)
+        desc = html.tostring(desc, method='html', encoding=unicode).strip()
+        # remove all attributes from tags
+        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
+        # Collapse whitespace
+        #desc = re.sub('\n+', '\n', desc)
+        #desc = re.sub(' +', ' ', desc)
+        # Remove the notice about text referring to out of print editions
+        desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
+        # Remove comments
+        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
+        mi.comments = sanitize_comments_html(desc)
+
+    return True

def main(args=sys.argv):
-    print get_social_metadata(None, None, None, '9781416551720')
+    # Test xisbn
+    print get_social_metadata('Learning Python', None, None, '8324616489')
+    print
+
+    # Test sophisticated comment formatting
+    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
+    print
+
+    # Random tests
+    print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
+    print
+    print get_social_metadata('The Great Gatsby', None, None, '0743273567')

    return 0

if __name__ == '__main__':
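A quick standalone check (Python 2) of the rating-scraping pattern used in get_metadata() above:

    import re
    pat = re.compile(r'([0-9.]+) out of (\d+) stars')
    m = pat.match('4.5 out of 5 stars')
    print float(m.group(1))/float(m.group(2)) * 5  # 4.5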
@@ -43,7 +43,7 @@ class SafeFormat(TemplateFormatter):
            b = self.book.get_user_metadata(key, False)
            if b and b['datatype'] == 'int' and self.book.get(key, 0) == 0:
                v = ''
-            elif b and b['datatype'] == 'float' and b.get(key, 0.0) == 0.0:
+            elif b and b['datatype'] == 'float' and self.book.get(key, 0.0) == 0.0:
                v = ''
            else:
                ign, v = self.book.format_field(key.lower(), series_with_index=False)
@@ -501,7 +501,7 @@ class Metadata(object):
        if key.startswith('#') and key.endswith('_index'):
            tkey = key[:-6] # strip the _index
            cmeta = self.get_user_metadata(tkey, make_copy=False)
-            if cmeta['datatype'] == 'series':
+            if cmeta and cmeta['datatype'] == 'series':
                if self.get(tkey):
                    res = self.get_extra(tkey)
                    return (unicode(cmeta['name']+'_index'),
@@ -22,7 +22,8 @@ def get_metadata(stream):
        'xlink':XLINK_NS})
    tostring = lambda x : etree.tostring(x, method='text',
            encoding=unicode).strip()
-    root = etree.fromstring(stream.read())
+    parser = etree.XMLParser(recover=True, no_network=True)
+    root = etree.fromstring(stream.read(), parser=parser)
    authors, author_sort = [], None
    for au in XPath('//fb2:author')(root):
        fname = lname = author = None
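A minimal sketch of why the recovering parser helps: lxml silently repairs the kind of malformed markup that previously aborted FB2 metadata reads.

    from lxml import etree

    parser = etree.XMLParser(recover=True, no_network=True)
    root = etree.fromstring('<root><bad></root>', parser=parser)
    print etree.tostring(root)  # roughly: <root><bad/></root>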
@@ -12,6 +12,7 @@ from calibre.utils.logging import default_log
from calibre.utils.titlecase import titlecase
from calibre.customize import Plugin
from calibre.ebooks.metadata.covers import check_for_cover
+from calibre.utils.html2text import html2text

metadata_config = None

@@ -48,6 +49,11 @@ class MetadataSource(Plugin): # {{{
    #: member.
    string_customization_help = None

+    #: Set this to true if your plugin returns HTML markup in comments.
+    #: Then if the user disables HTML, calibre will automagically convert
+    #: the HTML to Markdown.
+    has_html_comments = False
+
    type = _('Metadata download')

    def __call__(self, title, author, publisher, isbn, verbose, log=None,
@@ -79,6 +85,13 @@ class MetadataSource(Plugin): # {{{
                    mi.comments = None
                if not c.get('tags', True):
                    mi.tags = []
+                if self.has_html_comments and mi.comments and \
+                        c.get('textcomments', False):
+                    try:
+                        mi.comments = html2text(mi.comments)
+                    except:
+                        traceback.print_exc()
+                        mi.comments = None

        except Exception, e:
            self.exception = e
@@ -132,11 +145,17 @@ class MetadataSource(Plugin): # {{{
            setattr(w, '_'+x, cb)
            cb.setChecked(c.get(x, True))
            w._layout.addWidget(cb)
+
+        cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
+        setattr(w, '_textcomments', cb)
+        cb.setChecked(c.get('textcomments', False))
+        w._layout.addWidget(cb)
+
        return w

    def save_settings(self, w):
        dl_settings = {}
-        for x in ('rating', 'tags', 'comments'):
+        for x in ('rating', 'tags', 'comments', 'textcomments'):
            dl_settings[x] = getattr(w, '_'+x).isChecked()
        c = self.config_store()
        c.set(self.name, dl_settings)
@@ -210,6 +229,8 @@ class Amazon(MetadataSource): # {{{
    metadata_type = 'social'
    description = _('Downloads social metadata from amazon.com')

+    has_html_comments = True
+
    def fetch(self):
        if not self.isbn:
            return
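Hedged sketch of the new plain-text fallback: when a plugin sets has_html_comments and the user ticks the new checkbox, downloaded comments are run through html2text before storage (exact output formatting depends on the html2text version):

    from calibre.utils.html2text import html2text

    print html2text(u'<p>An <i>editorial</i> review</p>')
    # e.g.: An _editorial_ review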
@@ -382,11 +382,13 @@ class Guide(ResourceCollection): # {{{

class MetadataField(object):

-    def __init__(self, name, is_dc=True, formatter=None, none_is=None):
+    def __init__(self, name, is_dc=True, formatter=None, none_is=None,
+            renderer=lambda x: unicode(x)):
        self.name = name
        self.is_dc = is_dc
        self.formatter = formatter
        self.none_is = none_is
+        self.renderer = renderer

    def __real_get__(self, obj, type=None):
        ans = obj.get_metadata_element(self.name)
@@ -418,7 +420,7 @@ class MetadataField(object):
            return
        if elem is None:
            elem = obj.create_metadata_element(self.name, is_dc=self.is_dc)
-        obj.set_text(elem, unicode(val))
+        obj.set_text(elem, self.renderer(val))


def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)):
@@ -489,10 +491,11 @@ class OPF(object): # {{{
    series = MetadataField('series', is_dc=False)
    series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
    rating = MetadataField('rating', is_dc=False, formatter=int)
-    pubdate = MetadataField('date', formatter=parse_date)
+    pubdate = MetadataField('date', formatter=parse_date,
+            renderer=isoformat)
    publication_type = MetadataField('publication_type', is_dc=False)
    timestamp = MetadataField('timestamp', is_dc=False,
-            formatter=parse_date)
+            formatter=parse_date, renderer=isoformat)


    def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
@@ -826,11 +829,10 @@ class OPF(object): # {{{

        def fset(self, val):
            matches = self.isbn_path(self.metadata)
-            if val is None:
-                if matches:
-                    for x in matches:
-                        x.getparent().remove(x)
-                    return
+            if not val:
+                for x in matches:
+                    x.getparent().remove(x)
+                return
            if not matches:
                attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
                matches = [self.create_metadata_element('identifier',
@@ -987,11 +989,14 @@ class OPF(object): # {{{
    def smart_update(self, mi, replace_metadata=False):
        for attr in ('title', 'authors', 'author_sort', 'title_sort',
                     'publisher', 'series', 'series_index', 'rating',
-                     'isbn', 'language', 'tags', 'category', 'comments',
+                     'isbn', 'tags', 'category', 'comments',
                     'pubdate'):
            val = getattr(mi, attr, None)
            if val is not None and val != [] and val != (None, None):
                setattr(self, attr, val)
+        lang = getattr(mi, 'language', None)
+        if lang and lang != 'und':
+            self.language = lang
        temp = self.to_book_metadata()
        temp.smart_update(mi, replace_metadata=replace_metadata)
        self._user_metadata_ = temp.get_all_user_metadata(True)
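The renderer hook matters because datetime values must serialize as ISO dates in the OPF rather than through unicode()'s default representation; a small sketch (the exact timezone suffix depends on the environment):

    from datetime import datetime
    from calibre.utils.date import isoformat

    d = datetime(2010, 10, 29)
    print unicode(d)    # 2010-10-29 00:00:00
    print isoformat(d)  # e.g. 2010-10-29T00:00:00+00:00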
@@ -12,7 +12,7 @@ import os, time, sys, shutil

from calibre.utils.ipc.job import ParallelJob
from calibre.utils.ipc.server import Server
-from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory
from calibre import prints
from calibre.constants import filesystem_encoding

@@ -21,51 +21,76 @@ def debug(*args):
    prints(*args)
    sys.stdout.flush()

-def read_metadata_(task, tdir, notification=lambda x,y:x):
+def serialize_metadata_for(formats, tdir, id_):
    from calibre.ebooks.metadata.meta import metadata_from_formats
    from calibre.ebooks.metadata.opf2 import metadata_to_opf
+    mi = metadata_from_formats(formats)
+    mi.cover = None
+    cdata = None
+    if mi.cover_data:
+        cdata = mi.cover_data[-1]
+    mi.cover_data = None
+    if not mi.application_id:
+        mi.application_id = '__calibre_dummy__'
+    with open(os.path.join(tdir, '%s.opf'%id_), 'wb') as f:
+        f.write(metadata_to_opf(mi))
+    if cdata:
+        with open(os.path.join(tdir, str(id_)), 'wb') as f:
+            f.write(cdata)
+
+def read_metadata_(task, tdir, notification=lambda x,y:x):
+    with TemporaryDirectory() as mdir:
+        do_read_metadata(task, tdir, mdir, notification)
+
+def do_read_metadata(task, tdir, mdir, notification):
    from calibre.customize.ui import run_plugins_on_import
    for x in task:
        try:
-            id, formats = x
+            id_, formats = x
+        except:
+            continue
+        try:
            if isinstance(formats, basestring): formats = [formats]
-            mi = metadata_from_formats(formats)
-            mi.cover = None
-            cdata = None
-            if mi.cover_data:
-                cdata = mi.cover_data[-1]
-            mi.cover_data = None
-            if not mi.application_id:
-                mi.application_id = '__calibre_dummy__'
-            with open(os.path.join(tdir, '%s.opf'%id), 'wb') as f:
-                f.write(metadata_to_opf(mi))
-            if cdata:
-                with open(os.path.join(tdir, str(id)), 'wb') as f:
-                    f.write(cdata)
            import_map = {}
+            fmts, metadata_fmts = [], []
            for format in formats:
+                mfmt = format
+                name, ext = os.path.splitext(os.path.basename(format))
                nfp = run_plugins_on_import(format)
-                if nfp is None:
-                    nfp = format
-                nfp = os.path.abspath(nfp)
+                if not nfp or nfp == format or not os.access(nfp, os.R_OK):
+                    nfp = None
+                else:
+                    # Ensure that the filename is preserved so that
+                    # reading metadata from filename is not broken
+                    nfp = os.path.abspath(nfp)
+                    nfext = os.path.splitext(nfp)[1]
+                    mfmt = os.path.join(mdir, name + nfext)
+                    shutil.copyfile(nfp, mfmt)
+                metadata_fmts.append(mfmt)
+                fmts.append(nfp)
+
+            serialize_metadata_for(metadata_fmts, tdir, id_)
+
+            for format, nfp in zip(formats, fmts):
+                if not nfp:
+                    continue
                if isinstance(nfp, unicode):
                    nfp.encode(filesystem_encoding)
                x = lambda j : os.path.abspath(os.path.normpath(os.path.normcase(j)))
                if x(nfp) != x(format) and os.access(nfp, os.R_OK|os.W_OK):
                    fmt = os.path.splitext(format)[1].replace('.', '').lower()
                    nfmt = os.path.splitext(nfp)[1].replace('.', '').lower()
-                    dest = os.path.join(tdir, '%s.%s'%(id, nfmt))
+                    dest = os.path.join(tdir, '%s.%s'%(id_, nfmt))
                    shutil.copyfile(nfp, dest)
                    import_map[fmt] = dest
-                    os.remove(nfp)
            if import_map:
-                with open(os.path.join(tdir, str(id)+'.import'), 'wb') as f:
+                with open(os.path.join(tdir, str(id_)+'.import'), 'wb') as f:
                    for fmt, nfp in import_map.items():
                        f.write(fmt+':'+nfp+'\n')
-            notification(0.5, id)
+            notification(0.5, id_)
        except:
            import traceback
-            with open(os.path.join(tdir, '%s.error'%id), 'wb') as f:
+            with open(os.path.join(tdir, '%s.error'%id_), 'wb') as f:
                f.write(traceback.format_exc())

class Progress(object):
src/calibre/ebooks/metadata/xisbn.py (new file, 80 lines)
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import threading, re, json
+
+from calibre import browser
+
+class xISBN(object):
+
+    QUERY = 'http://xisbn.worldcat.org/webservices/xid/isbn/%s?method=getEditions&format=json&fl=form,year,lang,ed'
+
+    def __init__(self):
+        self.lock = threading.RLock()
+        self._data = []
+        self._map = {}
+
+        self.br = browser()
+        self.isbn_pat = re.compile(r'[^0-9X]', re.IGNORECASE)
+
+    def purify(self, isbn):
+        return self.isbn_pat.sub('', isbn.upper())
+
+    def fetch_data(self, isbn):
+        url = self.QUERY%isbn
+        data = self.br.open_novisit(url).read()
+        data = json.loads(data)
+        if data.get('stat', None) != 'ok':
+            return []
+        data = data.get('list', [])
+        ans = []
+        for rec in data:
+            forms = rec.get('form', [])
+            # Only get books, not audio/video
+            forms = [x for x in forms if x in ('BA', 'BC', 'BB', 'DA')]
+            if forms:
+                ans.append(rec)
+        return ans
+
+    def get_data(self, isbn):
+        isbn = self.purify(isbn)
+        with self.lock:
+            if isbn not in self._map:
+                try:
+                    data = self.fetch_data(isbn)
+                except:
+                    import traceback
+                    traceback.print_exc()
+                    data = []
+                id_ = len(self._data)
+                self._data.append(data)
+                for rec in data:
+                    for i in rec.get('isbn', []):
+                        self._map[i] = id_
+                self._map[isbn] = id_
+            return self._data[self._map[isbn]]
+
+    def get_associated_isbns(self, isbn):
+        data = self.get_data(isbn)
+        ans = set([])
+        for rec in data:
+            for i in rec.get('isbn', []):
+                ans.add(i)
+        return ans
+
+xisbn = xISBN()
+
+if __name__ == '__main__':
+    import sys
+    isbn = sys.argv[-1]
+    print xisbn.get_data(isbn)
+    print
+    print xisbn.get_associated_isbns(isbn)
@@ -27,6 +27,8 @@ TABLE_TAGS = set(['table', 'tr', 'td', 'th', 'caption'])
SPECIAL_TAGS = set(['hr', 'br'])
CONTENT_TAGS = set(['img', 'hr', 'br'])

+NOT_VTAGS = HEADER_TAGS | NESTABLE_TAGS | TABLE_TAGS | SPECIAL_TAGS | \
+        CONTENT_TAGS
PAGE_BREAKS = set(['always', 'left', 'right'])

COLLAPSE = re.compile(r'[ \t\r\n\v]+')
@@ -57,8 +59,6 @@ class FormatState(object):
        self.indent = 0.
        self.fsize = 3
        self.ids = set()
-        self.valign = 'baseline'
-        self.nest = False
        self.italic = False
        self.bold = False
        self.strikethrough = False
@@ -76,7 +76,6 @@ class FormatState(object):
               and self.italic == other.italic \
               and self.bold == other.bold \
               and self.href == other.href \
-               and self.valign == other.valign \
               and self.preserve == other.preserve \
               and self.family == other.family \
               and self.bgcolor == other.bgcolor \
@@ -224,7 +223,6 @@ class MobiMLizer(object):
            return
        if not pstate or istate != pstate:
            inline = para
-            valign = istate.valign
            fsize = istate.fsize
            href = istate.href
            if not href:
@@ -234,19 +232,8 @@ class MobiMLizer(object):
            else:
                inline = etree.SubElement(inline, XHTML('a'), href=href)
                bstate.anchor = inline
-            if valign == 'super':
-                parent = inline
-                if istate.nest and bstate.inline is not None:
-                    parent = bstate.inline
-                    istate.nest = False
-                inline = etree.SubElement(parent, XHTML('sup'))
-            elif valign == 'sub':
-                parent = inline
-                if istate.nest and bstate.inline is not None:
-                    parent = bstate.inline
-                    istate.nest = False
-                inline = etree.SubElement(parent, XHTML('sub'))
-            elif fsize != 3:
+            if fsize != 3:
                inline = etree.SubElement(inline, XHTML('font'),
                        size=str(fsize))
            if istate.family == 'monospace':
@@ -279,7 +266,8 @@ class MobiMLizer(object):
            else:
                inline.append(item)

-    def mobimlize_elem(self, elem, stylizer, bstate, istates):
+    def mobimlize_elem(self, elem, stylizer, bstate, istates,
+            ignore_valign=False):
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return
@@ -287,7 +275,15 @@ class MobiMLizer(object):
        # <mbp:frame-set/> does not exist lalalala
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
-            return
+            id_ = elem.get('id', None)
+            if id_:
+                # Keep anchors so people can use display:none
+                # to generate hidden TOCs
+                elem.clear()
+                elem.text = None
+                elem.set('id', id_)
+            else:
+                return
        tag = barename(elem.tag)
        istate = copy.copy(istates[-1])
        istate.rendered = False
@@ -351,15 +347,6 @@ class MobiMLizer(object):
            istate.family = 'sans-serif'
        else:
            istate.family = 'serif'
-        valign = style['vertical-align']
-        if valign in ('super', 'text-top') or asfloat(valign) > 0:
-            istate.nest = istate.valign in ('sub', 'super')
-            istate.valign = 'super'
-        elif valign == 'sub' or asfloat(valign) < 0:
-            istate.nest = istate.valign in ('sub', 'super')
-            istate.valign = 'sub'
-        else:
-            istate.valign = 'baseline'
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
@@ -407,6 +394,36 @@ class MobiMLizer(object):
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
+        valign = style['vertical-align']
+        not_baseline = valign in ('super', 'sub', 'text-top',
+                'text-bottom')
+        vtag = 'sup' if valign in ('super', 'text-top') else 'sub'
+        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
+            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
+            vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
+            vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
+            self.mobimlize_elem(elem, stylizer, vbstate, istates,
+                    ignore_valign=True)
+            if len(istates) > 0:
+                istates.pop()
+            if len(istates) == 0:
+                istates.append(FormatState())
+            at_start = bstate.para is None
+            if at_start:
+                self.mobimlize_content('span', '', bstate, istates)
+            parent = bstate.para if bstate.inline is None else bstate.inline
+            if parent is not None:
+                vtag = etree.SubElement(parent, XHTML(vtag))
+                # Add anchors
+                for child in vbstate.body:
+                    if child is not vbstate.para:
+                        vtag.append(child)
+                    else:
+                        break
+                for child in vbstate.para:
+                    vtag.append(child)
+            return
+
        if text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS:
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
@@ -421,6 +438,8 @@ class MobiMLizer(object):
                tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)
+
+
        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
@@ -42,11 +42,10 @@ class MOBIOutput(OutputFormatPlugin):
        ])

    def check_for_periodical(self):
-        if self.oeb.metadata.publication_type and \
-            unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
-            self.periodicalize_toc()
-            self.check_for_masthead()
-            self.opts.mobi_periodical = True
+        if self.is_periodical:
+            self.periodicalize_toc()
+            self.check_for_masthead()
+            self.opts.mobi_periodical = True
        else:
            self.opts.mobi_periodical = False

|
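Note: the two deleted lines suggest what the new `is_periodical` attribute presumably checks; a sketch with a hypothetical minimal class, not the plugin's actual base-class code:

```python
class MOBIOutputSketch(object):
    # Stand-in: in calibre the plugin gets self.oeb from the conversion
    # pipeline; publication_type here is e.g. 'periodical:magazine'.
    def __init__(self, publication_type):
        self.publication_type = publication_type

    @property
    def is_periodical(self):
        # Reproduces the deleted inline check (assumption).
        return (self.publication_type is not None and
                unicode(self.publication_type).startswith('periodical:'))
```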
@@ -2043,12 +2043,16 @@ class MobiWriter(object):
         else :
             self._oeb.logger.info("chapterCount: %d" % self._chapterCount)

-        if True:
-            rec_count = len(self._ctoc_records)
-            self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
-                (rec_count + 1, 'records, last record' if rec_count else 'record,', len(self._ctoc.getvalue())/655) )
+        # Apparently the CTOC must end with a null byte
+        self._ctoc.write('\0')
+
+        ctoc = self._ctoc.getvalue()
+        rec_count = len(self._ctoc_records)
+        self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
+            (rec_count + 1, 'records, last record' if rec_count else 'record,',
+                len(ctoc)/655) )

-        return align_block(self._ctoc.getvalue())
+        return align_block(ctoc)

     def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
         pos = 0xc0 + indxt.tell()
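Note: the rewrite appends the terminating NUL before the record is measured and aligned. A hedged re-implementation of the padding step, assuming `align_block` pads to a 4-byte boundary as the MOBI writer's helper does:

```python
from cStringIO import StringIO

def align_block(raw, multiple=4, pad='\0'):
    # Sketch of the writer's helper (assumption): pad a record out to a
    # 4-byte boundary with NUL bytes.
    extra = len(raw) % multiple
    return raw if extra == 0 else raw + pad * (multiple - extra)

ctoc = StringIO()
ctoc.write('Introduction\0Chapter One')
ctoc.write('\0')  # the CTOC record must end with a null byte
block = align_block(ctoc.getvalue())
print(len(block) % 4)  # 0
```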
@@ -25,6 +25,7 @@ from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
 from calibre.ebooks.conversion.preprocess import CSSPreProcessor
+from calibre import isbytestring

 RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)

@@ -404,7 +405,8 @@ class DirContainer(object):

     def __init__(self, path, log):
         self.log = log
-        path = unicode(path)
+        if isbytestring(path):
+            path = path.decode(filesystem_encoding)
         ext = os.path.splitext(path)[1].lower()
         if ext == '.opf':
             self.opfname = os.path.basename(path)
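Note: `unicode(path)` on a byte string implicitly decodes as ASCII and blows up on non-ASCII file names; the hunk decodes only when actually given bytes. A standalone sketch of the same guard (`isbytestring` re-implemented here as an assumption):

```python
import sys

filesystem_encoding = sys.getfilesystemencoding() or 'utf-8'

def isbytestring(obj):
    # Sketch of calibre's helper: true only for raw byte strings
    # (on Python 2, str is the byte type).
    return isinstance(obj, str)

def normalize_path(path):
    # Decode only when needed; already-unicode paths pass through untouched.
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    return path

# Assuming a UTF-8 file system:
print(repr(normalize_path('caf\xc3\xa9.opf')))  # u'caf\xe9.opf'
```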
@@ -6,8 +6,6 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import cStringIO
-
 from calibre import fit_image

 class RescaleImages(object):

@@ -19,13 +17,7 @@ class RescaleImages(object):
         self.rescale(qt=is_ok_to_use_qt())

     def rescale(self, qt=True):
-        from PyQt4.Qt import QImage, Qt
-        from calibre.gui2 import pixmap_to_data
-        try:
-            from PIL import Image as PILImage
-            PILImage
-        except ImportError:
-            import Image as PILImage
+        from calibre.utils.magick.draw import Image

         is_image_collection = getattr(self.opts, 'is_image_collection', False)

@@ -35,6 +27,7 @@ class RescaleImages(object):
             page_width, page_height = self.opts.dest.width, self.opts.dest.height
             page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
             page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
+
         for item in self.oeb.manifest:
             if item.media_type.startswith('image'):
                 ext = item.media_type.split('/')[-1].upper()

@@ -44,42 +37,25 @@ class RescaleImages(object):

                 raw = item.data
                 if not raw: continue
-                if qt:
-                    img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
-                    try:
-                        if not img.loadFromData(raw): continue
-                    except:
-                        continue
-                    width, height = img.width(), img.height()
-                else:
-                    f = cStringIO.StringIO(raw)
-                    try:
-                        im = PILImage.open(f)
-                    except IOError:
-                        continue
-                    width, height = im.size
-
+                try:
+                    img = Image()
+                    img.load(raw)
+                except:
+                    continue
+                width, height = img.size

                 scaled, new_width, new_height = fit_image(width, height,
                         page_width, page_height)
                 if scaled:
-                    data = None
                     self.log('Rescaling image from %dx%d to %dx%d'%(
                         width, height, new_width, new_height), item.href)
-                    if qt:
-                        img = img.scaled(new_width, new_height,
-                            Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
-                        data = pixmap_to_data(img, format=ext)
+                    try:
+                        img.size = (new_width, new_height)
+                        data = img.export(ext.lower())
+                    except:
+                        self.log.exception('Failed to rescale image')
                     else:
-                        try:
-                            im = im.resize((int(new_width), int(new_height)), PILImage.ANTIALIAS)
-                            of = cStringIO.StringIO()
-                            im.convert('RGB').save(of, ext)
-                            data = of.getvalue()
-                        except:
-                            self.log.exception('Failed to rescale image')
-                    if data is not None:
                         item.data = data
                         item.unload_data_from_memory()

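Note: the transform now probes and resizes through calibre's ImageMagick wrapper instead of branching between Qt and PIL; `fit_image` is the piece that stays. A hedged re-implementation of its aspect-preserving contract, so the hunk can be followed without calibre installed:

```python
from math import floor

def fit_image(width, height, pwidth, pheight):
    # Sketch of calibre.fit_image (assumption): scale down, preserving
    # aspect ratio, until the image fits in pwidth x pheight.
    # Returns (was_scaled, new_width, new_height).
    scaled = height > pheight or width > pwidth
    if height > pheight:
        corrf = pheight / float(height)
        width, height = floor(corrf * width), pheight
    if width > pwidth:
        corrf = pwidth / float(width)
        width, height = pwidth, floor(corrf * height)
    return scaled, int(width), int(height)

print(fit_image(1200, 1600, 590, 750))  # (True, 562, 750)
```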
@@ -20,20 +20,10 @@ class SNBOutput(OutputFormatPlugin):
     file_type = 'snb'

     options = set([
-        # OptionRecommendation(name='newline', recommended_value='system',
-        #     level=OptionRecommendation.LOW,
-        #     short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
-        #     help=_('Type of newline to use. Options are %s. Default is \'system\'. '
-        #         'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
-        #         'For Mac OS X use \'unix\'. \'system\' will default to the newline '
-        #         'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
         OptionRecommendation(name='snb_output_encoding', recommended_value='utf-8',
             level=OptionRecommendation.LOW,
             help=_('Specify the character encoding of the output document. ' \
                 'The default is utf-8.')),
-        # OptionRecommendation(name='inline_toc',
-        #     recommended_value=False, level=OptionRecommendation.LOW,
-        #     help=_('Add Table of Contents to beginning of the book.')),
         OptionRecommendation(name='snb_max_line_length',
             recommended_value=0, level=OptionRecommendation.LOW,
             help=_('The maximum number of characters per line. This splits on '

@@ -41,10 +31,18 @@ class SNBOutput(OutputFormatPlugin):
                 'the line will be broken at the space after and will exceed the '
                 'specified value. Also, there is a minimum of 25 characters. '
                 'Use 0 to disable line splitting.')),
-        # OptionRecommendation(name='force_max_line_length',
-        #     recommended_value=False, level=OptionRecommendation.LOW,
-        #     help=_('Force splitting on the max-line-length value when no space '
-        #         'is present. Also allows max-line-length to be below the minimum')),
+        OptionRecommendation(name='snb_insert_empty_line',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Specify whether or not to insert an empty line between '
+                'two paragraphs.')),
+        OptionRecommendation(name='snb_indent_first_line',
+            recommended_value=True, level=OptionRecommendation.LOW,
+            help=_('Specify whether or not to insert two space characters '
+                'to indent the first line of each paragraph.')),
+        OptionRecommendation(name='snb_hide_chapter_name',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Specify whether or not to hide the chapter title for each '
+                'chapter. Useful for image-only output (eg. comics).')),
     ])

     def convert(self, oeb_book, output_path, input_plugin, opts, log):

@@ -230,7 +228,7 @@ class SNBOutput(OutputFormatPlugin):
                 img.load(imageData)
                 (x,y) = img.size
                 if self.opts:
-                    SCREEN_Y, SCREEN_X = self.opts.output_profile.comic_screen_size
+                    SCREEN_X, SCREEN_Y = self.opts.output_profile.comic_screen_size
                 else:
                     SCREEN_X = 540
                     SCREEN_Y = 700
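Note: the dead, commented-out TXT options are dropped and three SNB-specific options added. For each `OptionRecommendation`, `recommended_value` doubles as the default, and the option name is also what surfaces on the conversion command line (presumably as `--snb-indent-first-line` and friends). A sketch that runs inside a calibre environment; the keyword signature is the one the hunk itself uses:

```python
from calibre.customize.conversion import OptionRecommendation

opt = OptionRecommendation(name='snb_indent_first_line',
        recommended_value=True, level=OptionRecommendation.LOW,
        help='Indent the first line of each paragraph with two spaces.')
print(opt.recommended_value)  # True
```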
@@ -88,7 +88,10 @@ class SNBMLizer(object):
         trees = { }
         for subitem, subtitle in self.subitems:
             snbcTree = etree.Element("snbc")
-            etree.SubElement(etree.SubElement(snbcTree, "head"), "title").text = subtitle
+            snbcHead = etree.SubElement(snbcTree, "head")
+            etree.SubElement(snbcHead, "title").text = subtitle
+            if self.opts and self.opts.snb_hide_chapter_name:
+                etree.SubElement(snbcHead, "hidetitle").text = u"true"
             etree.SubElement(snbcTree, "body")
             trees[subitem] = snbcTree
             output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, ""))

@@ -96,27 +99,37 @@ class SNBMLizer(object):
         output = self.cleanup_text(u''.join(output))

         subitem = ''
+        bodyTree = trees[subitem].find(".//body")
         for line in output.splitlines():
             if not line.find(CALIBRE_SNB_PRE_TAG) == 0:
                 line = line.strip(u' \t\n\r\u3000')
             else:
-                etree.SubElement(trees[subitem].find(".//body"), "text").text = \
+                etree.SubElement(bodyTree, "text").text = \
                     etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
                 continue
             if len(line) != 0:
                 if line.find(CALIBRE_SNB_IMG_TAG) == 0:
                     prefix = ProcessFileName(os.path.dirname(self.item.href))
                     if prefix != '':
-                        etree.SubElement(trees[subitem].find(".//body"), "img").text = \
+                        etree.SubElement(bodyTree, "img").text = \
                             prefix + '_' + line[len(CALIBRE_SNB_IMG_TAG):]
                     else:
-                        etree.SubElement(trees[subitem].find(".//body"), "img").text = \
+                        etree.SubElement(bodyTree, "img").text = \
                             line[len(CALIBRE_SNB_IMG_TAG):]
                 elif line.find(CALIBRE_SNB_BM_TAG) == 0:
                     subitem = line[len(CALIBRE_SNB_BM_TAG):]
+                    bodyTree = trees[subitem].find(".//body")
                 else:
-                    etree.SubElement(trees[subitem].find(".//body"), "text").text = \
-                        etree.CDATA(unicode(u'\u3000\u3000' + line))
+                    if self.opts and self.opts.snb_indent_first_line:
+                        prefix = u'\u3000\u3000'
+                    else:
+                        prefix = u''
+                    etree.SubElement(bodyTree, "text").text = \
+                        etree.CDATA(unicode(prefix + line))
+                    if self.opts and self.opts.snb_insert_empty_line:
+                        etree.SubElement(bodyTree, "text").text = \
+                            etree.CDATA(u'')

         return trees

     def remove_newlines(self, text):
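Note: besides caching `bodyTree` instead of re-running the XPath per line, the hunk wires the three new options into the generated snbc markup. A standalone sketch of the fragment the mlizer would produce with all three options enabled (element names taken from the hunk; the chapter title is illustrative):

```python
from lxml import etree

snbcTree = etree.Element("snbc")
head = etree.SubElement(snbcTree, "head")
etree.SubElement(head, "title").text = u'Chapter 1'
# snb_hide_chapter_name adds this marker:
etree.SubElement(head, "hidetitle").text = u"true"
body = etree.SubElement(snbcTree, "body")
# snb_indent_first_line prefixes each paragraph with two ideographic spaces:
etree.SubElement(body, "text").text = etree.CDATA(u'\u3000\u3000First paragraph')
# snb_insert_empty_line separates paragraphs with an empty text node:
etree.SubElement(body, "text").text = etree.CDATA(u'')

print(etree.tostring(snbcTree, pretty_print=True, encoding='utf-8'))
```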
@@ -255,7 +255,7 @@ def error_dialog(parent, title, msg, det_msg='', show=False,
     return d

 def question_dialog(parent, title, msg, det_msg='', show_copy_button=True,
-    buttons=QMessageBox.Yes|QMessageBox.No):
+    buttons=QMessageBox.Yes|QMessageBox.No, yes_button=QMessageBox.Yes):
     d = MessageBox(QMessageBox.Question, title, msg, buttons,
                    parent, det_msg)
     d.setIconPixmap(QPixmap(I('dialog_question.png')))

@@ -263,7 +263,7 @@ def question_dialog(parent, title, msg, det_msg='', show_copy_button=True,
     if not show_copy_button:
         d.cb.setVisible(False)

-    return d.exec_() == QMessageBox.Yes
+    return d.exec_() == yes_button

 def info_dialog(parent, title, msg, det_msg='', show=False):
     d = MessageBox(QMessageBox.Information, title, msg, QMessageBox.Ok,
|
|||||||
'fb2' : 'fb2',
|
'fb2' : 'fb2',
|
||||||
'rtf' : 'rtf',
|
'rtf' : 'rtf',
|
||||||
'odt' : 'odt',
|
'odt' : 'odt',
|
||||||
|
'snb' : 'snb',
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -514,7 +515,7 @@ class FileDialog(QObject):
|
|||||||
if f and os.path.exists(f):
|
if f and os.path.exists(f):
|
||||||
self.selected_files.append(f)
|
self.selected_files.append(f)
|
||||||
else:
|
else:
|
||||||
opts = QFileDialog.ShowDirsOnly if mode == QFileDialog.DirectoryOnly else QFileDialog.Option()
|
opts = QFileDialog.ShowDirsOnly if mode == QFileDialog.Directory else QFileDialog.Option()
|
||||||
f = unicode(QFileDialog.getExistingDirectory(parent, title, initial_dir, opts))
|
f = unicode(QFileDialog.getExistingDirectory(parent, title, initial_dir, opts))
|
||||||
if os.path.exists(f):
|
if os.path.exists(f):
|
||||||
self.selected_files.append(f)
|
self.selected_files.append(f)
|
||||||
@ -534,7 +535,7 @@ class FileDialog(QObject):
|
|||||||
|
|
||||||
def choose_dir(window, name, title, default_dir='~'):
|
def choose_dir(window, name, title, default_dir='~'):
|
||||||
fd = FileDialog(title=title, filters=[], add_all_files_filter=False,
|
fd = FileDialog(title=title, filters=[], add_all_files_filter=False,
|
||||||
parent=window, name=name, mode=QFileDialog.DirectoryOnly,
|
parent=window, name=name, mode=QFileDialog.Directory,
|
||||||
default_dir=default_dir)
|
default_dir=default_dir)
|
||||||
dir = fd.get_files()
|
dir = fd.get_files()
|
||||||
if dir:
|
if dir:
|
||||||
|
@@ -9,7 +9,6 @@ from PyQt4.Qt import Qt

 from calibre.gui2 import Dispatcher
 from calibre.gui2.tools import fetch_scheduled_recipe
-from calibre.utils.config import dynamic
 from calibre.gui2.actions import InterfaceAction

 class FetchNewsAction(InterfaceAction):

@@ -60,9 +59,9 @@ class FetchNewsAction(InterfaceAction):
             return self.gui.job_exception(job)
         id = self.gui.library_view.model().add_news(pt.name, arg)
         self.gui.library_view.model().reset()
-        sync = dynamic.get('news_to_be_synced', set([]))
+        sync = self.gui.news_to_be_synced
         sync.add(id)
-        dynamic.set('news_to_be_synced', sync)
+        self.gui.news_to_be_synced = sync
         self.scheduler.recipe_downloaded(arg)
         self.gui.status_bar.show_message(arg['title'] + _(' fetched.'), 3000)
         self.gui.email_news(id)
@@ -35,7 +35,6 @@ class ViewAction(InterfaceAction):
         self.qaction.setMenu(self.view_menu)
         ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)

-
     def location_selected(self, loc):
         enabled = loc == 'library'
         for action in list(self.view_menu.actions())[1:]:

@@ -111,7 +110,7 @@ class ViewAction(InterfaceAction):
                 'books at once can be slow and have a negative effect on the '
                 'responsiveness of your computer. Once started the process '
                 'cannot be stopped until complete. Do you wish to continue?'
-                ) % num)
+                ) % num, show_copy_button=False)

     def view_folder(self, *args):
         rows = self.gui.current_view().selectionModel().selectedRows()

@@ -134,6 +133,9 @@ class ViewAction(InterfaceAction):
         rows = self.gui.current_view().selectionModel().selectedRows()
         self._view_books(rows)

+    def view_triggered(self, index):
+        self._view_books([index])
+
     def view_specific_book(self, index):
         self._view_books([index])

@@ -28,6 +28,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, conne
     if log is None:
         log = Log()
     from calibre.library import db
+    from calibre.utils.config import prefs
+    prefs.refresh()
     db = db()
     db.catalog_plugin_on_device_temp_mapping = dbspec

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 __license__ = 'GPL 3'
-__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
 __docformat__ = 'restructuredtext en'

 from calibre.gui2.convert.snb_output_ui import Ui_Form

@@ -18,18 +18,9 @@ class PluginWidget(Widget, Ui_Form):

     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent,
-                [])
+                ['snb_insert_empty_line', 'snb_indent_first_line',
+                    'snb_hide_chapter_name',])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)

-        # default = self.opt_newline.currentText()
-
-        # global newline_model
-        # if newline_model is None:
-        #     newline_model = BasicComboModel(TxtNewlines.NEWLINE_TYPES.keys())
-        # self.newline_model = newline_model
-        # self.opt_newline.setModel(self.newline_model)
-
-        # default_index = self.opt_newline.findText(default)
-        # system_index = self.opt_newline.findText('system')
-        # self.opt_newline.setCurrentIndex(default_index if default_index != -1 else system_index if system_index != -1 else 0)
@@ -13,60 +13,41 @@
   <property name="windowTitle">
    <string>Form</string>
   </property>
-  <layout class="QGridLayout" name="gridLayout">
-   <!-- <item row="0" column="0"> -->
-   <!-- <widget class="QLabel" name="label"> -->
-   <!-- <property name="text"> -->
-   <!-- <string>&amp;Line ending style:</string> -->
-   <!-- </property> -->
-   <!-- <property name="buddy"> -->
-   <!-- <cstring>opt_newline</cstring> -->
-   <!-- </property> -->
-   <!-- </widget> -->
-   <!-- </item> -->
-   <!-- <item row="0" column="1"> -->
-   <!-- <widget class="QComboBox" name="opt_newline"/> -->
-   <!-- </item> -->
-   <!-- <item row="4" column="0"> -->
-   <!-- <spacer name="verticalSpacer"> -->
-   <!-- <property name="orientation"> -->
-   <!-- <enum>Qt::Vertical</enum> -->
-   <!-- </property> -->
-   <!-- <property name="sizeHint" stdset="0"> -->
-   <!-- <size> -->
-   <!-- <width>20</width> -->
-   <!-- <height>246</height> -->
-   <!-- </size> -->
-   <!-- </property> -->
-   <!-- </spacer> -->
-   <!-- </item> -->
-   <!-- <item row="3" column="0" colspan="2"> -->
-   <!-- <widget class="QCheckBox" name="opt_inline_toc"> -->
-   <!-- <property name="text"> -->
-   <!-- <string>&amp;Inline TOC</string> -->
-   <!-- </property> -->
-   <!-- </widget> -->
-   <!-- </item> -->
-   <!-- <item row="1" column="1"> -->
-   <!-- <widget class="QSpinBox" name="opt_max_line_length"/> -->
-   <!-- </item> -->
-   <!-- <item row="1" column="0"> -->
-   <!-- <widget class="QLabel" name="label_2"> -->
-   <!-- <property name="text"> -->
-   <!-- <string>&amp;Maximum line length:</string> -->
-   <!-- </property> -->
-   <!-- <property name="buddy"> -->
-   <!-- <cstring>opt_max_line_length</cstring> -->
-   <!-- </property> -->
-   <!-- </widget> -->
-   <!-- </item> -->
-   <!-- <item row="2" column="0" colspan="2"> -->
-   <!-- <widget class="QCheckBox" name="opt_force_max_line_length"> -->
-   <!-- <property name="text"> -->
-   <!-- <string>Force maximum line length</string> -->
-   <!-- </property> -->
-   <!-- </widget> -->
-   <!-- </item> -->
+  <layout class="QGridLayout" name="gridLayout" rowstretch="0,0,0,0,0">
+   <item row="4" column="0">
+    <spacer name="verticalSpacer">
+     <property name="orientation">
+      <enum>Qt::Vertical</enum>
+     </property>
+     <property name="sizeHint" stdset="0">
+      <size>
+       <width>20</width>
+       <height>40</height>
+      </size>
+     </property>
+    </spacer>
+   </item>
+   <item row="3" column="0">
+    <widget class="QCheckBox" name="opt_snb_hide_chapter_name">
+     <property name="text">
+      <string>Hide chapter name</string>
+     </property>
+    </widget>
+   </item>
+   <item row="2" column="0">
+    <widget class="QCheckBox" name="opt_snb_indent_first_line">
+     <property name="text">
+      <string>Insert space before the first line for each paragraph</string>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="0">
+    <widget class="QCheckBox" name="opt_snb_insert_empty_line">
+     <property name="text">
+      <string>Insert empty line between paragraphs</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
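Note: this .ui rewrite and the PluginWidget hunk above depend on each other through a naming convention: each option name handed to `Widget.__init__` must have a matching `opt_<name>` control in the compiled form. A trivial sketch of the mapping:

```python
# Convention the two hunks rely on (as seen in the diff): option name
# 'snb_hide_chapter_name' binds to the checkbox 'opt_snb_hide_chapter_name'.
OPTION_NAMES = ['snb_insert_empty_line', 'snb_indent_first_line',
                'snb_hide_chapter_name']
WIDGET_NAMES = ['opt_' + name for name in OPTION_NAMES]
print(WIDGET_NAMES)
# ['opt_snb_insert_empty_line', 'opt_snb_indent_first_line',
#  'opt_snb_hide_chapter_name']
```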
@@ -429,7 +429,38 @@ class BulkBase(Base):
         self.db.set_custom_bulk(book_ids, val, num=self.col_id, notify=notify)

 class BulkBool(BulkBase, Bool):
-    pass
+
+    def get_initial_value(self, book_ids):
+        value = None
+        for book_id in book_ids:
+            val = self.db.get_custom(book_id, num=self.col_id, index_is_id=True)
+            if tweaks['bool_custom_columns_are_tristate'] == 'no' and val is None:
+                val = False
+            if value is not None and value != val:
+                return None
+            value = val
+        return value
+
+    def setup_ui(self, parent):
+        self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent),
+                QComboBox(parent)]
+        w = self.widgets[1]
+        items = [_('Yes'), _('No'), _('Undefined')]
+        icons = [I('ok.png'), I('list_remove.png'), I('blank.png')]
+        for icon, text in zip(icons, items):
+            w.addItem(QIcon(icon), text)
+
+    def setter(self, val):
+        val = {None: 2, False: 1, True: 0}[val]
+        self.widgets[1].setCurrentIndex(val)
+
+    def commit(self, book_ids, notify=False):
+        val = self.gui_val
+        val = self.normalize_ui_val(val)
+        if val != self.initial_val:
+            if tweaks['bool_custom_columns_are_tristate'] == 'no' and val is None:
+                val = False
+            self.db.set_custom_bulk(book_ids, val, num=self.col_id, notify=notify)

 class BulkInt(BulkBase, Int):
     pass
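Note: BulkBool encodes the tristate boolean as a combo index, and collapses Undefined to False when the `bool_custom_columns_are_tristate` tweak is off. A standalone sketch of the round trip:

```python
# Combo index 0/1/2 maps to Yes/No/Undefined, i.e. True/False/None,
# exactly as in BulkBool.setter above.
TO_INDEX = {None: 2, False: 1, True: 0}
FROM_INDEX = dict((v, k) for k, v in TO_INDEX.items())

for val in (True, False, None):
    assert FROM_INDEX[TO_INDEX[val]] == val

def normalize(val, tristate):
    # With the tweak set to 'no', Undefined collapses to False before
    # being committed to the database.
    if tristate == 'no' and val is None:
        return False
    return val

print(normalize(None, 'no'))   # False
print(normalize(None, 'yes'))  # None
```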
@@ -484,17 +484,22 @@ class DeviceMenu(QMenu): # {{{
                     _('Storage Card B')),
                 ]

+        later_menus = []
+
         for menu in (self, self.set_default_menu):
             for actions, desc in (
                     (basic_actions, ''),
+                    (specific_actions, _('Send specific format to')),
                     (delete_actions, _('Send and delete from library')),
-                    (specific_actions, _('Send specific format'))
                 ):
                 mdest = menu
                 if actions is not basic_actions:
-                    mdest = menu.addMenu(desc)
+                    mdest = QMenu(desc)
                     self._memory.append(mdest)
+                    later_menus.append(mdest)
+                    if menu is self.set_default_menu:
+                        menu.addMenu(mdest)
+                        menu.addSeparator()

                 for dest, delete, specific, icon, text in actions:
                     action = DeviceAction(dest, delete, specific, icon, text, self)

@@ -507,7 +512,7 @@ class DeviceMenu(QMenu): # {{{
                     action.a_s.connect(self.action_triggered)
                     self.actions.append(action)
                     mdest.addAction(action)
-                if actions is not specific_actions:
+                if actions is basic_actions:
                     menu.addSeparator()

         da = config['default_send_to_device_action']

@@ -525,14 +530,21 @@ class DeviceMenu(QMenu): # {{{
         self.group.triggered.connect(self.change_default_action)
         self.addSeparator()

+        self.addMenu(later_menus[0])
+        self.addSeparator()
+
         mitem = self.addAction(QIcon(I('eject.png')), _('Eject device'))
         mitem.setEnabled(False)
         mitem.triggered.connect(lambda x : self.disconnect_mounted_device.emit())
         self.disconnect_mounted_device_action = mitem

         self.addSeparator()

         self.addMenu(self.set_default_menu)
         self.addSeparator()

+        self.addMenu(later_menus[1])
+        self.addSeparator()
+
         annot = self.addAction(_('Fetch annotations (experimental)'))
         annot.setEnabled(False)
         annot.triggered.connect(lambda x :
|
|||||||
to_s = [account]
|
to_s = [account]
|
||||||
subjects = [_('News:')+' '+mi.title]
|
subjects = [_('News:')+' '+mi.title]
|
||||||
texts = [_('Attached is the')+' '+mi.title]
|
texts = [_('Attached is the')+' '+mi.title]
|
||||||
attachment_names = [mi.title+os.path.splitext(attachment)[1]]
|
attachment_names = [ascii_filename(mi.title)+os.path.splitext(attachment)[1]]
|
||||||
attachments = [attachment]
|
attachments = [attachment]
|
||||||
jobnames = ['%s:%s'%(id, mi.title)]
|
jobnames = ['%s:%s'%(id, mi.title)]
|
||||||
remove = [id] if config['delete_news_from_library_on_upload']\
|
remove = [id] if config['delete_news_from_library_on_upload']\
|
||||||
@ -1102,12 +1114,35 @@ class DeviceMixin(object): # {{{
|
|||||||
self.status_bar.show_message(_('Sending catalogs to device.'), 5000)
|
self.status_bar.show_message(_('Sending catalogs to device.'), 5000)
|
||||||
|
|
||||||
|
|
||||||
|
@dynamic_property
|
||||||
|
def news_to_be_synced(self):
|
||||||
|
doc = 'Set of ids to be sent to device'
|
||||||
|
def fget(self):
|
||||||
|
ans = []
|
||||||
|
try:
|
||||||
|
ans = self.library_view.model().db.prefs.get('news_to_be_synced',
|
||||||
|
[])
|
||||||
|
except:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
return set(ans)
|
||||||
|
|
||||||
|
def fset(self, ids):
|
||||||
|
try:
|
||||||
|
self.library_view.model().db.prefs.set('news_to_be_synced',
|
||||||
|
list(ids))
|
||||||
|
except:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
return property(fget=fget, fset=fset, doc=doc)
|
||||||
|
|
||||||
|
|
||||||
def sync_news(self, send_ids=None, do_auto_convert=True):
|
def sync_news(self, send_ids=None, do_auto_convert=True):
|
||||||
if self.device_connected:
|
if self.device_connected:
|
||||||
del_on_upload = config['delete_news_from_library_on_upload']
|
del_on_upload = config['delete_news_from_library_on_upload']
|
||||||
settings = self.device_manager.device.settings()
|
settings = self.device_manager.device.settings()
|
||||||
ids = list(dynamic.get('news_to_be_synced', set([]))) if send_ids is None else send_ids
|
ids = list(self.news_to_be_synced) if send_ids is None else send_ids
|
||||||
ids = [id for id in ids if self.library_view.model().db.has_id(id)]
|
ids = [id for id in ids if self.library_view.model().db.has_id(id)]
|
||||||
files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(
|
files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(
|
||||||
ids, settings.format_map,
|
ids, settings.format_map,
|
||||||
@ -1139,7 +1174,7 @@ class DeviceMixin(object): # {{{
|
|||||||
for f in files:
|
for f in files:
|
||||||
f.deleted_after_upload = del_on_upload
|
f.deleted_after_upload = del_on_upload
|
||||||
if not files:
|
if not files:
|
||||||
dynamic.set('news_to_be_synced', set([]))
|
self.news_to_be_synced = set([])
|
||||||
return
|
return
|
||||||
metadata = self.library_view.model().metadata_for(ids)
|
metadata = self.library_view.model().metadata_for(ids)
|
||||||
names = []
|
names = []
|
||||||
@ -1153,7 +1188,7 @@ class DeviceMixin(object): # {{{
|
|||||||
if mi.cover and os.access(mi.cover, os.R_OK):
|
if mi.cover and os.access(mi.cover, os.R_OK):
|
||||||
mi.thumbnail = self.cover_to_thumbnail(open(mi.cover,
|
mi.thumbnail = self.cover_to_thumbnail(open(mi.cover,
|
||||||
'rb').read())
|
'rb').read())
|
||||||
dynamic.set('news_to_be_synced', set([]))
|
self.news_to_be_synced = set([])
|
||||||
if config['upload_news_to_device'] and files:
|
if config['upload_news_to_device'] and files:
|
||||||
remove = ids if del_on_upload else []
|
remove = ids if del_on_upload else []
|
||||||
space = { self.location_manager.free[0] : None,
|
space = { self.location_manager.free[0] : None,
|
||||||
@ -1347,8 +1382,9 @@ class DeviceMixin(object): # {{{
|
|||||||
# If it does not, then do it here.
|
# If it does not, then do it here.
|
||||||
if not self.set_books_in_library(self.booklists(), reset=True):
|
if not self.set_books_in_library(self.booklists(), reset=True):
|
||||||
self.upload_booklists()
|
self.upload_booklists()
|
||||||
self.book_on_device(None, reset=True)
|
with self.library_view.preserve_selected_books:
|
||||||
self.refresh_ondevice()
|
self.book_on_device(None, reset=True)
|
||||||
|
self.refresh_ondevice()
|
||||||
|
|
||||||
view = self.card_a_view if on_card == 'carda' else \
|
view = self.card_a_view if on_card == 'carda' else \
|
||||||
self.card_b_view if on_card == 'cardb' else self.memory_view
|
self.card_b_view if on_card == 'cardb' else self.memory_view
|
||||||
|
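Note: the pending-news set moves from the global `dynamic` config into the per-library database prefs, so each library tracks its own queue. A self-contained sketch of the `dynamic_property` pattern used above; the decorator is re-implemented here as an assumption about calibre's helper:

```python
def dynamic_property(func):
    # Sketch: call the function once and let the property object it
    # returns replace it on the class (assumed behaviour of the helper).
    return func(None)

class GuiSketch(object):
    def __init__(self):
        self._prefs = {}  # stands in for library_view.model().db.prefs

    @dynamic_property
    def news_to_be_synced(self):
        doc = 'Set of ids to be sent to device'
        def fget(self):
            return set(self._prefs.get('news_to_be_synced', []))
        def fset(self, ids):
            self._prefs['news_to_be_synced'] = list(ids)
        return property(fget=fget, fset=fset, doc=doc)

gui = GuiSketch()
gui.news_to_be_synced = set([1, 2])
print(gui.news_to_be_synced)  # set([1, 2])
```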
@@ -90,10 +90,15 @@ class BookInfo(QDialog, Ui_BookInfo):
         row = row.row()
         if row == self.current_row:
             return
+        info = self.view.model().get_book_info(row)
+        if info is None:
+            # Indicates books was deleted from library, or row numbers have
+            # changed
+            return
+
         self.previous_button.setEnabled(False if row == 0 else True)
         self.next_button.setEnabled(False if row == self.view.model().rowCount(QModelIndex())-1 else True)
         self.current_row = row
-        info = self.view.model().get_book_info(row)
         self.setWindowTitle(info[_('Title')])
         self.title.setText('<b>'+info.pop(_('Title')))
         comments = info.pop(_('Comments'), '')
@@ -3,11 +3,16 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __license__ = 'GPL v3'

+import os
+
 from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \
                      QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \
-                     QLineEdit
+                     QLineEdit, Qt

+from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.library.check_library import CheckLibrary, CHECKS
+from calibre.library.database2 import delete_file, delete_tree
+from calibre import prints

 class Item(QTreeWidgetItem):
     pass

@@ -24,24 +29,25 @@ class CheckLibraryDialog(QDialog):
         self.setLayout(self._layout)

         self.log = QTreeWidget(self)
+        self.log.itemChanged.connect(self.item_changed)
         self._layout.addWidget(self.log)

-        self.check = QPushButton(_('Run the check'))
+        self.check = QPushButton(_('&Run the check'))
         self.check.setDefault(False)
         self.check.clicked.connect(self.run_the_check)
-        self.copy = QPushButton(_('Copy to clipboard'))
+        self.copy = QPushButton(_('Copy &to clipboard'))
         self.copy.setDefault(False)
         self.copy.clicked.connect(self.copy_to_clipboard)
         self.ok = QPushButton('&Done')
         self.ok.setDefault(True)
         self.ok.clicked.connect(self.accept)
-        self.cancel = QPushButton('&Cancel')
-        self.cancel.setDefault(False)
-        self.cancel.clicked.connect(self.reject)
+        self.delete = QPushButton('Delete &marked')
+        self.delete.setDefault(False)
+        self.delete.clicked.connect(self.delete_marked)
         self.bbox = QDialogButtonBox(self)
-        self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole)
         self.bbox.addButton(self.check, QDialogButtonBox.ActionRole)
-        self.bbox.addButton(self.cancel, QDialogButtonBox.RejectRole)
+        self.bbox.addButton(self.delete, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole)
         self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole)

         h = QHBoxLayout()

@@ -49,12 +55,16 @@ class CheckLibraryDialog(QDialog):
         h.addWidget(ln)
         self.name_ignores = QLineEdit()
         self.name_ignores.setText(db.prefs.get('check_library_ignore_names', ''))
+        self.name_ignores.setToolTip(
+            _('Enter comma-separated standard file name wildcards, such as synctoy*.dat'))
         ln.setBuddy(self.name_ignores)
         h.addWidget(self.name_ignores)
         le = QLabel(_('Extensions to ignore'))
         h.addWidget(le)
         self.ext_ignores = QLineEdit()
         self.ext_ignores.setText(db.prefs.get('check_library_ignore_extensions', ''))
+        self.ext_ignores.setToolTip(
+            _('Enter comma-separated extensions without a leading dot. Used only in book folders'))
         le.setBuddy(self.ext_ignores)
         h.addWidget(self.ext_ignores)
         self._layout.addLayout(h)

@@ -83,35 +93,70 @@ class CheckLibraryDialog(QDialog):
         plaintext = []

         def builder(tree, checker, check):
-            attr = check[0]
+            attr, h, checkable = check
             list = getattr(checker, attr, None)
             if list is None:
                 return

-            h = check[1]
             tl = Item([h])
             for problem in list:
                 it = Item()
+                if checkable:
+                    it.setFlags(Qt.ItemIsEnabled | Qt.ItemIsUserCheckable)
+                    it.setCheckState(1, False)
+                else:
+                    it.setFlags(Qt.ItemIsEnabled)
                 it.setText(0, problem[0])
                 it.setText(1, problem[1])
-                p = ', '.join(problem[2])
-                it.setText(2, p)
                 tl.addChild(it)
-                plaintext.append(','.join([h, problem[0], problem[1], p]))
+                self.all_items.append(it)
+                plaintext.append(','.join([h, problem[0], problem[1]]))
             tree.addTopLevelItem(tl)

         t = self.log
         t.clear()
-        t.setColumnCount(3);
-        t.setHeaderLabels([_('Name'), _('Path from library'), _('Additional Information')])
+        t.setColumnCount(2);
+        t.setHeaderLabels([_('Name'), _('Path from library')])
+        self.all_items = []
         for check in CHECKS:
             builder(t, checker, check)

         t.setColumnWidth(0, 200)
         t.setColumnWidth(1, 400)
+        self.delete.setEnabled(False)
         self.text_results = '\n'.join(plaintext)

+    def item_changed(self, item, column):
+        for it in self.all_items:
+            if it.checkState(1):
+                self.delete.setEnabled(True)
+                return
+
+    def delete_marked(self):
+        if not confirm('<p>'+_('The marked files and folders will be '
+               '<b>permanently deleted</b>. Are you sure?')
+               +'</p>', 'check_library_editor_delete', self):
+            return
+
+        # Sort the paths in reverse length order so that we can be sure that
+        # if an item is in another item, the sub-item will be deleted first.
+        items = sorted(self.all_items,
+                       key=lambda x: len(x.text(1)),
+                       reverse=True)
+        for it in items:
+            if it.checkState(1):
+                try:
+                    p = os.path.join(self.db.library_path ,unicode(it.text(1)))
+                    if os.path.isdir(p):
+                        delete_tree(p)
+                    else:
+                        delete_file(p)
+                except:
+                    prints('failed to delete',
+                            os.path.join(self.db.library_path,
+                                unicode(it.text(1))))
+        self.run_the_check()
+
     def copy_to_clipboard(self):
         QApplication.clipboard().setText(self.text_results)

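Note: the ordering trick in `delete_marked` above is worth calling out: sorting the marked paths by descending length guarantees a child is handled before the folder that contains it, so a directory is never deleted out from under a still-pending marked entry. A trivial sketch with made-up paths:

```python
# 'news/issue1/cover.jpg' is visited before 'news/issue1', which is
# visited before 'news'.
paths = ['news', 'news/issue1', 'news/issue1/cover.jpg']
for p in sorted(paths, key=len, reverse=True):
    print(p)
# news/issue1/cover.jpg
# news/issue1
# news
```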
@@ -190,7 +190,8 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
         if self.model.rowCount() < 1:
             info_dialog(self, _('No metadata found'),
                 _('No metadata found, try adjusting the title and author '
-                    'or the ISBN key.')).exec_()
+                    'and/or removing the ISBN.')).exec_()
+            self.reject()
             return

         self.matches.setModel(self.model)
@@ -16,6 +16,7 @@ from calibre.gui2.custom_column_widgets import populate_metadata_page
 from calibre.gui2 import error_dialog
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.utils.config import dynamic
+from calibre.utils.titlecase import titlecase

 class MyBlockingBusy(QDialog):

@@ -50,6 +51,7 @@ class MyBlockingBusy(QDialog):
         self.start()

         self.args = args
+        self.series_start_value = None
         self.db = db
         self.ids = ids
         self.error = None

@@ -115,7 +117,7 @@ class MyBlockingBusy(QDialog):
                 aum = [a.strip().replace('|', ',') for a in aum.split(',')]
                 new_title = authors_to_string(aum)
                 if do_title_case:
-                    new_title = new_title.title()
+                    new_title = titlecase(new_title)
                 self.db.set_title(id, new_title, notify=False)
                 title_set = True
             if title:

@@ -123,7 +125,7 @@ class MyBlockingBusy(QDialog):
                 self.db.set_authors(id, new_authors, notify=False)
             if do_title_case and not title_set:
                 title = self.db.title(id, index_is_id=True)
-                self.db.set_title(id, title.title(), notify=False)
+                self.db.set_title(id, titlecase(title), notify=False)
             if au:
                 self.db.set_authors(id, string_to_authors(au), notify=False)
         elif self.current_phase == 2:

@@ -147,8 +149,10 @@ class MyBlockingBusy(QDialog):

             if do_series:
                 if do_series_restart:
-                    next = series_start_value
-                    series_start_value += 1
+                    if self.series_start_value is None:
+                        self.series_start_value = series_start_value
+                    next = self.series_start_value
+                    self.series_start_value += 1
                 else:
                     next = self.db.get_next_series_num_for(series)
                 self.db.set_series(id, series, notify=False, commit=False)

@@ -179,7 +183,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
     s_r_functions = { '' : lambda x: x,
                     _('Lower Case') : lambda x: x.lower(),
                     _('Upper Case') : lambda x: x.upper(),
-                    _('Title Case') : lambda x: x.title(),
+                    _('Title Case') : lambda x: titlecase(x),
             }

     s_r_match_modes = [ _('Character match'),

@@ -567,6 +571,10 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
         self.initalize_authors()
         self.initialize_series()
         self.initialize_publisher()
+        for x in ('authors', 'publisher', 'series'):
+            x = getattr(self, x)
+            x.setSizeAdjustPolicy(x.AdjustToMinimumContentsLengthWithIcon)
+            x.setMinimumContentsLength(25)

     def initalize_authors(self):
         all_authors = self.db.all_authors()
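Note: the repeated `str.title()` to `titlecase()` swaps above fix a real defect: the built-in upcases after every non-letter, so apostrophes get mangled. A quick demonstration; the second output is the intended behaviour of `calibre.utils.titlecase.titlecase`, shown here as an expectation rather than executed:

```python
print("the time machine: don't panic".title())
# Built-in str.title(): "The Time Machine: Don'T Panic"

# titlecase() is expected to leave apostrophes alone and keep small
# words lower-cased mid-title, e.g.:
# "The Time Machine: Don't Panic"
```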
Some files were not shown because too many files have changed in this diff.