[Sync] Sync with trunk, revision 6907

Changelog.yaml (184 changed lines)
@@ -4,6 +4,190 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.
 
+- version: 0.7.28
+  date: 2010-11-12
+
+  new features:
+    - title: "Update the version of the graphical toolkit (Qt 4.7.1) used in the calibre binary builds on windows and linux. This should result in a significant speed up for the calibre ebook viewer"
+
+    - title: "Driver for Nook Color, Eken M001"
+
+    - title: "Add a tweak to turn off double clicking to open viewer"
+
+    - title: "Catalog generation: Add indication when a book has no formats"
+      tickets: [7376]
+
+    - title: "Advanced search dialog: Add a tab to allow searching particular metadata fields easily"
+
+    - title: "Conversion pipeline: When using the Level x Table of Contents expressions, if a tag is empty but has a non-empty title attribute, use that instead of ignoring the tag"
+
+  bug fixes:
+    - title: "Comic metadata reader: Sort filenames alphabetically when choosing an image for the cover"
+      tickets: [7488]
+
+    - title: "Bulk convert dialog: Hide useless restore defaults button."
+      tickets: [7471]
+
+    - title: "Conversion pipeline: Handle input documents that encode null bytes as HTML entities correctly"
+      tickets: [7355]
+
+    - title: "Fix some SONY readers not being detected on windows"
+      tickets: [7413]
+
+    - title: "MOBI Input: Fix images missing when converting MOBI news downloads created with Mobipocket reader"
+      tickets: [7455]
+
+    - title: "ODT Input: Handle hyperlinks to headings that have truncated destination specifiers correctly"
+      tickets: [7506]
+
+    - title: "Sony driver: Ignore invalid strings when updating XML database"
+
+    - title: "Content Server: Add day to displayed date in /mobile book listing"
+
+    - title: "MOBI Input: Do not generate filenames with only extensions if the MOBI file has no internal name"
+      tickets: [7481]
+
+    - title: "MOBI Input: Handle files that have the record sizes set incorrectly to a long integer"
+      tickets: [7472]
+
+    - title: "Fix not enough vertical space for text in the preferences dialog category listing"
+
+    - title: "Remove 'sort' from Search and replace destination fields and add it to source fields. S&R is no longer marked experimental"
+
+    - title: "Edit metadata dialog: Save dialog geometry on reject as well as on accept"
+
+    - title: "E-book viewer: Fix clicking entries in TOC that point to the currently loaded flow not scrolling view to the top of the document"
+
+    - title: "Fix bug in regex used to extract charset from <meta> tags"
+
+    - title: "MOBI Output: Add support for the <q> tag"
+
+  improved recipes:
+    - Zeit Online
+    - Gamespot Review
+    - Politika
+    - Pagina12
+    - Irish Times
+    - elektrolese
+
+  new recipes:
+    - title: "Handelsblatt and European Voice"
+      author: "malfi"
+
+    - title: "Polityka and Newsweek"
+      author: "Mateusz Kielar"
+
+    - title: "MarcTV"
+      author: "Marc Toensing"
+
+    - title: "Rolling Stone"
+      author: "Darko Miletic"
+
+    - title: "Vedomosti"
+      author: "Nikolai Kotchetkov"
+
+    - title: "Hola.com"
+      author: "bmsleight"
+
+    - title: "Dnevnik, Siol.net, MMC-RTV and Avto-magazin"
+      author: "BlonG"
+
+    - title: "SC Print Magazine"
+      author: "Tony Maro"
+
+    - title: "Diario Sport"
+      author: "Jefferson Frantz"
+
+- version: 0.7.27
+  date: 2010-11-05
+
+  new features:
+    - title: "The book list behavior has changed"
+      type: major
+      description: >
+       "Now double clicking on an entry in the book list will open it in the viewer. To edit metadata, single click a previously selected entry instead. This is consistent with
+       the usage in most operating systems, so should be most intuitive for new users. Also, typing any key no longer starts an edit; instead press F2 (Enter on OS X) to start editing
+       the current cell. And you now have to double click instead of single clicking the book details panel to open the detailed info dialog for the book."
+
+    - title: "Added a new HTML output format plugin, which converts the input document to a ZIP file. The zip file contains HTML pages suitable for display in a website"
+
+    - title: "Support for iRiver Cover Story and Digma Q600"
+
+    - title: "Add a search button (labelled Go!) to explicitly run a search with the text currently in the quick search box"
+
+    - title: "Add author to the calibre generated book jacket"
+      tickets: [7402]
+
+    - title: "Add the title of the destination book to the merge dialog warning message"
+
+    - title: "calibre-server: Make auto reload control separate from --develop with a new command line option --auto-reload"
+
+  bug fixes:
+    - title: "Fix book details panel not being updated after a delete-merge"
+      tickets: [7426]
+
+    - title: "Fix clicking in the search box launching a search if you have search as you type enabled"
+      tickets: [7425]
+
+    - title: "Use a browser widget to display book details for more robustness and better performance when viewing large HTML comments"
+
+    - title: "Fix cover browser not updated after copy to library and delete"
+      tickets: [7416]
+
+    - title: "Fix regression that broke sending non calibre EPUB files to the iPad. Also handle failure to set cover in iTunes gracefully"
+      tickets: [7356]
+
+    - title: "News download: Workaround lack of thread safety in python mechanize, causing corrupted network packets (degrading network performance) on Ubuntu Maverick 64bit kernels"
+      tickets: [7321]
+
+    - title: "Convert comments to HTML for the book details panel in a separate thread to make scrolling through the book list faster when large comments are present"
+
+    - title: "calibre-server: Fix regression that broke --daemonize"
+
+    - title: "EPUB Input: Handle ncx files that have <navpoint> elements with no content correctly."
+      tickets: [7396]
+
+    - title: "SNB Output: Fix a bug in handling the <pre> tag"
+
+    - title: "MOBI Output: Don't ignore hidden anchors."
+      tickets: [7384]
+
+    - title: "Fix a bug where switching libraries while generating a catalog could generate the catalog for the wrong library"
+
+    - title: "MOBI Output: Fix regression that broke conversion of anchors inside superscripts/subscripts."
+      tickets: [7368]
+
+    - title: "Content server: Fix various minor bugs"
+      tickets: [7379, 6768, 7354]
+
+    - title: "Amazon metadata download plugin: Make it more robust and add an option to auto convert HTML to text"
+
+    - title: "Re-arrange the send to device menu to make it harder to accidentally trigger the send and delete actions"
+
+  improved recipes:
+    - Danas
+    - Fudzilla
+    - Zeit Online
+    - New York Times
+    - Mediapart
+
+  new recipes:
+    - title: "Ynet and Calcalist"
+      author: "marbs"
+
+    - title: "El Faro de Vigo"
+      author: "Jefferson Frantz"
+
+    - title: "Clic_RBS"
+      author: "avoredo"
+
+    - title: "Correio da Manha"
+      author: "jmst"
+
+    - title: "Rue89"
+      author: "Louis Gesbert"
+
 - version: 0.7.26
   date: 2010-10-30
 
@@ -81,6 +81,14 @@ p.unread_book {
     text-indent:-2em;
 }
 
+p.wishlist_item {
+    text-align:left;
+    margin-top:0px;
+    margin-bottom:0px;
+    margin-left:2em;
+    text-indent:-2em;
+}
+
 p.date_read {
     text-align:left;
     margin-top:0px;
@@ -104,3 +112,14 @@ hr.annotations_divider {
     margin-top:0em;
     margin-bottom:0em;
 }
+
+td.publisher, td.date {
+    font-weight:bold;
+    text-align:center;
+}
+td.rating {
+    text-align: center;
+}
+td.thumbnail img {
+    -webkit-box-shadow: 6px 6px 6px #888;
+}
@@ -4,7 +4,7 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<title>..:: calibre library ::.. {title}</title>
+<title>..:: calibre {library} ::.. {title}</title>
 <meta http-equiv="X-UA-Compatible" content="IE=100" />
 <link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
 
@@ -41,7 +41,7 @@
 <div class="area">
     <div class="bubble">
     <p><a href="{prefix}/browse" title="Return to top level"
-    >→ home ←</a></p>
+    >→ {home} ←</a></p>
     </div>
 </div>
 <div id="nav-container">
@@ -80,7 +80,7 @@
 <form name="search_form" action="{prefix}/browse/search" method="get" accept-charset="UTF-8">
     <input value="{initial_search}" type="text" title="Search" name="query"
         class="search_input" />
-    <input type="submit" value="Search" title="Search" alt="Search" />
+    <input type="submit" value="{Search}" title="{Search}" alt="{Search}" />
 </form>
 </div>
 <div> </div>
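The three template hunks above swap hard-coded UI strings for {library}, {home} and {Search} placeholders, presumably so the server can substitute localized values when it renders the page. A minimal sketch of that kind of substitution, assuming Python str.format-style templating; the translations dict and the German string are hypothetical, not calibre's actual rendering code:

# Hypothetical illustration of filling the new placeholders.
TEMPLATE = '<input type="submit" value="{Search}" title="{Search}" alt="{Search}" />'
translations = {'Search': 'Suche'}  # hypothetical localized UI string
print(TEMPLATE.format(**translations))
# <input type="submit" value="Suche" title="Suche" alt="Suche" />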
@@ -211,3 +211,9 @@ generate_cover_title_font = None
 # Absolute path to a TTF font file to use as the font for the footer in the
 # default cover
 generate_cover_foot_font = None
+
+
+# Behavior of doubleclick on the books list. Choices:
+# open_viewer, do_nothing, edit_cell. Default: open_viewer.
+# Example: doubleclick_on_library_view = 'do_nothing'
+doubleclick_on_library_view = 'open_viewer'
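The new tweak above is what lets users turn off the double-click-to-open-viewer behavior introduced in 0.7.27. A minimal sketch of how consuming code might look it up, assuming calibre's tweaks mapping in calibre.utils.config; the dispatch below is illustrative, not the actual GUI code:

from calibre.utils.config import tweaks

# Read the tweak, falling back to the documented default.
action = tweaks.get('doubleclick_on_library_view', 'open_viewer')
if action == 'open_viewer':
    pass  # open the selected book in the viewer
elif action == 'edit_cell':
    pass  # begin editing the clicked cell
# 'do_nothing' deliberately falls through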
BIN  resources/images/format-text-bold.png (new file, 5.0 KiB)
BIN  resources/images/format-text-italic.png (new file, 4.1 KiB)
BIN  resources/images/format-text-strikethrough.png (new file, 5.9 KiB)
BIN  resources/images/format-text-underline.png (new file, 4.4 KiB)
BIN  resources/images/hotmail.png (new file, 2.6 KiB)
BIN  resources/images/news/avto-magazin.png (new file, 1.4 KiB)
BIN  resources/images/news/dnevnik.png (new file, 861 B)
BIN  resources/images/news/rollingstone.png (new file, 1.3 KiB)
BIN  resources/images/news/siol.png (new file, 423 B)
@@ -39,7 +39,16 @@
 .cbj_title {
     font-size: x-large;
     text-align: center;
 }
 
+/*
+** Author
+*/
+.cbj_author {
+    font-size: medium;
+    text-align: center;
+    margin-bottom: 1ex;
+}
+
 /*
 ** Table containing Series, Publication Year, Rating and Tags
 
@@ -7,6 +7,7 @@
 <body>
 <div class="cbj_banner">
     <div class="cbj_title">{title}</div>
+    <div class="cbj_author">{author}</div>
     <table class="cbj_header">
         <tr class="cbj_series">
             <td class="cbj_label">{series_label}:</td>
resources/recipes/avto-magazin.recipe (new file, 46 lines)
@@ -0,0 +1,46 @@
__license__   = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
avto-magazin.si
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Dnevnik(BasicNewsRecipe):
    title = u'Avto Magazin'
    __author__ = u'BlonG'
    description = u'Za avtomobilisti\u010dne frike, poznavalce in nedeljske \u0161oferje.'
    oldest_article = 7
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {'linearize_tables': True}

    cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'id':'_iprom_inStream'}),
        # dict(name='div', attrs={'class':'entry-content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'id':'voteConfirmation'}),
        dict(name='div', attrs={'id':'InsideVote'}),
        dict(name='div', attrs={'class':'Zone234'}),
        dict(name='div', attrs={'class':'Comments'}),
        dict(name='div', attrs={'class':'sorodneNovice'}),
        dict(name='div', attrs={'id':'footer'}),
    ]

    feeds = [
        (u'Novice', u'http://www.avto-magazin.si/rss/')
    ]
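The recipe above and the other new recipe files in this sync all share the same BasicNewsRecipe shape: declare metadata and feeds as class attributes, then whitelist and blacklist tags to isolate the article body. A minimal sketch of that shared skeleton; the site and selectors are invented for illustration:

from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    title          = u'Example Site'     # name shown in the calibre GUI
    language       = 'en'
    oldest_article = 7                   # days of history to download
    max_articles_per_feed = 20
    no_stylesheets = True                # drop the site's own CSS

    # keep only the article container; strip navigation, ads, comments
    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
    remove_tags    = [dict(name='div', attrs={'class': 'comments'})]

    feeds = [(u'News', u'http://example.com/rss')]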
resources/recipes/calcalist.recipe (new file, 43 lines)
@@ -0,0 +1,43 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import re

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = 'This is a recipe of Calcalist.co.il. The recipe downloads the article page to not hurt the sites advertising income.'
    cover_url = 'http://ftp5.bizportal.co.il/web/giflib/news/calcalist.JPG'
    title = u'Calcalist'
    language = 'he'
    __author__ = 'marbs'
    extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_attributes = ['width']
    simultaneous_downloads = 5
    keep_only_tags = dict(name='div', attrs={'id':'articleContainer'})
    remove_tags = [dict(name='p', attrs={'text':[' ']})]
    max_articles_per_feed = 100
    preprocess_regexps = [
        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]

    feeds = [(u'\u05d3\u05e3 \u05d4\u05d1\u05d9\u05ea', u'http://www.calcalist.co.il/integration/StoryRss8.xml'),
             (u'24/7', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
             (u'\u05d1\u05d0\u05d6\u05d6', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
             (u'\u05de\u05d1\u05d6\u05e7\u05d9\u05dd', u'http://www.calcalist.co.il/integration/StoryRss184.xml'),
             (u'\u05d4\u05e9\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss2.xml'),
             (u'\u05d1\u05d0\u05e8\u05e5', u'http://www.calcalist.co.il/integration/StoryRss14.xml'),
             (u'\u05d4\u05db\u05e1\u05e3', u'http://www.calcalist.co.il/integration/StoryRss9.xml'),
             (u'\u05e0\u05d3\u05dc"\u05df', u'http://www.calcalist.co.il/integration/StoryRss7.xml'),
             (u'\u05e2\u05d5\u05dc\u05dd', u'http://www.calcalist.co.il/integration/StoryRss13.xml'),
             (u'\u05e4\u05e8\u05e1\u05d5\u05dd \u05d5\u05e9\u05d9\u05d5\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss5.xml'),
             (u'\u05e4\u05e0\u05d0\u05d9', u'http://www.calcalist.co.il/integration/StoryRss3.xml'),
             (u'\u05d8\u05db\u05e0\u05d5\u05dc\u05d5\u05d2\u05d9', u'http://www.calcalist.co.il/integration/StoryRss4.xml'),
             (u'\u05e2\u05e1\u05e7\u05d9 \u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.calcalist.co.il/integration/StoryRss18.xml')]

    def print_version(self, url):
        split1 = url.split("-")
        print_url = 'http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
        return print_url
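The print_version above splices the article id that follows the first '-' in the feed URL into Calcalist's print-preview path. Traced on a hypothetical article URL:

url = 'http://www.calcalist.co.il/local/articles/0,7340,L-3481234,00.html'  # hypothetical
split1 = url.split('-')
print('http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1])
# http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-3481234,00.html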
resources/recipes/clic_rbs.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ClicRBS(BasicNewsRecipe):
    title = u'ClicRBS'
    language = 'pt'
    __author__ = 'arvoredo'
    oldest_article = 3
    max_articles_per_feed = 9
    cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'

    remove_tags = [
        dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']})
    ]

    remove_tags_before = dict(name='div ', attrs={'class':'descricao'})
    remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'})
    remove_tags_before = dict(name='div', attrs={'class':'descricao'})
    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
    remove_tags_after = dict(name='div', attrs={'class':'extra'})
    remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'})
    remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'})
    remove_tags_after = dict(name='ul', attrs={'class':'lista'})

    feeds = [
        (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13')
        , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67')
        , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml')
        , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1')
        , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13')
        , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13')
        , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1')
        , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1')
        , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1')
        , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2')
        , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1')
        , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13')
        , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2')
        , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18')
        , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2')
        , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
    ]

    extra_css = '''
        cite{color:#007BB5; font-size:xx-small; font-style:italic;}
        body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        h3{font-size:large; color:#082963; font-weight:bold;}
        #ident{color:#0179B4; font-size:xx-small;}
        p{color:#000000;font-weight:normal;}
        .commentario p{color:#007BB5; font-style:italic;}
    '''
resources/recipes/cm_journal.recipe (new file, 44 lines)
@@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe

class CMJornal_pt(BasicNewsRecipe):
    title = 'Correio da Manha - Portugal'
    __author__ = 'jmst'
    description = 'As noticias de Portugal e do Mundo'
    publisher = 'Cofina Media'
    category = ''
    oldest_article = 1
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'pt'
    extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
        }

    keep_only_tags = [
          dict(name=['h2','h1'])
        , dict(name='div', attrs={'class': ['news']})
        ]

    remove_tags = [
          dict(name=['object','embed','iframe'])
        , dict(name='a', attrs={'href':['#']})
        ]

    feeds = [
          (u'Actualidade', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009')
        , (u'Portugal', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010')
        , (u'Economia', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011')
        , (u'Mundo', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091')
        , (u'Desporto', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012')
        , (u'TV & Media', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092')
        ]

    def print_version(self, url):
        return url.replace('noticia.aspx', 'Imprimir.aspx')
@@ -25,7 +25,7 @@ class Danas(BasicNewsRecipe):
     remove_empty_feeds = True
     extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                     @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
-                    .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
+                    .article,.articledescription,body,.lokacija,.feed{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
                     .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif}
                     .antrfileText{border-left: 2px solid #999999;
                     margin-left: 0.8em;
@@ -59,11 +59,14 @@ class Danas(BasicNewsRecipe):
                  ,(re.compile(u'\u201d'), lambda match: '”') # right double quotation mark
                  ,(re.compile(u'\u201e'), lambda match: '“') # double low-9 quotation mark
                  ,(re.compile(u'\u201f'), lambda match: '”') # double high-reversed-9 quotation mark
+                 ,(re.compile(u'\u00f4'), lambda match: '“') # latin small letter o with circumflex
+                 ,(re.compile(u'\u00f6'), lambda match: '”') # latin small letter o with diaeresis
+                 ,(re.compile(u'\u00e1'), lambda match: ' ') # latin small letter a with acute
                  ]
 
     keep_only_tags = [dict(name='div', attrs={'id':'left'})]
     remove_tags = [
-                    dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
+                    dict(name='div', attrs={'class':['width_1_4','metaClanka','baner','listaVesti','article_nav']})
                     ,dict(name='div', attrs={'id':'comments'})
                     ,dict(name=['object','link','iframe','meta'])
                   ]
resources/recipes/deredactie.recipe (new file, 61 lines)
@@ -0,0 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class deredactie(BasicNewsRecipe):
    title = u'Deredactie.be'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.deredactie.be/polopoly_fs/1.510827!image/2710428628.gif'
    language = 'de'
    keep_only_tags = []
    __author__ = 'malfi'
    keep_only_tags.append(dict(name='div', attrs={'id': 'articlehead'}))
    keep_only_tags.append(dict(name='div', attrs={'id': 'articlebody'}))
    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'id': 'story'}))
    remove_tags.append(dict(name='div', attrs={'id': 'useractions'}))
    remove_tags.append(dict(name='hr'))

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def parse_index(self):
        categories = []
        catnames = {}
        soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch')
        for elem in soup.findAll('li', attrs={'id': re.compile("^navItem[2-9]")}):
            a = elem.find('a', href=True)
            m = re.search('(?<=/)[^/]*$', a['href'])
            cat = str(m.group(0))
            categories.append(cat)
            catnames[cat] = a['title']
            self.log("found cat %s\n" % catnames[cat])

        feeds = []

        for cat in categories:
            articles = []
            soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch/' + cat)
            for a in soup.findAll('a', attrs={'href': re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
                skip_this_article = False
                url = a['href'].strip()
                if url.startswith('/'):
                    url = 'http://www.deredactie.be' + url
                myarticle = ({'title': self.tag_to_string(a), 'url': url, 'description': '', 'date': ''})
                for article in articles:
                    if article['url'] == url:
                        skip_this_article = True
                        self.log("SKIPPING DUP %s" % url)
                        break
                if skip_this_article:
                    continue
                articles.append(myarticle)
                self.log("Adding URL %s\n" % url)
            if articles:
                feeds.append((catnames[cat], articles))
        return feeds
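The parse_index above rejects duplicate links with a linear scan over the articles collected so far; with many articles per category, a set of seen URLs performs the same check in constant time per lookup. A sketch of that alternative, not the committed code; 'candidates' stands in for the scraped links:

seen = set()
articles = []
for url, title in candidates:  # hypothetical (url, title) pairs
    if url in seen:
        continue               # same skip-duplicates behaviour as above
    seen.add(url)
    articles.append({'title': title, 'url': url, 'description': '', 'date': ''})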
resources/recipes/diario_sport.recipe (new file, 42 lines)
@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe

class DiarioSport(BasicNewsRecipe):
    title = u'Diario Sport'
    oldest_article = 2
    max_articles_per_feed = 75
    __author__ = 'Jefferson Frantz'
    description = 'Todas las noticias del Barça y del mundo del deporte en general'
    timefmt = ' [%d %b, %Y]'
    language = 'es'
    no_stylesheets = True

    feeds = [(u'Sport', u'http://feeds.feedburner.com/sport/ultimahora')]

    extra_css = '''
        h2{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: justify}
    '''

    keep_only_tags = [dict(name='div', attrs={'id':['noticiasMedio']})]

    remove_tags = [
        dict(name=['object','link','script','ul'])
        ,dict(name='div', attrs={'id':['scrAdSense','herramientas2','participacion','participacion2','bloque1resultados','bloque2resultados','cont_vinyetesAnt','tinta','noticiasSuperior','cintillopublicidad2']})
        ,dict(name='p', attrs={'class':['masinformacion','hora']})
        ,dict(name='a', attrs={'class':["'link'"]})
        ,dict(name='div', attrs={'class':['addthis_toolbox addthis_default_style','firma','pretitularnoticia']})
        ,dict(name='form', attrs={'id':['formularioDeBusquedaAvanzada']})
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def postprocess_html(self, soup, first_fetch):
        img = soup.find('img', src='/img/videos/mascaravideo.png')
        if img is not None:
            img.extract()

        return soup
resources/recipes/dnevnik.recipe (new file, 63 lines)
@@ -0,0 +1,63 @@
__license__   = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
dnevnik.si
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Dnevnik(BasicNewsRecipe):
    title = u'Dnevnik.si'
    __author__ = u'BlonG'
    description = u'''Dnevnik je \u010dasnik z ve\u010d kot polstoletno zgodovino.
        Pod sloganom \xbb\u017divljenje ima besedo\xab na svojih straneh prina\u0161a
        bralcem bogastvo informacij, komentarjev in kolumen in raznovrstnost
        pogledov, zaznamovanih z odgovornostjo do posameznika in \u0161ir\u0161e
        dru\u017ebe.'''
    oldest_article = 3
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/dnevnik_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'id':'_iprom_inStream'}),
        dict(name='div', attrs={'class':'entry-content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'fb_article_top'}),
        dict(name='div', attrs={'class':'related'}),
        dict(name='div', attrs={'class':'fb_article_foot'}),
        dict(name='div', attrs={'class':'spreading'}),
        dict(name='dl', attrs={'class':'ad'}),
        dict(name='p', attrs={'class':'report'}),
        dict(name='div', attrs={'class':'hfeed comments'}),
        dict(name='dl', attrs={'id':'entryPanel'}),
        dict(name='dl', attrs={'class':'infopush ip_wide'}),
        dict(name='div', attrs={'class':'sidebar'}),
        dict(name='dl', attrs={'class':'bottom'}),
        dict(name='div', attrs={'id':'footer'}),
    ]

    feeds = [
        (u'Slovenija', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=13')
        ,(u'Svet', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=14')
        ,(u'EU', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=116')
        ,(u'Poslovni dnevnik', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=5')
        ,(u'Kronika', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=15')
        ,(u'Kultura', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=17')
        ,(u'Zdravje', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=18')
        ,(u'Znanost in IT', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=19')
        ,(u'(Ne)verjetno', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=20')
        ,(u'E-strada', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=21')
        ,(u'Svet vozil', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=22')
    ]
resources/recipes/el_faro.recipe (new file, 77 lines)
@@ -0,0 +1,77 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ElFaroDeVigo(BasicNewsRecipe):
    title = u'El Faro de Vigo'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Jefferson Frantz'
    description = 'Noticias de Vigo'
    timefmt = ' [%d %b, %Y]'
    language = 'es'
    encoding = 'cp1252'
    no_stylesheets = True
    remove_javascript = True

    feeds = [
##        (u'Vigo', u'http://www.farodevigo.es/elementosInt/rss/1'),
##        (u'Gran Vigo', u'http://www.farodevigo.es/elementosInt/rss/2'),
        (u'Galicia', u'http://www.farodevigo.es/elementosInt/rss/4'),
        (u'España', u'http://www.farodevigo.es/elementosInt/rss/6'),
        (u'Mundo', u'http://www.farodevigo.es/elementosInt/rss/7'),
##        (u'Opinión', u'http://www.farodevigo.es/elementosInt/rss/5'),
        (u'Economía', u'http://www.farodevigo.es/elementosInt/rss/10'),
        (u'Sociedad y Cultura', u'http://www.farodevigo.es/elementosInt/rss/8'),
        (u'Sucesos', u'http://www.farodevigo.es/elementosInt/rss/9'),
        (u'Deportes', u'http://www.farodevigo.es/elementosInt/rss/11'),
        (u'Agenda', u'http://www.farodevigo.es/elementosInt/rss/21'),
        (u'Gente', u'http://www.farodevigo.es/elementosInt/rss/24'),
        (u'Televisión', u'http://www.farodevigo.es/elementosInt/rss/25'),
        (u'Ciencia y Tecnología', u'http://www.farodevigo.es/elementosInt/rss/26')]

    extra_css = '''.noticia_texto{ font-family: sans-serif; font-size: medium; text-align: justify }
                   h1{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}
                   h2{font-family: serif; font-size: medium; font-weight: bold; color: #000000; text-align: left}
                   .enlacenegrita10{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: left}
                   .noticia_titular{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}'''

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']

        url = 'http://estaticos00.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img', src=url)
        if fitem:
            par = fitem.parent
            par.extract()
        url = 'http://estaticos01.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img', src=url)
        if fitem:
            par = fitem.parent
            par.extract()
        url = 'http://estaticos02.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img', src=url)
        if fitem:
            par = fitem.parent
            par.extract()

        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        divs = soup.findAll(True, {'class':'enlacenegrita10'})
        for div in divs:
            div['align'] = 'left'

        return soup

    keep_only_tags = [dict(name='div', attrs={'class':['noticias']})]

    remove_tags = [
        dict(name=['object','link','script','ul','iframe','ol'])
        ,dict(name='div', attrs={'class':['noticiadd2', 'cintillo2', 'noticiadd', 'noticiadd2']})
        ,dict(name='div', attrs={'class':['imagen_derecha', 'noticiadd3', 'extraHTML']})
    ]
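The preprocess_html in el_faro.recipe above repeats an identical find-and-extract block for the three estaticos00/01/02 hosts; the same logic can be written once as a loop. A sketch of the equivalent refactor, not the committed code:

def remove_share_bars(soup):
    for host in ('estaticos00', 'estaticos01', 'estaticos02'):
        url = ('http://%s.farodevigo.es//elementosWeb/mediaweb/'
               'images/compartir/barrapunto.gif' % host)
        fitem = soup.find('img', src=url)
        if fitem:
            fitem.parent.extract()  # drop the whole share-bar container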
@@ -1,38 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
-'''
-Fetch elektrolese.
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class elektrolese(BasicNewsRecipe):
-
-    title = u'elektrolese'
-    description = 'News about electronic publishing'
-    __author__ = 'Oliver Niesner'
-    use_embedded_content = False
-    timefmt = ' [%a %d %b %Y]'
-    language = 'de'
-
-    oldest_article = 14
-    max_articles_per_feed = 50
-    no_stylesheets = True
-    conversion_options = {'linearize_tables': True}
-    encoding = 'utf-8'
-
-
-    remove_tags_after = [dict(id='comments')]
-    filter_regexps = [r'ad\.doubleclick\.net']
-
-    remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
-                   dict(id='comments'),
-                   dict(id='Navbar1')]
-
-
-    feeds = [(u'elektrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss')]
resources/recipes/eu_commission.recipe (new file, 58 lines)
@@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe

LANGUAGE = 'de'

def feedlink(num):
    return u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id=' + \
        str(num) + '&lang=' + LANGUAGE

class EUCommissionPress(BasicNewsRecipe):
    title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'
    __author__ = 'malfi'
    language = LANGUAGE
    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'class': 'pressReleaseContentMain'}))
    remove_tags = []

    feeds = [
        (u'Pressemitteilung des Tages', feedlink(64)),
        (u'Presidency', feedlink(137)),
        (u'Foreign affairs and security policy', feedlink(138)),
        (u'Agriculture and rural development', feedlink(139)),
        (u'Budget and financial programming', feedlink(140)),
        (u'Climate action', feedlink(141)),
        (u'Competition', feedlink(142)),
        (u'Development', feedlink(143)),
        (u'Digital agenda', feedlink(144)),
        (u'Economic and monetary affairs', feedlink(145)),
        (u'Education, culture, multilingualism and youth', feedlink(146)),
        (u'Employment, social Affairs and inclusion', feedlink(147)),
        (u'Energy', feedlink(148)),
        (u'Enlargement and European neighbourhood policy', feedlink(149)),
        (u'Environment', feedlink(150)),
        (u'Health and consumer policy', feedlink(151)),
        (u'Home affairs', feedlink(152)),
        (u'Industry and entrepreneurship', feedlink(153)),
        (u'Inter-Institutional relations and administration', feedlink(154)),
        (u'Internal market and services', feedlink(155)),
        (u'International cooperation, humanitarian aid and crisis response', feedlink(156)),
        (u'Justice, fundamental rights and citizenship', feedlink(157)),
        (u'Maritime affairs and fisheries', feedlink(158)),
        (u'Regional policy', feedlink(159)),
        (u'Research and innovation', feedlink(160)),
        (u'Taxation and customs union, audit and anti-fraud', feedlink(161)),
        (u'Trade', feedlink(162)),
        (u'Transport', feedlink(163))
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
resources/recipes/european_voice.recipe (new file, 51 lines)
@@ -0,0 +1,51 @@
from calibre.web.feeds.news import BasicNewsRecipe

class EuropeanVoice(BasicNewsRecipe):
    title = u'European Voice'
    __author__ = 'malfi'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
    language = 'en'
    keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
    remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]
    feeds = [
        (u'Whole site', u'http://www.europeanvoice.com/Rss/2.xml'),
        (u'News and analysis', u'http://www.europeanvoice.com/Rss/6.xml'),
        (u'Comment', u'http://www.europeanvoice.com/Rss/7.xml'),
        (u'Special reports', u'http://www.europeanvoice.com/Rss/5.xml'),
        (u'People', u'http://www.europeanvoice.com/Rss/8.xml'),
        (u'Career', u'http://www.europeanvoice.com/Rss/11.xml'),
        (u'Policies', u'http://www.europeanvoice.com/Rss/4.xml'),
        (u'EVents', u'http://www.europeanvoice.com/Rss/10.xml'),
        (u'Policies - Economics', u'http://www.europeanvoice.com/Rss/31.xml'),
        (u'Policies - Business', u'http://www.europeanvoice.com/Rss/19.xml'),
        (u'Policies - Trade', u'http://www.europeanvoice.com/Rss/25.xml'),
        (u'Policies - Information society', u'http://www.europeanvoice.com/Rss/20.xml'),
        (u'Policies - Energy', u'http://www.europeanvoice.com/Rss/15.xml'),
        (u'Policies - Transport', u'http://www.europeanvoice.com/Rss/18.xml'),
        (u'Policies - Climate change', u'http://www.europeanvoice.com/Rss/16.xml'),
        (u'Policies - Environment', u'http://www.europeanvoice.com/Rss/17.xml'),
        (u'Policies - Farming & food', u'http://www.europeanvoice.com/Rss/23.xml'),
        (u'Policies - Health & society', u'http://www.europeanvoice.com/Rss/24.xml'),
        (u'Policies - Justice', u'http://www.europeanvoice.com/Rss/29.xml'),
        (u'Policies - Foreign affairs', u'http://www.europeanvoice.com/Rss/27.xml')
    ]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        return url + '?bPrint=1'

    def preprocess_html(self, soup):
        denied = soup.findAll(True, text='Subscribers')
        if denied:
            raise Exception('Article skipped, because content can only be seen with subscription')
        return soup
@@ -33,7 +33,7 @@ class Fudzilla(BasicNewsRecipe):
               ]
 
     feeds = [
-        (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
+        (u'Posts', u'http://www.fudzilla.com/?format=feed')
             ]
 
     preprocess_regexps = [
@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__author__ = u'Marc T\xf6nsing'
+__author__ = u'Marc Toensing'
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
@@ -17,6 +17,7 @@ class GamespotCom(BasicNewsRecipe):
     no_javascript = True
 
     feeds = [
+        ('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
         ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
         ('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
         ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
@@ -37,5 +38,3 @@ class GamespotCom(BasicNewsRecipe):
 
     def get_article_url(self, article):
         return article.get('link') + '?print=1'
-
-
resources/recipes/german_gov.recipe (new file, 28 lines)
@@ -0,0 +1,28 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class GermanGovermentPress(BasicNewsRecipe):
    title = u'Pressemitteilungen der Bundesregierung'
    oldest_article = 14
    __author__ = 'malfi'
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name='h2'))
    keep_only_tags.append(dict(name='div', attrs={'class': 'textblack'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'subtitle'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'text'}))
    remove_tags = []
    feeds = [(u'Pressemitteilungen', u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf')]
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        m = re.search(r'^(.*).html$', url)
        return str(m.group(1)) + ',layoutVariant=Druckansicht.html'
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
 
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__copyright__ = '2010, Szing'
 __docformat__ = 'restructuredtext en'
 
 '''
@@ -10,49 +10,52 @@ globeandmail.com
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class GlobeAndMail(BasicNewsRecipe):
-    title = u'Globe and Mail'
-    language = 'en_CA'
-    __author__ = 'Kovid Goyal'
+class AdvancedUserRecipe1287083651(BasicNewsRecipe):
+    title = u'Globe & Mail'
+    __license__ = 'GPL v3'
+    __author__ = 'Szing'
     oldest_article = 2
-    max_articles_per_feed = 10
     no_stylesheets = True
-    extra_css = '''
-        h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
-        h4 {margin-top: 0px;}
-        #byline { font-family: monospace; font-weight:bold; }
-        #placeline {font-weight:bold;}
-        #credit {margin-top:0px;}
-        .tag {font-size: 22pt;}'''
-    description = 'Canada\'s national newspaper'
-    keep_only_tags = [dict(name='article')]
-    remove_tags = [dict(name='aside'),
-        dict(name='footer'),
-        dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
-        dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
-    ]
-    feeds = [
-        (u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
-        (u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
-        (u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
-        (u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
-        (u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
-        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
-        (u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
-        (u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
-        (u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
-        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
-        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
-        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
-        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
-        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
-        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
-        (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
-    ]
-
-    def get_article_url(self, article):
-        url = BasicNewsRecipe.get_article_url(self, article)
-        if '/video/' not in url:
-            return url
+    max_articles_per_feed = 100
+    encoding = 'utf8'
+    publisher = 'Globe & Mail'
+    language = 'en_CA'
+    extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'
+
+    feeds = [
+        (u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
+        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
+        (u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
+        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
+        (u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
+        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
+        (u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
+        (u'Top Political Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
+        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
+        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
+        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
+        (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss'),
+        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss')
+    ]
+
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(name='h2', attrs={'id':'articletitle'}),
+        dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
+        dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
+        dict(name='id', attrs={'class':'article'}),
+        dict(name='table', attrs={'class':'todays-market'}),
+        dict(name='header', attrs={'id':'leadheader'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
+    ]
+
+    # this has to be here or the text in the article appears twice.
+    remove_tags_after = [dict(id='article')]
+
+    # Use the mobile version rather than the web version
+    def print_version(self, url):
+        return url + '&service=mobile'
41
resources/recipes/handelsblatt.recipe
Normal file
@ -0,0 +1,41 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe

class Handelsblatt(BasicNewsRecipe):
    title = u'Handelsblatt'
    __author__ = 'malfi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
    language = 'de'

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'structOneCol'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))

    remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]

    feeds = [
        (u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
        (u'Handelsblatt Top-Themen',u'http://www.handelsblatt.com/rss/top-themen'),
        (u'Handelsblatt Schlagzeilen',u'http://www.handelsblatt.com/rss/ticker/'),
        (u'Handelsblatt Finanzen',u'http://www.handelsblatt.com/rss/finanzen/'),
        (u'Handelsblatt Unternehmen',u'http://www.handelsblatt.com/rss/unternehmen/'),
        (u'Handelsblatt Politik',u'http://www.handelsblatt.com/rss/politik/'),
        (u'Handelsblatt Technologie',u'http://www.handelsblatt.com/rss/technologie/'),
        (u'Handelsblatt Meinung',u'http://www.handelsblatt.com/rss/meinung'),
        (u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
        (u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        m = re.search('(?<=;)[0-9]*', url)
        return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'
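For reference, print_version above pulls the numeric document id that follows the ';' in an article URL and splices it into Handelsblatt's print template. A standalone sketch of the same transformation; the article URL here is a hypothetical example of that shape, not a real link:

import re

# Hypothetical article URL of the assumed shape: the doc id follows the ';'.
url = 'http://www.handelsblatt.com/politik/konjunktur/nachrichten;2684214'
m = re.search('(?<=;)[0-9]*', url)
print u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'
# -> http://www.handelsblatt.com/_b=2684214,_p=21,_t=ftprint,doc_page=0;printpage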
38
resources/recipes/hola.recipe
Normal file
@ -0,0 +1,38 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Brendan Sleight <bms.calibre at barwap.com>'
'''
hola.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Hola(BasicNewsRecipe):
    title = u'Hola'
    __author__ = 'bmsleight'
    description = 'diario de actualidad, moda y belleza.'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'es'

    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'id':'cuerpo'})
    ]

    feeds = [
        (u'Famosos'        , u'http://www.hola.com/famosos/rss.xml'        ),
        (u'Realeza'        , u'http://www.hola.com/realeza/rss.xml'        ),
        (u'Cine'           , u'http://www.hola.com/cine/rss.xml'           ),
        (u'Música'         , u'http://www.hola.com/musica/rss.xml'         ),
        (u'Moda y modelos' , u'http://www.hola.com/moda/portada/rss.xml'   ),
        (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml'),
        (u'Niños'          , u'http://www.hola.com/ninos/rss.xml'          ),
        (u'Todas las noticias', u'http://int2.hola.com/app/feeds/rss_hola.php'),
    ]

    def get_article_url(self, article):
        url = article.get('guid', None)
        return url
@ -33,13 +33,14 @@ class IrishTimes(BasicNewsRecipe):
            ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
          ]

    def print_version(self, url):
        if url.count('rss.feedsportal.com'):
            u = 'http://www.irishtimes.com' + \
                (((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01..htm','_pf.html')
        else:
            u = url.replace('.html','_pf.html')
        return u

    def get_article_url(self, article):
        return article.link
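For readers decoding the print_version above: feedsportal redirector URLs encode the real Irish Times path after a fixed 69-character prefix, with '0C' standing for '/' and '0A' for '0'. A sketch of the reverse mapping on a made-up encoded tail (illustrative, not a captured URL):

# Made-up encoded tail; real tails are what url[69:] yields in the recipe.
tail = '0Cnewspaper0Cfrontpage0C20A10A11150C12345670Bhtml/story01..htm'
path = tail.replace('0C', '/').replace('0A', '0')
print 'http://www.irishtimes.com' + path.replace('0Bhtml/story01..htm', '_pf.html')
# -> http://www.irishtimes.com/newspaper/frontpage/20101115/1234567_pf.html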
@ -38,6 +38,7 @@ class LaJornada_mx(BasicNewsRecipe):
        .loc{font-weight: bold}
        .carton{text-align: center}
        .credit{font-weight: bold}
        .sumario{font-weight: bold; text-align: center}
        .text{margin-top: 1.4em}
        p.inicial{display: inline; font-size: xx-large; font-weight: bold}
        p.s-s{display: inline; text-indent: 0}
35
resources/recipes/marctv.recipe
Normal file
@ -0,0 +1,35 @@
__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

'''
Fetch MarcTV.
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MarcTVde(BasicNewsRecipe):

    title = 'Marc Toensings Visionen'
    description = 'Marc Toensings Visionen'
    language = 'de'
    __author__ = 'Marc Toensing'

    max_articles_per_feed = 40
    oldest_article = 665
    use_embedded_content = False

    remove_tags = []
    keep_only_tags = [dict(name='div', attrs={'class':["content"]})]

    feeds = [(u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'), (u'Leben', u'http://feeds.feedburner.com/marctv/leben'), (u'Medien', u'http://feeds.feedburner.com/marctv/medien')]

    extra_css = '#wrapper .entry p img{width:620px; height: 270px;}'

    def get_cover_url(self):
        return 'http://marctv.de/marctv.png'
@ -1,7 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Eddie Lau'
'''
modified from Singtao Toronto calibre recipe by rty
Change Log:
2010/10/31: skip repeated articles in section pages
'''

import datetime
@ -23,42 +25,37 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
    recursions = 0
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'

    keep_only_tags = [dict(name='h1'),
                      dict(attrs={'id':['newscontent01','newscontent02']})]

    def get_fetchdate(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 5.30am, all news are available
        dt_local = dt_utc - datetime.timedelta(-2.5/24)
        return dt_local.strftime("%Y%m%d")

    def parse_index(self):
        feeds = []
        dateStr = self.get_fetchdate()
        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet']})
        current_articles = []
        included_urls = []
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://news.mingpao.com/' + dateStr + '/' + url
            if url not in included_urls:
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles
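The included_urls list added above is what implements this release's "skip repeated articles in section pages" fix: each resolved article URL is remembered and later duplicates on the same section page are dropped. The pattern in isolation, on made-up hrefs:

# Made-up hrefs; the second 'gaa1.htm' is skipped as a duplicate.
included_urls = []
current_articles = []
for href in ['gaa1.htm', 'gaa2.htm', 'gaa1.htm']:
    url = 'http://news.mingpao.com/20101112/' + href
    if url not in included_urls:
        current_articles.append({'title': href, 'url': url, 'description': ''})
        included_urls.append(url)
print len(current_articles)   # -> 2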
57
resources/recipes/mmc_rtv.recipe
Normal file
@ -0,0 +1,57 @@
__license__   = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.rtvslo.si
'''
from calibre.web.feeds.news import BasicNewsRecipe

class MMCRTV(BasicNewsRecipe):
    title = u'MMC RTV Slovenija'
    __author__ = u'BlonG'
    description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija"
    oldest_article = 3
    max_articles_per_feed = 20
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/rtv_slo_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        split_url = url.split("/")
        print_url = 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + split_url[-1]
        return print_url

    keep_only_tags = [
        dict(name='div', attrs={'class':'title'}),
        dict(name='div', attrs={'id':'newsbody'}),
        dict(name='div', attrs={'id':'newsblocks'}),
    ]

    # remove_tags = [
    #     dict(name='div', attrs={'id':'newsblocks'}),
    # ]

    feeds = [
        (u'Slovenija', u'http://www.rtvslo.si/feeds/01.xml'),
        (u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
        (u'Evropska unija', u'http://www.rtvslo.si/feeds/16.xml'),
        (u'Gospodarstvo', u'http://www.rtvslo.si/feeds/04.xml'),
        (u'\u010crna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
        (u'Okolje', u'http://www.rtvslo.si/feeds/12.xml'),
        (u'Znanost in tehnologija', u'http://www.rtvslo.si/feeds/09.xml'),
        (u'Zabava', u'http://www.rtvslo.si/feeds/06.xml'),
        (u'Ture avanture', u'http://www.rtvslo.si/feeds/28.xml'),
    ]

    # def preprocess_html(self, soup):
    #     newsblocks = soup.find('div', attrs={'id':'newsblocks'})
    #     soup.find('div', attrs={'id':'newsbody'}).insert(-1, newsblocks)
    #     return soup
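print_version above assumes the article id is the final path segment of the URL; a sketch with a hypothetical rtvslo.si article URL:

# Hypothetical article URL; only the trailing id matters to print_version.
url = 'http://www.rtvslo.si/slovenija/primer-naslova/123456'
print 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + url.split('/')[-1]
# -> http://www.rtvslo.si/index.php?c_mod=news&op=print&id=123456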
68
resources/recipes/newsweek_polska.recipe
Normal file
@ -0,0 +1,68 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe

class Newsweek(BasicNewsRecipe):
    EDITION = 0

    title = u'Newsweek Polska'
    __author__ = 'Mateusz Kielar'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'en'
    remove_javascript = True

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'}))

    remove_tags = []
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'copy'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'url'}))

    extra_css = '''
        .body {font-size: small}
        .author {font-size: x-small}
        .lead {font-size: x-small}
        .title{font-size: x-large; font-weight: bold}
    '''

    def print_version(self, url):
        return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'

    def find_last_full_issue(self):
        page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
        page = self.index_to_soup(issue)
        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
        page = self.index_to_soup(issue)
        self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')

    def parse_index(self):
        self.find_last_full_issue()
        soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
        img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
        self.cover_url = img['src']
        feeds = []
        parent = soup.find(id='content-left-big')
        for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
            section = self.tag_to_string(txt).capitalize()
            articles = list(self.find_articles(txt))
            feeds.append((section, articles))
        return feeds

    def find_articles(self, txt):
        for a in txt.findAllNext( attrs={'class':['strong','hr']}):
            if a.name == 'div':
                break
            yield {
                'title' : self.tag_to_string(a),
                'url' : 'http://www.newsweek.pl' + a['href'],
                'date' : '',
                'description' : ''
            }
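The print_version above relies on EDITION having been set by find_last_full_issue; it strips the issue-specific prefix and appends '/print'. A sketch with hypothetical values (the edition number and article slug are made up):

EDITION = 1521   # hypothetical edition number
url = 'http://www.newsweek.pl/artykuly/wydanie/%d/przykladowy-artykul,12345,1,1.html' % EDITION
print url.replace('http://www.newsweek.pl/artykuly/wydanie/' + str(EDITION),
                  'http://www.newsweek.pl/artykuly') + '/print'
# -> http://www.newsweek.pl/artykuly/przykladowy-artykul,12345,1,1.html/print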
35
resources/recipes/now_toronto.recipe
Normal file
@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Based on Lars Jacob's Taz Digiabo recipe

__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'

import os, urllib2, zipfile
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile

class NowToronto(BasicNewsRecipe):
    title = u'Now Toronto'
    description = u'Now Toronto'
    __author__ = 'Starson17'
    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
        soup = self.index_to_soup(epub_feed)
        url = soup.find(name = 'feedburner:origlink').string
        f = urllib2.urlopen(url)
        tmp = PersistentTemporaryFile(suffix='.epub')
        self.report_progress(0,_('downloading epub'))
        tmp.write(f.read())
        tmp.close()
        zfile = zipfile.ZipFile(tmp.name, 'r')
        self.report_progress(0,_('extracting epub'))
        zfile.extractall(self.output_dir)
        tmp.close()
        index = os.path.join(self.output_dir, 'content.opf')
        self.report_progress(1,_('epub downloaded and extracted'))
        return index
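build_index above skips HTML scraping entirely: the publisher ships a ready-made EPUB, which is just a ZIP container, so the recipe downloads it and unpacks it over the output directory. The unpack step in isolation (paths are made up):

import zipfile

# Made-up paths: an already-downloaded EPUB and the recipe's output directory.
zfile = zipfile.ZipFile('/tmp/now_toronto.epub', 'r')
zfile.extractall('/tmp/recipe_output')   # leaves content.opf at the top level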
@ -5,65 +5,61 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

class NYTimes(BasicNewsRecipe):

    # set headlinesOnly to True for the headlines-only version
    headlinesOnly = True

    # includeSections: List of sections to include. If empty, all sections found will be included.
    # Otherwise, only the sections named will be included. For example,
    #
    #    includeSections = ['Politics','Sports']
    #
    # would cause only the Politics and Sports sections to be included.

    includeSections = []  # by default, all sections included

    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
    # Otherwise, the sections named will be excluded. For example,
    #
    #    excludeSections = ['Politics','Sports']
    #
    # would cause the Politics and Sports sections to be excluded. This parameter can be used
    # in conjunction with includeSections although in most cases using one or the other, but
    # not both, is sufficient.

    excludeSections = []

    # one_picture_per_article specifies that calibre should only use the first image
    # from an article (if one exists). If one_picture_per_article = True, the image
    # will be moved to a location between the headline and the byline.
    # If one_picture_per_article = False, all images from the article will be included
    # and shown in their original location.
    one_picture_per_article = True

    # The maximum number of articles that will be downloaded
    max_articles_per_feed = 100

    if headlinesOnly:
        title='New York Times Headlines'
        description = 'Headlines from the New York Times'
    else:
        title='New York Times'
        description = 'Today\'s New York Times'

    __author__ = 'GRiker/Kovid Goyal/Nick Redding'
    language = 'en'
    requires_version = (0, 7, 5)

    timefmt = ''
    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    cover_margins = (18,18,'grey99')
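The includeSections/excludeSections knobs are applied later by filter_ans, which walks the (section, articles) list and deletes non-matching entries. The filtering logic reduces to this (sample data is made up):

# Made-up (section, articles) pairs as filter_ans receives them.
ans = [('Politics', ['a1']), ('Sports', ['a2']), ('Arts', ['a3'])]
includeSections = ['Politics', 'Arts']
excludeSections = ['Arts']

kept = [(name, arts) for name, arts in ans
        if (includeSections == [] or name in includeSections)
        and name not in excludeSections]
print [name for name, arts in kept]   # -> ['Politics']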
@ -82,6 +78,7 @@ class NYTimes(BasicNewsRecipe):
            'entry-response module',
            'icon enlargeThis',
            'leftNavTabs',
            'metaFootnote',
            'module box nav',
            'nextArticleLink',
            'nextArticleLink clearfix',
@ -89,12 +86,13 @@ class NYTimes(BasicNewsRecipe):
            'relatedSearchesModule',
            'side_tool',
            'singleAd',
            re.compile('^subNavigation'),
            re.compile('^leaderboard'),
            re.compile('^module'),
            ]}),
        dict(id=[
            'adxLeaderboard',
            'adxSponLink',
            'archive',
            'articleExtras',
            'articleInline',
@ -105,87 +103,98 @@ class NYTimes(BasicNewsRecipe):
            'footer',
            'header',
            'header_search',
            'inlineBox',
            'login',
            'masthead',
            'masthead-nav',
            'memberTools',
            'navigation',
            'portfolioInline',
            'readerReviews',
            'readerReviewsCount',
            'relatedArticles',
            'relatedTopics',
            'respond',
            'side_search',
            'side_index',
            'side_tool',
            'toolsRight',
            ]),
        dict(name=['script', 'noscript', 'style','form','hr'])]

    no_stylesheets = True
    extra_css = '''
        .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
        .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .timestamp { text-align: left; font-size: small; }
        .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        a:link {text-decoration: none; }
        .articleBody { }
        .authorId {text-align: left; }
        .image {text-align: center;}
        .source {text-align: left; }'''

    def filter_ans(self, ans) :
        total_article_count = 0
        idx = 0
        idx_max = len(ans)-1
        while idx <= idx_max:
            if self.includeSections != []:
                if ans[idx][0] not in self.includeSections:
                    print "SECTION NOT INCLUDED: ",ans[idx][0]
                    del ans[idx]
                    idx_max = idx_max-1
                    continue
            if ans[idx][0] in self.excludeSections:
                print "SECTION EXCLUDED: ",ans[idx][0]
                del ans[idx]
                idx_max = idx_max-1
                continue
            if self.verbose:
                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
            for article in ans[idx][1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                              article['url'].encode('cp1252','replace')))
            idx = idx+1

        self.log( "Queued %d articles" % total_article_count )
        return ans

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        return fixed

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.nytimes.com/auth/login')
            br.select_form(name='login')
            br['USERID'] = self.username
            br['PASSWORD'] = self.password
            raw = br.submit().read()
            if 'Please try again' in raw:
                raise Exception('Your username and password are incorrect')
        return br

    def skip_ad_pages(self, soup):
@ -213,6 +222,9 @@ class NYTimes(BasicNewsRecipe):
        cover = None
        return cover

    def short_title(self):
        return self.title

    def index_to_soup(self, url_or_raw, raw=False):
        '''
        OVERRIDE of class method
@ -255,157 +267,184 @@ class NYTimes(BasicNewsRecipe):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&amp;' with '&'
            massaged = re.sub("&amp;","&", massaged)
            return self.fixChars(massaged)
        else:
            return description

    def parse_todays_index(self):

        def feed_title(div):
            return ''.join(div.findAll(text=True, recursive=True)).strip()

        articles = {}
        key = None
        ans = []
        url_list = []

        def handle_article(div):
            a = div.find('a', href=True)
            if not a:
                return
            url = re.sub(r'\?.*', '', a['href'])
            if not url.startswith("http"):
                return
            if not url.endswith(".html"):
                return
            if 'podcast' in url:
                return
            if '/video/' in url:
                return
            url += '?pagewanted=all'
            if url in url_list:
                return
            url_list.append(url)
            title = self.tag_to_string(a, use_alt=True).strip()
            description = ''
            pubdate = strftime('%a, %d %b')
            summary = div.find(True, attrs={'class':'summary'})
            if summary:
                description = self.tag_to_string(summary, use_alt=False)
            author = ''
            authorAttribution = div.find(True, attrs={'class':'byline'})
            if authorAttribution:
                author = self.tag_to_string(authorAttribution, use_alt=False)
            else:
                authorAttribution = div.find(True, attrs={'class':'byline'})
                if authorAttribution:
                    author = self.tag_to_string(authorAttribution, use_alt=False)
            feed = key if key is not None else 'Uncategorized'
            if not articles.has_key(feed):
                ans.append(feed)
                articles[feed] = []
            articles[feed].append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author,
                     content=''))

        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')

        # Find each article
        for div in soup.findAll(True,
            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

            if div['class'] in ['section-headline','sectionHeader']:
                key = string.capwords(feed_title(div))
                key = key.replace('Op-ed','Op-Ed')
                key = key.replace('U.s.','U.S.')
            elif div['class'] in ['story', 'story headline'] :
                handle_article(div)
            elif div['class'] == 'headlinesOnly multiline flush':
                for lidiv in div.findAll('li'):
                    handle_article(lidiv)

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return self.filter_ans(ans)

    def parse_headline_index(self):

        articles = {}
        ans = []
        url_list = []

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

        # Fetch the content table
        content_table = soup.find('table',{'id':'content'})
        if content_table is None:
            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
            return None

        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections

        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
            for div_sec in td_col.findAll('div',recursive=False):
                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
                    section_name = re.sub(r'^ *$','',section_name)
                    if section_name == '':
                        continue
                    section_name = string.capwords(section_name)
                    if section_name == 'U.s.':
                        section_name = 'U.S.'
                    elif section_name == 'Op-ed':
                        section_name = 'Op-Ed'
                    pubdate = strftime('%a, %d %b')

                    search_div = div_sec
                    for next_tag in h6_sec_name.findNextSiblings(True):
                        if next_tag.__class__.__name__ == 'Tag':
                            if next_tag.name == 'div':
                                search_div = next_tag
                            break

                    # Get the articles
                    for h3_item in search_div.findAll('h3'):
                        byline = h3_item.h6
                        if byline is not None:
                            author = self.tag_to_string(byline, use_alt=False)
                        else:
                            author = ''
                        a = h3_item.find('a', href=True)
                        if not a:
                            continue
                        url = re.sub(r'\?.*', '', a['href'])
                        if not url.startswith("http"):
                            continue
                        if not url.endswith(".html"):
                            continue
                        if 'podcast' in url:
                            continue
                        if 'video' in url:
                            continue
                        url += '?pagewanted=all'
                        if url in url_list:
                            continue
                        url_list.append(url)
                        self.log("URL %s" % url)
                        title = self.tag_to_string(a, use_alt=True).strip()
                        desc = h3_item.find('p')
                        if desc is not None:
                            description = self.tag_to_string(desc, use_alt=False)
                        else:
                            description = ''
                        if not articles.has_key(section_name):
                            ans.append(section_name)
                            articles[section_name] = []
                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return self.filter_ans(ans)

    def parse_index(self):
        if self.headlinesOnly:
            return self.parse_headline_index()
        else:
            return self.parse_todays_index()

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def preprocess_html(self, soup):
        kicker_tag = soup.find(attrs={'class':'kicker'})
        if kicker_tag: # remove Op-Ed author head shots
            tagline = self.tag_to_string(kicker_tag)
            if tagline=='Op-Ed Columnist':
                img_div = soup.find('div','inlineImage module')
                if img_div:
                    img_div.extract()
        return self.strip_anchors(soup)

    def postprocess_html(self,soup, True):
@ -422,8 +461,9 @@ class NYTimes(BasicNewsRecipe):
            firstImg = inlineImgs[0]
            for inlineImg in inlineImgs[1:]:
                inlineImg.extract()
            # Move firstImg before article body
            #article_body = soup.find(True, {'id':'articleBody'})
            cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
            if cgFirst:
                # Strip all sibling NavigableStrings: noise
                navstrings = cgFirst.findAll(text=True, recursive=False)
@ -443,30 +483,18 @@ class NYTimes(BasicNewsRecipe):
            if headline_found:
                cgFirst.insert(insertLoc,firstImg)
            else:
                self.log(">>> No class:'columnGroup first' found <<<")

        # Change captions to italic
        for caption in soup.findAll(True, {'class':'caption'}) :
            if caption and caption.contents[0]:
                cTag = Tag(soup, "p", [("class", "caption")])
                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                mp_off = c.find("More Photos")
                if mp_off >= 0:
                    c = c[:mp_off]
                cTag.insert(0, c)
                caption.replaceWith(cTag)

        # Change <nyt_headline> to <h2>
        h1 = soup.find('h1')
@ -506,17 +534,6 @@ class NYTimes(BasicNewsRecipe):
                bTag.insert(0, subhead.contents[0])
                subhead.replaceWith(bTag)

        divTag = soup.find('div',attrs={'id':'articleBody'})
        if divTag:
            divTag['class'] = divTag['id']
@ -532,11 +549,3 @@ class NYTimes(BasicNewsRecipe):
        return soup
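Both NYTimes variants flatten in-article links through strip_anchors, so that anchor tags without images are replaced by their plain text. A minimal illustration on a snippet:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<p>See <a href="/2010/story.html">this story</a> for more.</p>')
for a in soup.findAll('a'):
    if a.img is None:   # keep image links, drop text-only ones
        a.replaceWith(a.renderContents().decode('cp1252','replace'))
print soup   # -> <p>See this story for more.</p>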
@ -5,52 +5,186 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

class NYTimes(BasicNewsRecipe):

    # set headlinesOnly to True for the headlines-only version
    headlinesOnly = False

    # includeSections: List of sections to include. If empty, all sections found will be included.
    # Otherwise, only the sections named will be included. For example,
    #
    #    includeSections = ['Politics','Sports']
    #
    # would cause only the Politics and Sports sections to be included.

    includeSections = []  # by default, all sections included

    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
    # Otherwise, the sections named will be excluded. For example,
    #
    #    excludeSections = ['Politics','Sports']
    #
    # would cause the Politics and Sports sections to be excluded. This parameter can be used
    # in conjunction with includeSections although in most cases using one or the other, but
    # not both, is sufficient.

    excludeSections = []

    # one_picture_per_article specifies that calibre should only use the first image
    # from an article (if one exists). If one_picture_per_article = True, the image
    # will be moved to a location between the headline and the byline.
    # If one_picture_per_article = False, all images from the article will be included
    # and shown in their original location.
    one_picture_per_article = True

    # The maximum number of articles that will be downloaded
    max_articles_per_feed = 100

    if headlinesOnly:
        title='New York Times Headlines'
        description = 'Headlines from the New York Times'
    else:
        title='New York Times'
        description = 'Today\'s New York Times'

    __author__ = 'GRiker/Kovid Goyal/Nick Redding'
    language = 'en'
    requires_version = (0, 7, 5)

    timefmt = ''
    needs_subscription = True
    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
    cover_margins = (18,18,'grey99')

    remove_tags_before = dict(id='article')
    remove_tags_after = dict(id='article')
    remove_tags = [dict(attrs={'class':[
                            'articleFooter',
                            'articleTools',
                            'columnGroup doubleRule',
                            'columnGroup singleRule',
                            'columnGroup last',
                            'doubleRule',
                            'dottedLine',
                            'entry-meta',
                            'entry-response module',
                            'icon enlargeThis',
                            'leftNavTabs',
                            'metaFootnote',
                            'module box nav',
                            'nextArticleLink',
                            'nextArticleLink clearfix',
                            'post-tools',
                            'relatedSearchesModule',
                            'side_tool',
                            'singleAd',
                            re.compile('^subNavigation'),
                            re.compile('^leaderboard'),
                            re.compile('^module'),
                            ]}),
                   dict(id=[
                            'adxLeaderboard',
                            'adxSponLink',
                            'archive',
                            'articleExtras',
                            'articleInline',
                            'blog_sidebar',
                            'businessSearchBar',
                            'cCol',
                            'entertainmentSearchBar',
                            'footer',
                            'header',
                            'header_search',
                            'inlineBox',
                            'login',
                            'masthead',
                            'masthead-nav',
                            'memberTools',
                            'navigation',
                            'portfolioInline',
                            'readerReviews',
                            'readerReviewsCount',
                            'relatedArticles',
                            'relatedTopics',
                            'respond',
                            'side_search',
                            'side_index',
                            'side_tool',
                            'toolsRight',
                            ]),
                   dict(name=['script', 'noscript', 'style','form','hr'])]

    no_stylesheets = True
    extra_css = '''
        .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
        .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        .timestamp { text-align: left; font-size: small; }
        .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
        a:link {text-decoration: none; }
        .articleBody { }
        .authorId {text-align: left; }
        .image {text-align: center;}
        .source {text-align: left; }'''

    def filter_ans(self, ans) :
        total_article_count = 0
        idx = 0
        idx_max = len(ans)-1
        while idx <= idx_max:
            if self.includeSections != []:
                if ans[idx][0] not in self.includeSections:
                    print "SECTION NOT INCLUDED: ",ans[idx][0]
                    del ans[idx]
                    idx_max = idx_max-1
                    continue
            if ans[idx][0] in self.excludeSections:
                print "SECTION EXCLUDED: ",ans[idx][0]
                del ans[idx]
                idx_max = idx_max-1
                continue
            if self.verbose:
                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
            for article in ans[idx][1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                              article['url'].encode('cp1252','replace')))
            idx = idx+1

        self.log( "Queued %d articles" % total_article_count )
        return ans

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        return fixed

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -60,22 +194,19 @@ class NYTimes(BasicNewsRecipe):
            br['USERID'] = self.username
            br['PASSWORD'] = self.password
            raw = br.submit().read()
            if 'Please try again' in raw:
                raise Exception('Your username and password are incorrect')
        return br

    def skip_ad_pages(self, soup):
        # Skip ad pages served before actual article
        skip_tag = soup.find(True, {'name':'skip'})
        if skip_tag is not None:
            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
            url += '?pagewanted=all'
            self.log.warn("Skipping ad to article at '%s'" % url)
            return self.index_to_soup(url, raw=True)

    def get_cover_url(self):
        cover = None
@ -93,12 +224,57 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
return cover
|
return cover
|
||||||
|
|
||||||
def short_title(self):
|
def short_title(self):
|
||||||
return 'New York Times'
|
return self.title
|
||||||
|
|
||||||
-    def parse_index(self):
-        self.encoding = 'cp1252'
-        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
-        self.encoding = decode
+    def index_to_soup(self, url_or_raw, raw=False):
+        '''
+        OVERRIDE of class method
+        deals with various page encodings between index and articles
+        '''
+        def get_the_soup(docEncoding, url_or_raw, raw=False) :
+            if re.match(r'\w+://', url_or_raw):
+                f = self.browser.open(url_or_raw)
+                _raw = f.read()
+                f.close()
+                if not _raw:
+                    raise RuntimeError('Could not fetch index from %s'%url_or_raw)
+            else:
+                _raw = url_or_raw
+            if raw:
+                return _raw
+
+            if not isinstance(_raw, unicode) and self.encoding:
+                _raw = _raw.decode(docEncoding, 'replace')
+            massage = list(BeautifulSoup.MARKUP_MASSAGE)
+            massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
+            return BeautifulSoup(_raw, markupMassage=massage)
+
+        # Entry point
+        print "index_to_soup()"
+        soup = get_the_soup( self.encoding, url_or_raw )
+        contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
+        docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
+        if docEncoding == '' :
+            docEncoding = self.encoding
+
+        if self.verbose > 2:
+            self.log( "  document encoding: '%s'" % docEncoding)
+        if docEncoding != self.encoding :
+            soup = get_the_soup(docEncoding, url_or_raw)
+
+        return soup
+
+    def massageNCXText(self, description):
+        # Kindle TOC descriptions won't render certain characters
+        if description:
+            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
+            # Replace '&#038;' with '&'
+            massaged = re.sub("&#038;","&", massaged)
+            return self.fixChars(massaged)
+        else:
+            return description
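The override boils down to a two-pass decode: fetch once with the recipe's default encoding, read the charset the page itself declares in its <meta http-equiv="Content-Type"> tag, and refetch-decode only if the two disagree. The same idea reduced to a sketch; the helper name and regex are illustrative, not calibre API:

    import re
    import urllib2

    def sniff_and_decode(url, default='cp1252'):
        raw = urllib2.urlopen(url).read()
        # look for e.g. content="text/html; charset=utf-8" near the top
        m = re.search(r'charset=([\w-]+)', raw[:4096])
        encoding = m.group(1) if m else default
        return raw.decode(encoding, 'replace')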
+    def parse_todays_index(self):
+
         def feed_title(div):
             return ''.join(div.findAll(text=True, recursive=True)).strip()
@ -119,12 +295,13 @@ class NYTimes(BasicNewsRecipe):
                 return
             if 'podcast' in url:
                 return
+            if '/video/' in url:
+                return
             url += '?pagewanted=all'
             if url in url_list:
                 return
             url_list.append(url)
             title = self.tag_to_string(a, use_alt=True).strip()
-            #self.log("Title: %s" % title)
             description = ''
             pubdate = strftime('%a, %d %b')
             summary = div.find(True, attrs={'class':'summary'})
@ -140,6 +317,7 @@ class NYTimes(BasicNewsRecipe):
                 author = self.tag_to_string(authorAttribution, use_alt=False)
             feed = key if key is not None else 'Uncategorized'
             if not articles.has_key(feed):
+                ans.append(feed)
                 articles[feed] = []
             articles[feed].append(
                 dict(title=title, url=url, date=pubdate,
@ -147,46 +325,228 @@ class NYTimes(BasicNewsRecipe):
                     content=''))

+        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
+
-        # Find each instance of class="section-headline", class="story", class="story headline"
+        # Find each article
         for div in soup.findAll(True,
             attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

             if div['class'] in ['section-headline','sectionHeader']:
                 key = string.capwords(feed_title(div))
-                articles[key] = []
-                ans.append(key)
-                #self.log('Section: %s' % key)
+                key = key.replace('Op-ed','Op-Ed')
+                key = key.replace('U.s.','U.S.')

             elif div['class'] in ['story', 'story headline'] :
                 handle_article(div)
             elif div['class'] == 'headlinesOnly multiline flush':
                 for lidiv in div.findAll('li'):
                     handle_article(lidiv)

-        # ans = self.sort_index_by(ans, {'The Front Page':-1,
-        #                     'Dining In, Dining Out':1,
-        #                     'Obituaries':2})
         ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+    def parse_headline_index(self):
+
+        articles = {}
+        ans = []
+        url_list = []
+
+        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
+
+        # Fetch the content table
+        content_table = soup.find('table',{'id':'content'})
+        if content_table is None:
+            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
+            return None
+
+        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
+
+        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
+            for div_sec in td_col.findAll('div',recursive=False):
+                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
+                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+                    section_name = re.sub(r'^ *$','',section_name)
+                    if section_name == '':
+                        continue
+                    section_name=string.capwords(section_name)
+                    if section_name == 'U.s.':
+                        section_name = 'U.S.'
+                    elif section_name == 'Op-ed':
+                        section_name = 'Op-Ed'
+                    pubdate = strftime('%a, %d %b')
+
+                    search_div = div_sec
+                    for next_tag in h6_sec_name.findNextSiblings(True):
+                        if next_tag.__class__.__name__ == 'Tag':
+                            if next_tag.name == 'div':
+                                search_div = next_tag
+                            break
+
+                    # Get the articles
+                    for h3_item in search_div.findAll('h3'):
+                        byline = h3_item.h6
+                        if byline is not None:
+                            author = self.tag_to_string(byline,use_alt=False)
+                        else:
+                            author = ''
+                        a = h3_item.find('a', href=True)
+                        if not a:
+                            continue
+                        url = re.sub(r'\?.*', '', a['href'])
+                        if not url.startswith("http"):
+                            continue
+                        if not url.endswith(".html"):
+                            continue
+                        if 'podcast' in url:
+                            continue
+                        if 'video' in url:
+                            continue
+                        url += '?pagewanted=all'
+                        if url in url_list:
+                            continue
+                        url_list.append(url)
+                        self.log("URL %s" % url)
+                        title = self.tag_to_string(a, use_alt=True).strip()
+                        desc = h3_item.find('p')
+                        if desc is not None:
+                            description = self.tag_to_string(desc,use_alt=False)
+                        else:
+                            description = ''
+                        if not articles.has_key(section_name):
+                            ans.append(section_name)
+                            articles[section_name] = []
+                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
+    def parse_index(self):
+        if self.headlinesOnly:
+            return self.parse_headline_index()
+        else:
+            return self.parse_todays_index()
+
+    def strip_anchors(self,soup):
+        paras = soup.findAll(True)
+        for para in paras:
+            aTags = para.findAll('a')
+            for a in aTags:
+                if a.img is None:
+                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
+        return soup
-        return ans
     def preprocess_html(self, soup):

         kicker_tag = soup.find(attrs={'class':'kicker'})
-        if kicker_tag:
+        if kicker_tag: # remove Op_Ed author head shots
             tagline = self.tag_to_string(kicker_tag)
-            #self.log("FOUND KICKER %s" % tagline)
             if tagline=='Op-Ed Columnist':
                 img_div = soup.find('div','inlineImage module')
-                #self.log("Searching for photo")
                 if img_div:
                     img_div.extract()
-                    #self.log("Photo deleted")
-        refresh = soup.find('meta', {'http-equiv':'refresh'})
-        if refresh is None:
-            return soup
-        content = refresh.get('content').partition('=')[2]
-        raw = self.browser.open_novisit('http://www.nytimes.com'+content).read()
-        return BeautifulSoup(raw.decode('cp1252', 'replace'))
+        return self.strip_anchors(soup)
+    def postprocess_html(self,soup, first_fetch):
+
+        if self.one_picture_per_article:
+            # Remove all images after first
+            largeImg = soup.find(True, {'class':'articleSpanImage'})
+            inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
+            if largeImg:
+                for inlineImg in inlineImgs:
+                    inlineImg.extract()
+            else:
+                if inlineImgs:
+                    firstImg = inlineImgs[0]
+                    for inlineImg in inlineImgs[1:]:
+                        inlineImg.extract()
+                    # Move firstImg before article body
+                    #article_body = soup.find(True, {'id':'articleBody'})
+                    cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
+                    if cgFirst:
+                        # Strip all sibling NavigableStrings: noise
+                        navstrings = cgFirst.findAll(text=True, recursive=False)
+                        [ns.extract() for ns in navstrings]
+                        headline_found = False
+                        tag = cgFirst.find(True)
+                        insertLoc = 0
+                        while True:
+                            insertLoc += 1
+                            if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
+                                headline_found = True
+                                break
+                            tag = tag.nextSibling
+                            if not tag:
+                                headline_found = False
+                                break
+                        if headline_found:
+                            cgFirst.insert(insertLoc,firstImg)
+                    else:
+                        self.log(">>> No class:'columnGroup first' found <<<")

+        # Change captions to italic
+        for caption in soup.findAll(True, {'class':'caption'}) :
+            if caption and caption.contents[0]:
+                cTag = Tag(soup, "p", [("class", "caption")])
+                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
+                mp_off = c.find("More Photos")
+                if mp_off >= 0:
+                    c = c[:mp_off]
+                cTag.insert(0, c)
+                caption.replaceWith(cTag)

+        # Change <nyt_headline> to <h2>
+        h1 = soup.find('h1')
+        if h1:
+            headline = h1.find("nyt_headline")
+            if headline:
+                tag = Tag(soup, "h2")
+                tag['class'] = "headline"
+                tag.insert(0, self.fixChars(headline.contents[0]))
+                h1.replaceWith(tag)
+        else:
+            # Blog entry - replace headline, remove <hr> tags
+            headline = soup.find('title')
+            if headline:
+                tag = Tag(soup, "h2")
+                tag['class'] = "headline"
+                tag.insert(0, self.fixChars(headline.contents[0]))
+                soup.insert(0, tag)
+                hrs = soup.findAll('hr')
+                for hr in hrs:
+                    hr.extract()

+        # Change <h1> to <h3> - used in editorial blogs
+        masthead = soup.find("h1")
+        if masthead:
+            # Nuke the href
+            if masthead.a:
+                del(masthead.a['href'])
+            tag = Tag(soup, "h3")
+            tag.insert(0, self.fixChars(masthead.contents[0]))
+            masthead.replaceWith(tag)

+        # Change <span class="bold"> to <b>
+        for subhead in soup.findAll(True, {'class':'bold'}) :
+            if subhead.contents:
+                bTag = Tag(soup, "b")
+                bTag.insert(0, subhead.contents[0])
+                subhead.replaceWith(bTag)

+        divTag = soup.find('div',attrs={'id':'articleBody'})
+        if divTag:
+            divTag['class'] = divTag['id']

+        # Add class="authorId" to <div> so we can format with CSS
+        divTag = soup.find('div',attrs={'id':'authorId'})
+        if divTag and divTag.contents[0]:
+            tag = Tag(soup, "p")
+            tag['class'] = "authorId"
+            tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
+                use_alt=False)))
+            divTag.replaceWith(tag)

+        return soup
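A pattern worth noting in postprocess_html() above: with the BeautifulSoup 3 API these recipes use, markup is rewritten by building a fresh Tag bound to the soup and swapping it in with replaceWith(). In isolation, with placeholder markup (calibre ships this BeautifulSoup module):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

    soup = BeautifulSoup('<h1>Old headline</h1>')
    h1 = soup.find('h1')
    tag = Tag(soup, 'h2')              # new element bound to this soup
    tag['class'] = 'headline'
    tag.insert(0, h1.renderContents()) # carry the old contents over
    h1.replaceWith(tag)                # swap it into the tree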
@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
     use_embedded_content = False
     language = 'es'
     remove_empty_feeds = True
+    publication_type = 'newspaper'
     masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif }
+        img{margin-bottom: 0.4em; display:block}
+        #autor{font-weight: bold}
+        #fecha,#epigrafe{font-size: 0.9em; margin: 5px}
+        #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
+        .fgprincipal{font-size: large; font-weight: bold}
+        """

     conversion_options = {
         'comment' : description
@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
         , 'language' : language
     }

-    remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
+    remove_tags = [
+        dict(name=['meta','link'])
+        ,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
+    ]
+    remove_attributes=['lang']

     feeds = [
@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
+        for item in soup.findAll('span', attrs={'id':'seccion'}):
+            it = item.a
+            it.name='span'
+            del it['href']
+            del it['title']
+        for item in soup.findAll('p'):
+            it = item.find('h3')
+            if it:
+                it.name='span'
         return soup
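The new preprocess_html() lines above use a small trick that recurs in several of these recipes: demote an <a> to a <span> and drop its link attributes, so section labels survive conversion without becoming dead links. The trick in isolation, with BeautifulSoup 3 idioms and an illustrative selector:

    for a in soup.findAll('a', attrs={'class':'category'}):
        a.name = 'span'               # keep the text, lose the link
        for attr in ('href', 'title'):
            if a.has_key(attr):       # BeautifulSoup 3 attribute test
                del a[attr]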
resources/recipes/pc_lab.recipe (new file)
@ -0,0 +1,70 @@
#!/usr/bin/env python

from calibre.web.feeds.recipes import BasicNewsRecipe

class PCLab(BasicNewsRecipe):
    cover_url = 'http://pclab.pl/img/logo.png'
    title = u"PC Lab"
    __author__ = 'ravcio - rlelusz[at]gmail.com'
    description = u"Articles from PC Lab website"
    language = 'pl'
    oldest_article = 30.0
    max_articles_per_feed = 100
    recursions = 0
    encoding = 'iso-8859-2'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'class':['substance']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['chapters']})
        ,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':['navigation']})
    ]

    #links to RSS feeds
    feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]

    #load second and subsequent page content
    # in: soup - full page with 'next' button
    # out: appendtag - tag to which new page is to be added
    def append_page(self, soup, appendtag):
        # find the 'Next' button
        pager = soup.find('div', attrs={'class':'next'})

        if pager:
            #search for 'a' element with link to next page (exit if not found)
            a = pager.find('a')
            if a:
                nexturl = a['href']

                soup2 = self.index_to_soup('http://pclab.pl/' + nexturl)

                pagetext_substance = soup2.find('div', attrs={'class':'substance'})
                pagetext = pagetext_substance.find('div', attrs={'class':'data'})
                pagetext.extract()

                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
                pos = len(appendtag.contents)

                self.append_page(soup2, appendtag)

    def preprocess_html(self, soup):
        # soup.body contains no title and no navigator, they are in soup
        self.append_page(soup, soup.body)

        # finally remove some tags
        tags = soup.findAll('div',attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})
        [tag.extract() for tag in tags]

        return soup
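append_page() above recurses once per follow-on page. An iterative version of the same multi-page stitch avoids deep recursion on very long articles; this is a sketch under the same selectors, not a drop-in replacement:

    def append_pages(self, soup, appendtag):
        while True:
            pager = soup.find('div', attrs={'class':'next'})
            a = pager.find('a') if pager else None
            if a is None:
                break                       # no 'Next' button: done
            soup = self.index_to_soup('http://pclab.pl/' + a['href'])
            substance = soup.find('div', attrs={'class':'substance'})
            pagetext = substance.find('div', attrs={'class':'data'})
            pagetext.extract()
            appendtag.insert(len(appendtag.contents), pagetext)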
@ -1,13 +1,10 @@
-#!/usr/bin/env python

 __license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 politika.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Politika(BasicNewsRecipe):
     title = 'Politika Online'
@ -19,53 +16,51 @@ class Politika(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    remove_javascript = True
     encoding = 'utf8'
+    delay = 1
     language = 'sr'
-    lang = 'sr-Latn-RS'
-    direction = 'ltr'
+    publication_type = 'newspaper'
+    masthead_url = 'http://static.politika.co.rs/images_new/politika.gif'
-    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    extra_css = """
+        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+        body{font-family: Arial,Helvetica,sans1,sans-serif}
+        h1{font-family: "Times New Roman",Times,serif1,serif}
+        .articledescription{font-family: sans1, sans-serif}
+        """

     conversion_options = {
         'comment' : description
         , 'tags' : category
         , 'publisher' : publisher
-        , 'language' : lang
+        , 'language' : language
-        , 'pretty_print' : True
     }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

-    keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
+    keep_only_tags = [dict(name='div', attrs={'class':'big_article_home item_details'})]
+    remove_tags_after = dict(attrs={'class':'online_date'})
-    remove_tags = [
-        dict(name='div', attrs={'class':['send_print','txt-komentar']})
-        ,dict(name=['object','link','a'])
-        ,dict(name='h1', attrs={'class':'box_header-tags'})
-    ]
+    remove_tags = [dict(name=['link','meta','iframe','embed','object'])]

     feeds = [
         (u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
         ,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
-        ,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
+        ,(u'Ostali komentari' , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
         ,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
-        ,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
+        ,(u'Pogledi sa strane', u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
         ,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
         ,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
-        ,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
+        ,(u'Spektar' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
     ]

     def preprocess_html(self, soup):
-        soup.html['lang'] = self.lang
-        soup.html['dir' ] = self.direction
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
         for item in soup.findAll(style=True):
             del item['style']
-        ftag = soup.find('div',attrs={'class':'content_center_border'})
-        if ftag.has_key('align'):
-            del ftag['align']
-        return self.adeify_images(soup)
+        for item in soup.findAll('a', attrs={'class':'category'}):
+            item.name='span'
+            if item.has_key('href'):
+                del item['href']
+            if item.has_key('title'):
+                del item['title']
+        return soup

resources/recipes/polityka.recipe (new file)
@ -0,0 +1,68 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe

class Polityka(BasicNewsRecipe):

    title = u'Polityka'
    __author__ = 'Mateusz Kielar'
    description = 'Weekly magazine. Last archive issue'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'en'
    remove_javascript = True

    remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
    remove_tags_after = dict(dict(name = 'div', attrs = {'class' : 'box_footer'}))

    remove_tags =[]
    remove_tags.append(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'box_footer'}))

    extra_css = '''
        h1 {font-size: x-large; font-weight: bold}
        '''

    def parse_index(self):
        soup = self.index_to_soup('http://archiwum.polityka.pl/')
        box_img3 = soup.findAll(attrs={'class' : 'box_img3'})
        feeds = []
        last = 0
        self.cover_url = 'http://archiwum.polityka.pl' + box_img3[-1].find('img')['src']
        last_edition = 'http://archiwum.polityka.pl' + box_img3[-1].find('a')['href']

        while True:
            index = self.index_to_soup(last_edition)

            box_list = index.findAll('div', attrs={'class' : 'box_list'})
            if len(box_list) == 0:
                break

            articles = {}
            for box in box_list:
                for div in box.findAll('div', attrs={'class': 'list_tresc'}):
                    article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
                    section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
                    print section
                    if not articles.has_key(section):
                        articles[section] = []
                    articles[section].append( {
                        'title' : self.tag_to_string(div.a),
                        'url' : 'http://archiwum.polityka.pl' + div.a['href'],
                        'date' : '',
                        'description' : ''
                        })

            for section in articles:
                feeds.append((section, articles[section]))

            last_edition = last_edition.replace('http://archiwum.polityka.pl/wydanie/' + str(last), 'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
            last = last + 1

        return feeds
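The parse_index() loop above walks consecutive issues by rewriting the '/wydanie/<n>' issue number embedded in the archive URL until a page with no article boxes comes back. The core idea with an explicit counter; the URL pattern is taken from the code above, the generator shape is illustrative:

    def walk_editions(recipe, start_url, first_issue):
        n = first_issue
        url = start_url
        while True:
            soup = recipe.index_to_soup(url)
            if not soup.findAll('div', attrs={'class':'box_list'}):
                break    # ran past the newest issue
            yield soup
            url = url.replace('/wydanie/%d' % n, '/wydanie/%d' % (n + 1))
            n += 1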
resources/recipes/rollingstone.recipe (new file)
@ -0,0 +1,69 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
rollingstone.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class RollingStone(BasicNewsRecipe):
    title = 'Rolling Stone Magazine - free content'
    __author__ = 'Darko Miletic'
    description = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.'
    publisher = 'Werner Media inc.'
    category = 'news, music, USA, world'
    oldest_article = 15
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
    masthead_url = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
    extra_css = """
        body{font-family: Georgia,Times,serif }
        img{margin-bottom: 0.4em; display:block}
        """

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    preprocess_regexps = [
        (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE),lambda match: 'xml:lang="en">\n<head>\n')
        ,(re.compile(r'</title>.*?</head>' , re.DOTALL|re.IGNORECASE),lambda match: '</title>\n</head>\n' )
    ]

    keep_only_tags=[
        dict(attrs={'class':['headerImgHolder','headerContent']})
        ,dict(name='div',attrs={'id':['teaser','storyTextContainer']})
        ,dict(name='div',attrs={'class':'blogDetailModule clearfix'})
    ]

    remove_tags = [
        dict(name=['meta','iframe','object','embed'])
        ,dict(attrs={'id':'mpStoryHeader'})
        ,dict(attrs={'class':'relatedTopics'})
    ]
    remove_attributes=['lang','onclick','width','height','name']
    remove_tags_before=dict(attrs={'class':'bloggerInfo'})
    remove_tags_after=dict(attrs={'class':'relatedTopics'})

    feeds = [
        (u'All News' , u'http://www.rollingstone.com/siteServices/rss/allNews' )
        ,(u'All Blogs' , u'http://www.rollingstone.com/siteServices/rss/allBlogs' )
        ,(u'Movie Reviews' , u'http://www.rollingstone.com/siteServices/rss/movieReviews' )
        ,(u'Album Reviews' , u'http://www.rollingstone.com/siteServices/rss/albumReviews' )
        ,(u'Song Reviews' , u'http://www.rollingstone.com/siteServices/rss/songReviews' )
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

resources/recipes/scprint.recipe (new file)
@ -0,0 +1,73 @@
from calibre.web.feeds.news import BasicNewsRecipe, LoginFailed

class SCPrintMagazine(BasicNewsRecipe):
    title = u'SC Print Magazine'
    __author__ = u'Tony Maro'
    description = u'Last print version of the data security magazine'
    INDEX = "http://www.scmagazineus.com/issuearchive/"
    no_stylesheets = True
    language = 'en'
    keep_only_tags = [dict(id=['article','review'])]
    remove_tags = [dict(id=['articlePrintTools','reviewBodyColumn'])]
    LOG_IN = 'http://www.scmagazineus.com/login/'
    tags = 'News,SC Magazine'
    needs_subscription = True

    def parse_index(self):
        articles = []
        issuelink = printsections = None

        soup = self.index_to_soup(self.INDEX)
        sectit = soup.find('div', attrs={'class':'issueArchiveItem'})
        if sectit is not None:
            linkt = sectit.find('a')
            issuelink = linkt['href']
            imgt = sectit.find('img')
            self.cover_url = imgt['src']

        if issuelink is not None:
            issue = self.index_to_soup(issuelink)
            if issue is not None:
                printsections = issue.findAll('div',attrs={'class':'PrintSection'})
            if printsections is not None:
                for printsection in printsections:
                    onesection = []
                    sectiontitle = printsection.find('h3').contents[0]
                    articlesec = printsection.findAll('div',attrs={'class':'IssueArchiveFormat'})
                    if articlesec is not None:
                        ''' got articles '''
                        for onearticle in articlesec:
                            ''' process one article '''
                            arttitlet = onearticle.find('h3')
                            if arttitlet is not None:
                                mylink = arttitlet.find('a')
                                if mylink is not None:
                                    if mylink.has_key('title'):
                                        arttitle = mylink['title']
                                    else:
                                        arttitle = 'unknown'
                                    if mylink.has_key('href'):
                                        artlink = mylink['href']
                                        artlink = artlink.replace("/article","/printarticle")
                                        artlink = artlink.replace("/review","/printreview")
                                        deck = onearticle.find('div',attrs={'class':'deck'})
                                        if deck is not None:
                                            deck = deck.contents[0]
                                        onesection.append({'title':arttitle, 'url':artlink, 'description':deck,'date':''})
                    articles.append((sectiontitle, onesection))

        return articles

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOG_IN)
        br.select_form(name='aspnetForm')
        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtEmail'] = self.username
        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtPassword'] = self.password
        raw = br.submit("ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$btnLogin").read()
        if 'Logout</a>' not in raw:
            raise LoginFailed(
                _('Failed to log in, check your username and password for'
                  ' the calibre Periodicals service.'))
        return br
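get_browser() above is the standard needs_subscription pattern: log in once through the shared mechanize browser, verify the response actually looks logged-in, and hand the authenticated browser back to the downloader. Stripped to its shape, with placeholder site, form, and field names:

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://example.com/login')      # placeholder URL
        br.select_form(name='login')             # placeholder form name
        br['username'] = self.username
        br['password'] = self.password
        raw = br.submit().read()
        if 'Logout' not in raw:                  # crude success check
            raise LoginFailed(_('Failed to log in'))
        return br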
resources/recipes/siol.recipe (new file)
@ -0,0 +1,55 @@
# coding: utf-8
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.siol.si
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Siol(BasicNewsRecipe):
    title = u'Siol.net'
    __author__ = u'BlonG'
    description = "Multimedijski portal z aktualnimi vsebinami, intervjuji, komentarji iz Slovenije in sveta, sportal, trendi, avtomoto, blogos"
    oldest_article = 3
    language = 'sl'
    max_articles_per_feed = 20
    no_stylesheets = True
    use_embedded_content = False

    cover_url = 'https://sites.google.com/site/javno2010/home/siol_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    html2lrf_options = ['--base-font-size', '10']

    keep_only_tags = [
        dict(name='div', attrs={'id':'idContent'}),
    ]

    remove_tags = [
        dict(name='span', attrs={'class':'com1'}),
        dict(name='div', attrs={'class':'relation'}),
        dict(name='p', attrs={'class':'path'}),
        dict(name='div', attrs={'class':'clear_r'}),
        dict(name='div', attrs={'id':'appendix'}),
        dict(name='div', attrs={'id':'rail'}),
        dict(name='div', attrs={'id':'div_comments'}),
        dict(name='div', attrs={'class':'thumbs'}),
    ]

    feeds = [
        (u'Slovenija', u'http://www.siol.net/rss.aspx?path=Slovenija')
        ,(u'Lokalne novice', u'http://www.siol.net/rss.aspx?path=Slovenija/Lokalne_novice')
        ,(u'EU', u'http://www.siol.net/rss.aspx?path=EU')
        ,(u'Svet', u'http://www.siol.net/rss.aspx?path=Svet')
        ,(u'Gospodarstvo', u'http://www.siol.net/rss.aspx?path=Gospodarstvo')
        ,(u'Sportal', u'http://www.siol.net/rss.aspx?path=Sportal')
        ,(u'Trendi', u'http://www.siol.net/rss.aspx?path=Trendi')
        ,(u'Avtomoto', u'http://www.siol.net/rss.aspx?path=Avtomoto')
        ,(u'Tehnologija', u'http://www.siol.net/rss.aspx?path=Tehnologija')
        ,(u'TV / Film', u'http://www.siol.net/rss.aspx?path=TV')
    ]
@ -7,7 +7,7 @@ class AdvancedUserRecipe1284927619(BasicNewsRecipe):
     __author__ = 'noxxx'
     max_articles_per_feed = 100
     description = 'tagesanzeiger.ch: Nichts verpassen'
-    category = 'News, Politik, Nachrichten, Schweiz, Zürich'
+    category = 'News, Politik, Nachrichten, Schweiz, Zuerich'
     language = 'de'

     conversion_options = {
@ -4,7 +4,7 @@ class Tagesschau(BasicNewsRecipe):
     title = 'Tagesschau'
     description = 'Nachrichten der ARD'
     publisher = 'ARD'
-    language = 'de_DE'
+    language = 'de'

     __author__ = 'Florian Andreas Pfaff'
     oldest_article = 7

resources/recipes/vedomosti.recipe (new file)
@ -0,0 +1,195 @@
#!/usr/bin/env python

u'''
Ведомости
'''

from calibre.web.feeds.feedparser import parse
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe

class VedomostiRecipe(BasicNewsRecipe):
    title = u'Ведомости'
    __author__ = 'Nikolai Kotchetkov'
    publisher = 'vedomosti.ru'
    category = 'press, Russia'
    description = u'Ежедневная деловая газета'
    oldest_article = 3
    max_articles_per_feed = 100

    masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
    cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'

    #Add feed names if you want them to be sorted (feeds of this list appear first)
    sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги']

    encoding = 'cp1251'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    recursions = 0

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    keep_only_tags = [dict(name='td', attrs={'class' : ['second_content']})]

    remove_tags_after = [dict(name='div', attrs={'class' : 'article_text'})]

    remove_tags = [dict(name='div', attrs={'class' : ['sep', 'choice', 'articleRightTbl']})]

    feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml']

    #base URL for relative links
    base_url = u'http://www.vedomosti.ru'

    extra_css = 'h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'\
                'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'\
                'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'\
                '.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
                '.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
                '.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'\
                '.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'\
                '.article_desc {font-size: 1em; font-style:italic;}'

    def parse_index(self):
        try:
            feedData = parse(self.feeds[0])
            if not feedData:
                raise NotImplementedError
            self.log("parse_index: Feed loaded successfully.")
            if feedData.feed.has_key('title'):
                self.title = feedData.feed.title
                self.log("parse_index: Title updated to: ", self.title)
            if feedData.feed.has_key('description'):
                self.description = feedData.feed.description
                self.log("parse_index: Description updated to: ", self.description)

            def get_virtual_feed_articles(feed):
                if feeds.has_key(feed):
                    return feeds[feed][1]
                self.log("Adding new feed: ", feed)
                articles = []
                feeds[feed] = (feed, articles)
                return articles

            feeds = {}

            #Iterate feed items and distribute articles using tags
            for item in feedData.entries:
                link = item.get('link', '');
                title = item.get('title', '');
                if '' == link or '' == title:
                    continue
                article = {'title':title, 'url':link, 'description':item.get('description', ''), 'date':item.get('date', ''), 'content':''};
                if not item.has_key('tags'):
                    get_virtual_feed_articles('_default').append(article)
                    continue
                for tag in item.tags:
                    addedToDefault = False
                    term = tag.get('term', '')
                    if '' == term:
                        if (not addedToDefault):
                            get_virtual_feed_articles('_default').append(article)
                        continue
                    get_virtual_feed_articles(term).append(article)

            #Get feed list
            #Select sorted feeds first of all
            result = []
            for feedName in self.sortOrder:
                if (not feeds.has_key(feedName)): continue
                result.append(feeds[feedName])
                del feeds[feedName]
            result = result + feeds.values()

            return result

        except Exception, err:
            self.log(err)
            raise NotImplementedError

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        #self.log('Original: ', soup.prettify())

        #Find article
        contents = soup.find('div', {'class':['article_text']})
        if not contents:
            self.log('postprocess_html: article div not found!')
            return soup
        contents.extract()

        #Find title
        title = soup.find('h1')
        if title:
            contents.insert(0, title)

        #Find article image
        newstop = soup.find('div', {'class':['newstop']})
        if newstop:
            img = newstop.find('img')
            if img:
                imgDiv = Tag(soup, 'div')
                imgDiv['class'] = 'article_img'

                if img.has_key('width'):
                    del(img['width'])
                if img.has_key('height'):
                    del(img['height'])

                #find description
                element = img.parent.nextSibling

                img.extract()
                imgDiv.insert(0, img)

                while element:
                    # advance before testing, so non-Tag nodes cannot stall the loop
                    nextElement = element.nextSibling
                    if isinstance(element, Tag) and 'p' == element.name:
                        element.extract()
                        element['class'] = 'article_img_desc'
                        imgDiv.insert(len(imgDiv.contents), element)
                    element = nextElement

                contents.insert(1, imgDiv)

        #find article abstract
        abstract = soup.find('p', {'class':['subhead']})
        if abstract:
            abstract['class'] = 'article_desc'
            contents.insert(2, abstract)

        #Find article authors
        authorsDiv = soup.find('div', {'class':['autors']})
        if authorsDiv:
            authorsP = authorsDiv.find('p')
            if authorsP:
                authorsP['class'] = 'article_authors'
                contents.insert(len(contents.contents), authorsP)

        #Fix urls that use relative path
        urls = contents.findAll('a');
        if urls:
            for url in urls:
                if not url.has_key('href'):
                    continue
                if '/' == url['href'][0]:
                    url['href'] = self.base_url + url['href']

        body = soup.find('td', {'class':['second_content']})
        if body:
            body.replaceWith(contents)

        self.log('Result: ', soup.prettify())
        return soup
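parse_index() above sorts RSS entries into "virtual feeds" keyed by each entry's tags, with untagged items falling into a '_default' bucket. The bucketing logic on its own, assuming the feedparser entry shape used above:

    def bucket_by_tag(entries):
        feeds = {}
        for item in entries:
            terms = [t.get('term', '') for t in item.get('tags', [])]
            terms = [t for t in terms if t] or ['_default']
            for term in terms:
                feeds.setdefault(term, []).append(item)
        return feeds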
@ -31,8 +31,9 @@ class WashingtonPost(BasicNewsRecipe):
             ('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
             ('Style',
              'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
-            ('Sports',
-             'http://feeds.washingtonpost.com/wp-dyn/rss/linkset/2010/08/19/LI2010081904067_xml'),
+            ('NFL Sports',
+             'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
+            ('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
             ('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
             ]

resources/recipes/ynet.recipe (new file)
@ -0,0 +1,72 @@
import re

from calibre.web.feeds.news import BasicNewsRecipe
import mechanize

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = 'This is a recipe of Ynet.co.il. The recipe opens the article page and clicks on an advertisement to not hurt the sites advertising income.'
    cover_url = 'http://www.bneiakiva.net/uploads/images/ynet%282%29.jpg'
    title = u'Ynet'
    __author__ = 'marbs'
    language = 'he'
    extra_css='img {max-width:100%;direction: rtl;} #article{direction: rtl;} div{direction: rtl;} title{direction: rtl; } article_description{direction: rtl; } a.article{direction: rtl; } calibre_feed_description{direction: rtl; } body{direction: ltr;}'
    remove_attributes = ['width']
    simultaneous_downloads = 5
    keep_only_tags =dict(name='div', attrs={'id':'articleContainer'})
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='p', attrs={'text':[' ']})]
    max_articles_per_feed = 100
    preprocess_regexps = [
        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]

    def preprocess_html(self, soup):
        soup.html['dir'] = 'rtl'
        soup.body['dir'] = 'rtl'
        return soup

    feeds =[(u'\u05d7\u05d3\u05e9\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss2.xml'),
            (u'\u05db\u05dc\u05db\u05dc\u05d4',
             u'http://www.ynet.co.il/Integration/StoryRss6.xml'),
            (u'\u05e6\u05e8\u05db\u05e0\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss437.xml'),
            (u'\u05e1\u05e4\u05d5\u05e8\u05d8',
             u'http://www.ynet.co.il/Integration/StoryRss3.xml'),
            (u'\u05ea\u05e8\u05d1\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss538.xml'),
            (u'\u05de\u05e2\u05d5\u05e8\u05d1\u05d5\u05ea \u05d5\u05d7\u05d1\u05e8\u05d4',
             u'http://www.ynet.co.il/Integration/StoryRss3262.xml'),
            (u'\u05d1\u05e8\u05d9\u05d0\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss1208.xml'),
            (u'\u05d9\u05e8\u05d5\u05e7',
             u'http://www.ynet.co.il/Integration/StoryRss4872.xml'),
            (u'\u05de\u05d7\u05e9\u05d1\u05d9\u05dd',
             u'http://www.ynet.co.il/Integration/StoryRss544.xml'),
            (u'\u05e8\u05db\u05d1', u'http://www.ynet.co.il/Integration/StoryRss550.xml'),
            (u'\u05ea\u05d9\u05d9\u05e8\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss598.xml'),
            (u'\u05d4\u05d5\u05e8\u05d9\u05dd',
             u'http://www.ynet.co.il/Integration/StoryRss3052.xml'),
            (u'\u05d0\u05d5\u05db\u05dc',
             u'http://www.ynet.co.il/Integration/StoryRss975.xml'),
            (u'\u05d9\u05d4\u05d3\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss4403.xml'),
            (u'\u05de\u05d3\u05e2 \u05d5\u05d8\u05d1\u05e2',
             u'http://www.ynet.co.il/Integration/StoryRss2142.xml'),
            (u'\u05d9\u05d7\u05e1\u05d9\u05dd',
             u'http://www.ynet.co.il/Integration/StoryRss3925.xml'),
            (u'\u05d3\u05e2\u05d5\u05ea',
             u'http://www.ynet.co.il/Integration/StoryRss194.xml')]

    def print_version(self, url):
        #remove from here
        br = BasicNewsRecipe.get_browser()
        br.open(url)
        br.follow_link(mechanize.Link(base_url = '', url =url, text = '', tag = 'a', attrs = [{'id':'buzzerATop'}]))
        #to here to stop supporting ynet...
        split1 = url.split("-")
        print_url = 'http://www.ynet.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
        return print_url
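print_version() above derives the printer-friendly page from the article id that follows the first '-' in the article URL. Worked through on a made-up URL of the same shape:

    url = 'http://www.ynet.co.il/articles/0,7340,L-1234567,00.html'
    article_id = url.split('-')[1]        # -> '1234567,00.html'
    print_url = ('http://www.ynet.co.il/Ext/Comp/ArticleLayout/'
                 'CdaArticlePrintPreview/1,2506,L-' + article_id)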
@ -6,22 +6,25 @@ Fetch Die Zeit.
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
|
||||||
class ZeitDe(BasicNewsRecipe):
|
class ZeitDe(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'ZEIT Online'
|
title = 'Zeit Online'
|
||||||
description = 'ZEIT Online'
|
description = 'Zeit Online'
|
||||||
language = 'de'
|
language = 'de'
|
||||||
lang = 'de_DE'
|
|
||||||
|
|
||||||
__author__ = 'Martin Pitt, Sujata Raman and Ingo Paschke'
|
__author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
|
||||||
use_embedded_content = False
|
|
||||||
max_articles_per_feed = 40
|
max_articles_per_feed = 40
|
||||||
remove_empty_feeds = True
|
|
||||||
no_stylesheets = True
|
remove_tags = [
|
||||||
no_javascript = True
|
dict(name='iframe'),
|
||||||
encoding = 'utf-8'
|
dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }),
|
||||||
|
dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
|
||||||
|
dict(name='div', attrs={'id':["place_5","place_4","comments"]})
|
||||||
|
]
|
||||||
|
|
||||||
|
keep_only_tags = [dict(id=['main'])]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Seite 1', 'http://newsfeed.zeit.de/index_xml'),
|
('Seite 1', 'http://newsfeed.zeit.de/index_xml'),
|
||||||
@ -40,71 +43,31 @@ class ZeitDe(BasicNewsRecipe):
|
|||||||
('Sport', 'http://newsfeed.zeit.de/sport/index'),
|
('Sport', 'http://newsfeed.zeit.de/sport/index'),
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '.excerpt{font-size:1em}.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'
|
||||||
.supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
|
|
||||||
.excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:small;}
|
|
||||||
.title{font-family:Arial,Helvetica,sans-serif;font-size:large;clear:right;}
|
|
||||||
.caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
|
|
||||||
.copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
|
|
||||||
.article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
|
|
||||||
.quote{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
|
|
||||||
.quote .cite{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small}
|
|
||||||
.headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
|
|
||||||
.inline{float:left;margin-top:0;margin-right:15px;position:relative;width:180px; }
|
|
||||||
img.inline{float:none}
|
|
||||||
.intertitle{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small;font-weight:700}
|
|
||||||
.ebinfobox{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small;list-style-type:none;float:right;margin-top:0;border-left-style:solid;border-left-width:1px;padding-left:10px;}
|
|
||||||
.infobox {border-style: solid; border-width: 1px;padding:8px;}
|
|
||||||
.infobox dt {font-weight:700;}
|
|
||||||
'''
|
|
||||||
#filter_regexps = [r'ad.de.doubleclick.net/']
|
#filter_regexps = [r'ad.de.doubleclick.net/']
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class':["article"]}) ,
|
|
||||||
dict(name='ul', attrs={'class':["tools"]}) ,
|
|
||||||
]
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),
|
|
||||||
dict(name='div', attrs={'class':["pagination block","pagenav","inline link", "copyright"] }),
|
|
||||||
dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
|
|
||||||
dict(name='div', attrs={'id':["place_5","place_4","comments"]})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_attributes = ['style', 'font']
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
ans = article.get('link',None)
|
ans = article.get('link',None)
|
||||||
ans += "?page=all"
|
ans += "?page=all&print=true"
|
||||||
|
|
||||||
if 'video' in ans or 'quiz' in ans :
|
if 'video' in ans or 'quiz' in ans or 'blog' in ans :
|
||||||
ans = None
|
ans = None
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for tag in soup.findAll(name=['ul','li']):
|
||||||
|
tag.name = 'div'
|
||||||
|
|
||||||
|
soup.html['xml:lang'] = self.lang
|
||||||
|
soup.html['lang'] = self.lang
|
||||||
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
try:
|
try:
|
||||||
inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
|
inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
|
||||||
return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
|
return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
|
||||||
except:
|
except:
|
||||||
return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
|
return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
soup.html['xml:lang'] = self.lang
|
|
||||||
soup.html['lang'] = self.lang
|
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
|
||||||
soup.head.insert(0,mtag)
|
|
||||||
title = soup.find('h2', attrs={'class':'title'})
|
|
||||||
if title is None:
|
|
||||||
print "no title"
|
|
||||||
return soup
|
|
||||||
info = Tag(soup,'ul',[('class','ebinfobox')])
|
|
||||||
tools = soup.find('ul', attrs={'class':'tools'})
|
|
||||||
#author = tools.find('li','author first')
|
|
||||||
for tag in ['author first', 'date', 'date first', 'author', 'source']:
|
|
||||||
line = tools.find('li', tag)
|
|
||||||
if line:
|
|
||||||
info.insert(0,line)
|
|
||||||
title.parent.insert(0,info)
|
|
||||||
tools.extract()
|
|
||||||
return soup
|
|
||||||
|
|
||||||
|
|
||||||
resources/templates/html_export_default.css (new file)
@@ -0,0 +1,60 @@
body{
  margin:0px;
  padding: 0.5em;
  background-color:#F6F3E9;
  font-size:12px;
  font-family:Arial, Helvetica, sans-serif;
}

.calibreMeta{
  background-color:#39322B;
  color:white;
  padding:10px;
}

.calibreMeta a, .calibreEbNav a, .calibreEbNavTop a, .calibreToc a{
  color:white;
}

.calibreMeta h1{
  margin:0px;
  font-size:18px;
  background-color:#39322B;
}

.calibreEbookContent{
  padding:20px;
}

.calibreEbNav, .calibreEbNavTop{
  clear:both;
  background-color:#39322B;
  color:white;
  padding:10px;
  text-align:center;
}

.calibreEbNavTop{
  margin-bottom:20px;
}

.calibreEbNav a, .calibreEbNavTop a{
  padding:0px 5px;
}

.calibreTocIndex{
  line-height:18px;
}

.calibreToc{
  float:left;
  margin:20px;
  width:300px;
  background-color:#39322B;
  color:white;
  padding:10px;
}

.calibreEbookContent{
  width:600px;
  float:left;
}
resources/templates/html_export_default.tmpl (new file)
@@ -0,0 +1,74 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    ${head_content}$

    <link href="${cssLink}$" type="text/css" rel="stylesheet" />

  </head>
  <body>

    <div class="calibreMeta">
      <div class="calibreMetaTitle">
        ${pos1=1}$
        ${for title in meta.titles():}$
          ${if pos1:}$
            <h1>
              <a href="${tocUrl}$">${print title}$</a>
            </h1>
          ${:else:}$
            <div class="calibreMetaSubtitle">${print title}$</div>
          ${:endif}$
          ${pos1=0}$
        ${:endfor}$
      </div>
      <div class="calibreMetaAuthor">
        ${print ', '.join(meta.creators())}$
      </div>
    </div>

    <div class="calibreMain">

      <div class="calibreEbookContent">
        ${if prevLink or nextLink:}$
          <div class="calibreEbNavTop">
            ${if prevLink:}$
              <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
            ${:else:}$
              <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
            ${:endif}$

            ${if nextLink:}$
              <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
            ${:endif}$
          </div>
        ${:endif}$

        ${ebookContent}$
      </div>

      ${if has_toc:}$
        <div class="calibreToc">
          <h2><a href="${tocUrl}$">${print _('Table of contents'),}$</a></h2>
          ${print toc()}$
        </div>
      ${:endif}$

      <div class="calibreEbNav">
        ${if prevLink:}$
          <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
        ${:else:}$
          <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
        ${:endif}$

        <a href="${tocUrl}$" class="calibreAHome">${print _('start'),}$</a>

        ${if nextLink:}$
          <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
        ${:endif}$
      </div>

    </div>

  </body>
</html>
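The ${...}$ delimiters above are evaluated by the bundled templite engine that the new HTML Output plugin (further down in this commit) drives via Templite(...).render(**kwargs). A minimal sketch of how one of these fragments renders, assuming the same import path and render signature that output.py uses:

    from templite import Templite

    t = Templite(u'${if nextLink:}$<a href="${nextLink}$">next</a>${:else:}$<span>end</span>${:endif}$')
    print t.render(nextLink=u'chapter2.html')   # -> <a href="chapter2.html">next</a>
    print t.render(nextLink=None)               # -> <span>end</span>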
resources/templates/html_export_default_index.tmpl (new file)
@@ -0,0 +1,61 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />

    <link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
    <link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />

    <title>${print ', '.join(meta.creators()),}$ - ${print meta.titles().next(); meta.titles().close()}$</title>

    ${for item in meta:}$
      <meta ${print 'name="DC.'+item['name']+'"',}$ ${print 'content="'+item['value']+'"',}$ />
    ${:endfor}$

    <link href="${cssLink}$" type="text/css" rel="stylesheet" />
  </head>
  <body>

    <div class="calibreMeta">
      <div class="calibreMetaTitle">
        ${pos1=1}$
        ${for title in meta.titles():}$
          ${if pos1:}$
            <h1>
              <a href="${tocUrl}$">${print title}$</a>
            </h1>
          ${:else:}$
            <div class="calibreMetaSubtitle">${print title}$</div>
          ${:endif}$
          ${pos1=0}$
        ${:endfor}$
      </div>
      <div class="calibreMetaAuthor">
        ${print ', '.join(meta.creators()),}$
      </div>
    </div>

    <div class="calibreMain">
      <div class="calibreEbookContent">

        ${if has_toc:}$
          <div class="calibreTocIndex">
            <h2>${print _('Table of contents'),}$</h2>
            ${toc}$
          </div>
        ${:else:}$
          <h2>${print _('No table of contents present'),}$</h2>
          <div><strong><a href="${nextLink}$">${print _('begin to read'),}$</a></strong></div>
        ${:endif}$

      </div>

      <div class="calibreEbNav">
        ${if nextLink:}$
          <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
        ${:endif}$
      </div>
    </div>

  </body>
</html>
@@ -348,8 +348,12 @@ class Build(Command):
     VERSION = 1.0.0
     CONFIG += %s
     ''')%(ext.name, ' '.join(ext.headers), ' '.join(ext.sources), archs)
+    pro = pro.replace('\\', '\\\\')
     open(ext.name+'.pro', 'wb').write(pro)
-    subprocess.check_call([QMAKE, '-o', 'Makefile', ext.name+'.pro'])
+    qmc = [QMAKE, '-o', 'Makefile']
+    if iswindows:
+        qmc += ['-spec', 'win32-msvc2008']
+    subprocess.check_call(qmc + [ext.name+'.pro'])
     subprocess.check_call([make, '-f', 'Makefile'])
     objects = glob.glob(obj_pat)
     return list(map(self.a, objects))
@@ -11,7 +11,7 @@ import subprocess, tempfile, os, time
 from setup import Command, installer_name
 from setup.build_environment import HOST, PROJECT

-BASE_RSYNC = 'rsync -avz --delete'.split()
+BASE_RSYNC = ['rsync', '-avz', '--delete']
 EXCLUDES = []
 for x in [
     'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac',

@@ -42,13 +42,13 @@ class Push(Command):
     threads = []
     for host in (
         r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
-        'kovid@ox:calibre'
+        'kovid@ox:calibre',
+        r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
     ):
         rcmd = BASE_RSYNC + EXCLUDES + ['.', host]
         print '\n\nPushing to:', host, '\n'
         threads.append(Thread(target=subprocess.check_call, args=(rcmd,)))
         threads[-1].start()
-        subprocess.check_call(rcmd)
     for thread in threads:
         thread.join()
@@ -13,7 +13,7 @@ from setup import Command, modules, functions, basenames, __version__, \
 from setup.build_environment import msvc, MT, RC
 from setup.installer.windows.wix import WixMixIn

-QT_DIR = 'C:\\Qt\\4.6.3'
+QT_DIR = 'Q:\\Qt\\4.7.1'
 QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
 LIBUSB_DIR = 'C:\\libusb'
 LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
@@ -28,15 +28,16 @@ If there are no windows binaries already compiled for the version of python you

 Run the following command to install python dependencies::

-  easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython
+  easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform
+
+Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)

 Qt
 --------

 Extract Qt sourcecode to C:\Qt\4.x.x. Run configure and make::

-  configure -opensource -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc -no-qt3support -webkit -xmlpatterns -no-phonon
-  nmake
+  configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license && nmake

 SIP
 -----
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import subprocess, tempfile, os, time, sys
+import subprocess, tempfile, os, time, sys, telnetlib
 from threading import RLock

 from setup import Command

@@ -28,7 +28,12 @@ else:
     def process_default(self, event):
         name = getattr(event,
                 'name', None)
-        if name and os.path.splitext(name)[1] == '.py':
+        if not name:
+            return
+        ext = os.path.splitext(name)[1]
+        reload = False
+        if ext == '.py':
+            reload = True
             print
             print name, 'changed'
             self.command.kill_server()

@@ -36,6 +41,9 @@ else:
             print self.command.prompt,
             sys.stdout.flush()
+            if reload:
+                self.command.reload_browser(delay=1)

 class Server(Command):

@@ -75,6 +83,19 @@ class Server(Command):
         self.notifier.start()
         self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)

+    def reload_browser(self, delay=0.1):
+        time.sleep(delay)
+        try:
+            t = telnetlib.Telnet('localhost', 4242)
+            t.read_until("repl>")
+            t.write('BrowserReload();')
+            t.read_until("repl>")
+            t.close()
+        except:
+            print 'Failed to reload browser'
+            import traceback
+            traceback.print_exc()
+
     def run(self, opts):
         self.lock = RLock()
         tdir = tempfile.gettempdir()
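The hard-coded port 4242, the repl> prompt and the BrowserReload(); command match the defaults of the Firefox MozRepl extension; that dependency is an inference from the strings above, not something the commit states, so treat it as an assumption of this development setup rather than a documented requirement.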
@@ -85,8 +106,13 @@ class Server(Command):
         print
         self.watch()

+        first = True
         while True:
             self.launch_server()
+            if not first:
+                self.reload_browser()
+            first = False
+
             try:
                 raw_input(self.prompt)
             except:
@@ -21,8 +21,6 @@ from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
     filesystem_encoding, plugins, config_dir
 from calibre.startup import winutil, winutilerror

-import mechanize
-
 uuid.uuid4() # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo

 if False:

@@ -269,7 +267,8 @@ def browser(honor_time=True, max_time=2, mobile_browser=False):
     :param honor_time: If True honors pause time in refresh requests
     :param max_time: Maximum time in seconds to wait during a refresh request
     '''
-    opener = mechanize.Browser()
+    from calibre.utils.browser import Browser
+    opener = Browser()
     opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
     opener.set_handle_robots(False)
     opener.addheaders = [('User-agent', ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
|
|||||||
def replace_entities(raw):
|
def replace_entities(raw):
|
||||||
return _ent_pat.sub(entity_to_unicode, raw)
|
return _ent_pat.sub(entity_to_unicode, raw)
|
||||||
|
|
||||||
|
def xml_replace_entities(raw):
|
||||||
|
return _ent_pat.sub(xml_entity_to_unicode, raw)
|
||||||
|
|
||||||
def prepare_string_for_xml(raw, attribute=False):
|
def prepare_string_for_xml(raw, attribute=False):
|
||||||
raw = _ent_pat.sub(entity_to_unicode, raw)
|
raw = _ent_pat.sub(entity_to_unicode, raw)
|
||||||
raw = raw.replace('&', '&').replace('<', '<').replace('>', '>')
|
raw = raw.replace('&', '&').replace('<', '<').replace('>', '>')
|
||||||
|
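A short sketch of the difference between the two helpers, assuming _ent_pat and the result_exceptions table behave as the definitions above suggest: replace_entities decodes every entity, while xml_replace_entities leaves the XML-significant ones encoded so the result can be re-embedded in markup. The exact exception set is an assumption here since the table is defined outside this hunk.

    from calibre import replace_entities, xml_replace_entities

    print replace_entities('&amp; &copy;')      # -> '& (c)' with the ampersand decoded
    print xml_replace_entities('&amp; &copy;')  # -> '&amp; (c)', ampersand kept encoded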
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = 'calibre'
-__version__ = '0.7.26'
+__version__ = '0.7.28'
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
|
|||||||
os.makedirs(config_dir, mode=CONFIG_DIR_MODE)
|
os.makedirs(config_dir, mode=CONFIG_DIR_MODE)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
if not os.access(config_dir, os.W_OK) or not os.access(config_dir, os.X_OK):
|
if not os.path.exists(config_dir) or \
|
||||||
|
not os.access(config_dir, os.W_OK) or not \
|
||||||
|
os.access(config_dir, os.X_OK):
|
||||||
print 'No write acces to', config_dir, 'using a temporary dir instead'
|
print 'No write acces to', config_dir, 'using a temporary dir instead'
|
||||||
import tempfile, atexit
|
import tempfile, atexit
|
||||||
config_dir = tempfile.mkdtemp(prefix='calibre-config-')
|
config_dir = tempfile.mkdtemp(prefix='calibre-config-')
|
||||||
|
@@ -2,9 +2,7 @@ import os.path
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import textwrap
-import os
-import glob
+import textwrap, os, glob, functools
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
     MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
 from calibre.constants import numeric_version
|
|||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
def get_metadata(self, stream, ftype):
|
||||||
if ftype == 'cbr':
|
if ftype == 'cbr':
|
||||||
from calibre.libunrar import extract_member as extract_first
|
from calibre.libunrar import extract_first_alphabetically as extract_first
|
||||||
extract_first
|
extract_first
|
||||||
else:
|
else:
|
||||||
from calibre.libunzip import extract_member as extract_first
|
from calibre.libunzip import extract_member
|
||||||
|
extract_first = functools.partial(extract_member,
|
||||||
|
sort_alphabetically=True)
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
ret = extract_first(stream)
|
ret = extract_first(stream)
|
||||||
mi = MetaInformation(None, None)
|
mi = MetaInformation(None, None)
|
||||||
@ -446,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
|
|||||||
from calibre.ebooks.rtf.output import RTFOutput
|
from calibre.ebooks.rtf.output import RTFOutput
|
||||||
from calibre.ebooks.tcr.output import TCROutput
|
from calibre.ebooks.tcr.output import TCROutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
|
from calibre.ebooks.html.output import HTMLOutput
|
||||||
from calibre.ebooks.snb.output import SNBOutput
|
from calibre.ebooks.snb.output import SNBOutput
|
||||||
|
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
@ -453,7 +454,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
|
|||||||
from calibre.devices.apple.driver import ITUNES
|
from calibre.devices.apple.driver import ITUNES
|
||||||
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
|
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
|
||||||
from calibre.devices.blackberry.driver import BLACKBERRY
|
from calibre.devices.blackberry.driver import BLACKBERRY
|
||||||
from calibre.devices.cybook.driver import CYBOOK
|
from calibre.devices.cybook.driver import CYBOOK, ORIZON
|
||||||
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
|
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
|
||||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
|
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
|
||||||
BOOQ, ELONEX, POCKETBOOK301, MENTOR
|
BOOQ, ELONEX, POCKETBOOK301, MENTOR
|
||||||
@ -461,7 +462,7 @@ from calibre.devices.iliad.driver import ILIAD
|
|||||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
||||||
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
|
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
|
||||||
from calibre.devices.nook.driver import NOOK
|
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||||
from calibre.devices.prs505.driver import PRS505
|
from calibre.devices.prs505.driver import PRS505
|
||||||
from calibre.devices.android.driver import ANDROID, S60
|
from calibre.devices.android.driver import ANDROID, S60
|
||||||
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
||||||
@ -475,7 +476,7 @@ from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
|
|||||||
SOVOS, PICO
|
SOVOS, PICO
|
||||||
from calibre.devices.sne.driver import SNE
|
from calibre.devices.sne.driver import SNE
|
||||||
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
|
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
|
||||||
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
|
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600
|
||||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||||
from calibre.devices.kobo.driver import KOBO
|
from calibre.devices.kobo.driver import KOBO
|
||||||
|
|
||||||
@ -525,6 +526,7 @@ plugins += [
|
|||||||
RTFOutput,
|
RTFOutput,
|
||||||
TCROutput,
|
TCROutput,
|
||||||
TXTOutput,
|
TXTOutput,
|
||||||
|
HTMLOutput,
|
||||||
SNBOutput,
|
SNBOutput,
|
||||||
]
|
]
|
||||||
# Order here matters. The first matched device is the one used.
|
# Order here matters. The first matched device is the one used.
|
||||||
@ -533,6 +535,7 @@ plugins += [
|
|||||||
HANLINV5,
|
HANLINV5,
|
||||||
BLACKBERRY,
|
BLACKBERRY,
|
||||||
CYBOOK,
|
CYBOOK,
|
||||||
|
ORIZON,
|
||||||
ILIAD,
|
ILIAD,
|
||||||
IREXDR1000,
|
IREXDR1000,
|
||||||
IREXDR800,
|
IREXDR800,
|
||||||
@ -546,6 +549,7 @@ plugins += [
|
|||||||
KINDLE2,
|
KINDLE2,
|
||||||
KINDLE_DX,
|
KINDLE_DX,
|
||||||
NOOK,
|
NOOK,
|
||||||
|
NOOK_COLOR,
|
||||||
PRS505,
|
PRS505,
|
||||||
ANDROID,
|
ANDROID,
|
||||||
S60,
|
S60,
|
||||||
@ -586,6 +590,7 @@ plugins += [
|
|||||||
AVANT,
|
AVANT,
|
||||||
MENTOR,
|
MENTOR,
|
||||||
SWEEX,
|
SWEEX,
|
||||||
|
Q600,
|
||||||
KOGAN,
|
KOGAN,
|
||||||
PDNOVEL,
|
PDNOVEL,
|
||||||
SPECTRA,
|
SPECTRA,
|
||||||
@ -892,4 +897,3 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
|
|||||||
Email, Server, Plugins, Tweaks, Misc]
|
Email, Server, Plugins, Tweaks, Misc]
|
||||||
|
|
||||||
#}}}
|
#}}}
|
||||||
|
|
||||||
|
@@ -250,8 +250,11 @@ class OutputProfile(Plugin):
     #: If True, the date is appended to the title of downloaded news
     periodical_date_in_title = True

-    #: The character used to represent a star in ratings
+    #: Characters used in jackets and catalogs
+    missing_char = u'x'
     ratings_char = u'*'
+    empty_ratings_char = u' '
+    read_char = u'+'

     #: Unsupported unicode characters to be replaced during preprocessing
     unsupported_unicode_chars = []

@@ -287,7 +290,12 @@ class iPadOutput(OutputProfile):
             'macros': {'border-width': '{length}|medium|thick|thin'}
         }
     ]
-    ratings_char = u'\u2605'
+
+    missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
+    ratings_char = u'\u2605' # filled star
+    empty_ratings_char = u'\u2606' # hollow star
+    read_char = u'\u2713' # check mark

     touchscreen = True
     # touchscreen_news_css {{{
     touchscreen_news_css = u'''

@@ -498,7 +506,6 @@ class SonyReaderLandscapeOutput(SonyReaderOutput):
     screen_size = (784, 1012)
     comic_screen_size = (784, 1012)

-
 class MSReaderOutput(OutputProfile):

     name = 'Microsoft Reader'

@@ -582,7 +589,12 @@ class KindleOutput(OutputProfile):
     fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
     supports_mobi_indexing = True
     periodical_date_in_title = False

+    missing_char = u'x\u2009'
+    empty_ratings_char = u'\u2606'
     ratings_char = u'\u2605'
+    read_char = u'\u2713'

     mobi_ems_per_blockquote = 2.0

     @classmethod

@@ -603,6 +615,8 @@ class KindleDXOutput(OutputProfile):
     #comic_screen_size = (741, 1022)
     supports_mobi_indexing = True
     periodical_date_in_title = False
+    ratings_char = u'\u2605'
+    read_char = u'\u2713'
     mobi_ems_per_blockquote = 2.0

     @classmethod
@@ -23,6 +23,9 @@ class ANDROID(USBMS):
             : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
             0xc92 : [0x100]},

+            # Eken
+            0x040d : { 0x8510 : [0x0001] },
+
             # Motorola
             0x22b8 : { 0x41d9 : [0x216], 0x2d67 : [0x100], 0x41db : [0x216],
                 0x4285 : [0x216]},
@@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.epub import set_metadata
 from calibre.library.server.utils import strftime
 from calibre.utils.config import config_dir, prefs
-from calibre.utils.date import isoformat, now, parse_date
+from calibre.utils.date import now, parse_date
 from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile

@@ -1221,12 +1221,19 @@ class ITUNES(DriverBase):
             return thumb

         if isosx:
+            # The following commands generate an error, but the artwork does in fact
+            # get sent to the device. Seems like a bug in Apple's automation interface?
+            # Could also be a problem with the integrity of the cover data?
             if lb_added:
-                lb_added.artworks[1].data_.set(cover_data)
+                try:
+                    lb_added.artworks[1].data_.set(cover_data)
+                except:
+                    if DEBUG:
+                        self.log.warning("  iTunes automation interface reported an error"
+                            " when adding artwork to '%s' in the iTunes Library" % metadata.title)
+                    pass

             if db_added:
-                # The following command generates an error, but the artwork does in fact
-                # get sent to the device. Seems like a bug in Apple's automation interface
                 try:
                     db_added.artworks[1].data_.set(cover_data)
                 except:

@@ -2521,11 +2528,11 @@ class ITUNES(DriverBase):
                 metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                     old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
             else:
-                metadata.timestamp = isoformat(now())
+                metadata.timestamp = now()
                 if DEBUG:
                     self.log.info("   add timestamp: %s" % metadata.timestamp)
         else:
-            metadata.timestamp = isoformat(now())
+            metadata.timestamp = now()
             if DEBUG:
                 self.log.warning("   missing <metadata> block in OPF file")
                 self.log.info("   add timestamp: %s" % metadata.timestamp)
@@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):

     VENDOR_ID = [0x0fca]
     PRODUCT_ID = [0x8004, 0x0004]
-    BCD = [0x0200, 0x0107, 0x0210, 0x0201]
+    BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211]

     VENDOR_NAME = 'RIM'
     WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
@@ -5,7 +5,7 @@ __copyright__ = '2009, John Schember <john at nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 '''
-Device driver for Bookeen's Cybook Gen 3 and Opus
+Device driver for Bookeen's Cybook Gen 3 and Opus and Orizon
 '''

 import os

@@ -56,3 +56,23 @@ class CYBOOK(USBMS):
         if isunix:
             return device_info[3] == 'Bookeen' and (device_info[4] == 'Cybook Gen3' or device_info[4] == 'Cybook Opus')
         return True
+
+class ORIZON(CYBOOK):
+
+    name = 'Orizon Device Interface'
+    gui_name = 'Orizon'
+    description = _('Communicate with the Cybook Orizon eBook reader.')
+
+    BCD = [0x319]
+
+    WINDOWS_MAIN_MEM = re.compile(r'CYBOOK_ORIZON__-FD')
+    WINDOWS_CARD_A_MEM = re.compile('CYBOOK_ORIZON__-SD')
+
+    EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Digital Editions'
+
+    @classmethod
+    def can_handle(cls, device_info, debug=False):
+        if isunix:
+            return device_info[3] == 'Bookeen' and device_info[4] == 'Cybook Orizon'
+        return True
@@ -74,9 +74,9 @@ class DevicePlugin(Plugin):
         if bcd is None or len(bcd) == 0:
             return True
         for c in bcd:
-            # Bug in winutil.get_usb_devices converts a to :
-            rev = ('rev_%4.4x'%c).replace('a', ':')
-            if rev in device_id:
+            rev = 'rev_%4.4x'%c
+            # Bug in winutil.get_usb_devices sometimes converts a to :
+            if rev in device_id or rev.replace('a', ':') in device_id:
                 return True
         return False
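A worked illustration of the fixed match, assuming Windows device ids of the usual usb\vid_...&pid_...&rev_... form; the BCD value 0x031a here is hypothetical, chosen because only revisions containing a hexadecimal 'a' are affected by the winutil bug, so both spellings must be probed:

    def bcd_matches(device_id, bcd):
        for c in bcd:
            rev = 'rev_%4.4x'%c
            if rev in device_id or rev.replace('a', ':') in device_id:
                return True
        return False

    print bcd_matches(r'usb\vid_1006&pid_4023&rev_031a', [0x031a])  # True, healthy id
    print bcd_matches(r'usb\vid_1006&pid_4023&rev_031:', [0x031a])  # True, mangled id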
@@ -17,15 +17,15 @@ class IRIVER_STORY(USBMS):
     supported_platforms = ['windows', 'osx', 'linux']

     # Ordered list of supported formats
-    FORMATS = ['epub', 'pdf', 'txt']
+    FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt']

     VENDOR_ID = [0x1006]
-    PRODUCT_ID = [0x4023]
+    PRODUCT_ID = [0x4023, 0x4025]
     BCD = [0x0323]

     VENDOR_NAME = 'IRIVER'
-    WINDOWS_MAIN_MEM = 'STORY'
-    WINDOWS_CARD_A_MEM = 'STORY'
+    WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05']
+    WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD']

     #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
     #OSX_CARD_A_MEM = 'Kindle Card Storage Media'
@@ -503,7 +503,11 @@ class KOBO(USBMS):
             ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)

             ContentID = self.contentid_from_path(book.path, ContentType)
-            datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
+
+            t = (ContentID,)
+            cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t)
+            result = cursor.fetchone()
+            datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'

             t = (datelastread,ContentID,)
@@ -72,6 +72,15 @@ class SWEEX(USBMS):
     EBOOK_DIR_MAIN = ''
     SUPPORTS_SUB_DIRS = True

+class Q600(SWEEX):
+
+    name = 'Digma Q600 Device interface'
+    gui_name = 'Q600'
+    description = _('Communicate with the Digma Q600')
+
+    BCD = [0x325]
+    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
+
 class KOGAN(SWEEX):

     name = 'Kogan Device Interface'
|
|||||||
|
|
||||||
BCD = [0x222]
|
BCD = [0x222]
|
||||||
|
|
||||||
EBOOK_DIR_MAIN = 'eBooks/Kobo'
|
EBOOK_DIR_MAIN = 'eBooks'
|
||||||
|
|
||||||
def upload_cover(self, path, filename, metadata, filepath):
|
def upload_cover(self, path, filename, metadata, filepath):
|
||||||
coverdata = getattr(metadata, 'thumbnail', None)
|
coverdata = getattr(metadata, 'thumbnail', None)
|
||||||
if coverdata and coverdata[2]:
|
if coverdata and coverdata[2]:
|
||||||
with open(os.path.join(path, '.thumbnail', filename+'.jpg'), 'wb') as coverfile:
|
dirpath = os.path.join(path, '.thumbnail')
|
||||||
|
if not os.path.exists(dirpath):
|
||||||
|
os.makedirs(dirpath)
|
||||||
|
with open(os.path.join(dirpath, filename+'.jpg'), 'wb') as coverfile:
|
||||||
coverfile.write(coverdata[2])
|
coverfile.write(coverdata[2])
|
||||||
|
|
||||||
|
|
||||||
|
@@ -80,3 +80,14 @@ class NOOK(USBMS):

     def sanitize_path_components(self, components):
         return [x.replace('#', '_') for x in components]
+
+class NOOK_COLOR(NOOK):
+    gui_name = _('Nook Color')
+    description = _('Communicate with the Nook Color eBook reader.')
+
+    PRODUCT_ID = [0x002]
+    BCD = [0x216]
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOK_DISK'
+
+    EBOOK_DIR_MAIN = 'My Files/Books'
@@ -610,7 +610,11 @@ class XMLCache(object):
         # is not new, compare its Sony DB date against localtime and gmtime.
         # Count the matches. When we must set a date, use the one with the most
         # matches. Use localtime if the case of a tie, and hope it is right.
-        timestamp = os.path.getmtime(path)
+        try:
+            timestamp = os.path.getmtime(path)
+        except:
+            debug_print('Failed to get timestamp for:', path)
+            timestamp = time.time()
         rec_date = record.get('date', None)

         def clean(x):

@@ -619,13 +623,20 @@ class XMLCache(object):
             x.replace(u'\0', '')
             return x

+        def record_set(k, v):
+            try:
+                record.set(k, clean(v))
+            except:
+                # v is not suitable for XML, ignore
+                pass
+
         if not getattr(book, '_new_book', False): # book is not new
             if record.get('tz', None) is not None:
                 use_tz_var = True
             if strftime(timestamp, zone=time.gmtime) == rec_date:
                 gtz_count += 1
             elif strftime(timestamp, zone=time.localtime) == rec_date:
                 ltz_count += 1
         else: # book is new. Set the time using the current votes
             if use_tz_var:
                 tz = time.localtime

@@ -639,22 +650,25 @@ class XMLCache(object):
                 debug_print("Use GMT TZ for new book", book.lpath)
         date = strftime(timestamp, zone=tz)
         record.set('date', clean(date))
-        record.set('size', clean(str(os.stat(path).st_size)))
+        try:
+            record.set('size', clean(str(os.stat(path).st_size)))
+        except:
+            record.set('size', '0')
         title = book.title if book.title else _('Unknown')
-        record.set('title', clean(title))
+        record_set('title', title)
         ts = book.title_sort
         if not ts:
             ts = title_sort(title)
-        record.set('titleSorter', clean(ts))
+        record_set('titleSorter', ts)
        if self.use_author_sort:
             if book.author_sort:
                 aus = book.author_sort
             else:
                 debug_print('Author_sort is None for book', book.lpath)
                 aus = authors_to_sort_string(book.authors)
-            record.set('author', clean(aus))
+            record_set('author', aus)
         else:
-            record.set('author', clean(authors_to_string(book.authors)))
+            record_set('author', authors_to_string(book.authors))
         ext = os.path.splitext(path)[1]
         if ext:
             ext = ext[1:].lower()
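record_set exists because lxml refuses attribute values that are not legal XML; note that clean() as quoted discards the result of replace(), so in practice the try/except in record_set is what actually absorbs bad strings (this matches the changelog entry "Sony driver: Ignore invalid strings when updating XML database"). A hedged illustration of the failure mode it swallows; the exact exception type is an assumption about lxml's validation:

    from lxml import etree

    rec = etree.Element('text')
    try:
        rec.set('title', u'bad \x0b title')   # U+000B is not a valid XML 1.0 character
    except ValueError:
        pass   # ignored, as in record_set above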
@@ -32,7 +32,7 @@ def detect(aBuf):
 ENCODING_PATS = [
     re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
         re.IGNORECASE),
-    re.compile(r'''<meta\s+?[^<>]+?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
+    re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
         re.IGNORECASE)
 ]
 ENTITY_PATTERN = re.compile(r'&(\S+?);')
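The quantifier change from [^<>]+? to [^<>]*? lets the charset pattern match <meta> tags whose first attribute is content=; the changelog entry "Fix bug in regex used to extract charset from <meta> tags" refers to this. A quick check:

    import re

    pat = re.compile(r'''<meta\s+?[^<>]*?content\s*=\s*['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
        re.IGNORECASE)
    raw = '<meta content="text/html; charset=utf-8" http-equiv="Content-Type">'
    print pat.search(raw).group(1)   # -> utf-8; the old +? pattern finds no match here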
@@ -144,7 +144,10 @@ class DocAnalysis(object):

         # Normalize the histogram into percents
         totalLines = len(self.lines)
-        h = [ float(count)/totalLines for count in hRaw ]
+        if totalLines > 0:
+            h = [ float(count)/totalLines for count in hRaw ]
+        else:
+            h = []
         #print "\nhRaw histogram lengths are: "+str(hRaw)
         #print "   percents are: "+str(h)+"\n"
src/calibre/ebooks/html/meta.py (new file)
@@ -0,0 +1,33 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.oeb.base import namespace, barename, DC11_NS

class EasyMeta(object):

    def __init__(self, meta):
        self.meta = meta

    def __iter__(self):
        meta = self.meta
        for item_name in meta.items:
            for item in meta[item_name]:
                if namespace(item.term) == DC11_NS:
                    yield { 'name': barename(item.term), 'value': item.value }

    def __len__(self):
        count = 0
        for item in self:
            count = count+1
        return count

    def titles(self):
        for item in self.meta['title']:
            yield item.value

    def creators(self):
        for item in self.meta['creator']:
            yield item.value
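A hedged usage sketch of EasyMeta, mirroring how the templates above and the HTML Output plugin below consume it; oeb_book stands in for a parsed OEB book object supplied by the conversion pipeline:

    meta = EasyMeta(oeb_book.metadata)
    print ', '.join(meta.creators())        # author line, as rendered in the templates
    for item in meta:                       # Dublin Core terms, used for the DC <meta> tags
        print item['name'], '=', item['value']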
src/calibre/ebooks/html/output.py (new file)
@@ -0,0 +1,209 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'

import os, re, shutil

from calibre.utils import zipfile

from os.path import dirname, abspath, relpath, exists, basename

from lxml import etree
from templite import Templite

from calibre.ebooks.oeb.base import element
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.zipfile import ZipFile

from urllib import unquote

from calibre.ebooks.html.meta import EasyMeta

class HTMLOutput(OutputFormatPlugin):

    name = 'HTML Output'
    author = 'Fabian Grassl'
    file_type = 'zip'

    options = set([
        OptionRecommendation(name='template_css',
            help=_('CSS file used for the output instead of the default file')),

        OptionRecommendation(name='template_html_index',
            help=_('Template used for generation of the html index file instead of the default file')),

        OptionRecommendation(name='template_html',
            help=_('Template used for the generation of the html contents of the book instead of the default file')),

        OptionRecommendation(name='extract_to',
            help=_('Extract the contents of the generated ZIP file to the '
                'specified directory. WARNING: The contents of the directory '
                'will be deleted.')
        ),
    ])

    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])

    def generate_toc(self, oeb_book, ref_url, output_dir):
        '''
        Generate table of contents
        '''
        with CurrentDir(output_dir):
            def build_node(current_node, parent=None):
                if parent is None:
                    parent = etree.Element('ul')
                elif len(current_node.nodes):
                    parent = element(parent, ('ul'))
                for node in current_node.nodes:
                    point = element(parent, 'li')
                    href = relpath(abspath(unquote(node.href)), dirname(ref_url))
                    link = element(point, 'a', href=href)
                    title = node.title
                    if title:
                        title = re.sub(r'\s+', ' ', title)
                    link.text=title
                    build_node(node, point)
                return parent
            wrap = etree.Element('div')
            wrap.append(build_node(oeb_book.toc))
            return wrap

    def generate_html_toc(self, oeb_book, ref_url, output_dir):
        root = self.generate_toc(oeb_book, ref_url, output_dir)
        return etree.tostring(root, pretty_print=True, encoding='utf-8',
                xml_declaration=False)

    def convert(self, oeb_book, output_path, input_plugin, opts, log):

        # read template files
        if opts.template_html_index is not None:
            template_html_index_data = open(opts.template_html_index, 'rb').read()
        else:
            template_html_index_data = P('templates/html_export_default_index.tmpl', data=True)

        if opts.template_html is not None:
            template_html_data = open(opts.template_html, 'rb').read()
        else:
            template_html_data = P('templates/html_export_default.tmpl', data=True)

        if opts.template_css is not None:
            template_css_data = open(opts.template_css, 'rb').read()
        else:
            template_css_data = P('templates/html_export_default.css', data=True)

        template_html_index_data = template_html_index_data.decode('utf-8')
        template_html_data = template_html_data.decode('utf-8')
        template_css_data = template_css_data.decode('utf-8')

        self.log = log
        self.opts = opts
        meta = EasyMeta(oeb_book.metadata)

        tempdir = os.path.realpath(PersistentTemporaryDirectory())
        output_file = os.path.join(tempdir,
                basename(re.sub(r'\.zip', '', output_path)+'.html'))
        output_dir = re.sub(r'\.html', '', output_file)+'_files'

        if not exists(output_dir):
            os.makedirs(output_dir)

        css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
        with open(css_path, 'wb') as f:
            f.write(template_css_data.encode('utf-8'))

        with open(output_file, 'wb') as f:
            html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
            templite = Templite(template_html_index_data)
            nextLink = oeb_book.spine[0].href
            nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file))
            cssLink = relpath(abspath(css_path), dirname(output_file))
            tocUrl = relpath(output_file, dirname(output_file))
            t = templite.render(has_toc=bool(oeb_book.toc.count()),
                    toc=html_toc, meta=meta, nextLink=nextLink,
                    tocUrl=tocUrl, cssLink=cssLink,
                    firstContentPageLink=nextLink)
            f.write(t)

        with CurrentDir(output_dir):
            for item in oeb_book.manifest:
                path = abspath(unquote(item.href))
                dir = dirname(path)
                if not exists(dir):
                    os.makedirs(dir)
                if item.spine_position is not None:
                    with open(path, 'wb') as f:
                        pass
                else:
                    with open(path, 'wb') as f:
                        f.write(str(item))
                    item.unload_data_from_memory(memory=path)

            for item in oeb_book.spine:
                path = abspath(unquote(item.href))
                dir = dirname(path)
                root = item.data.getroottree()

                # get & clean HTML <HEAD>-data
                head = root.xpath('//h:head', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                head_content = etree.tostring(head, pretty_print=True, encoding='utf-8')
                head_content = re.sub(r'\<\/?head.*\>', '', head_content)
                head_content = re.sub(re.compile(r'\<style.*\/style\>', re.M|re.S), '', head_content)
                head_content = re.sub(r'<(title)([^>]*)/>', r'<\1\2></\1>', head_content)

                # get & clean HTML <BODY>-data
                body = root.xpath('//h:body', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                ebook_content = etree.tostring(body, pretty_print=True, encoding='utf-8')
                ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)
                ebook_content = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2></\1>', ebook_content)

                # generate link to next page
                if item.spine_position+1 < len(oeb_book.spine):
                    nextLink = oeb_book.spine[item.spine_position+1].href
                    nextLink = relpath(abspath(nextLink), dir)
                else:
                    nextLink = None

                # generate link to previous page
                if item.spine_position > 0:
                    prevLink = oeb_book.spine[item.spine_position-1].href
                    prevLink = relpath(abspath(prevLink), dir)
                else:
                    prevLink = None

                cssLink = relpath(abspath(css_path), dir)
                tocUrl = relpath(output_file, dir)
                firstContentPageLink = oeb_book.spine[0].href

                # render template
                templite = Templite(template_html_data)
                toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
                t = templite.render(ebookContent=ebook_content,
                        prevLink=prevLink, nextLink=nextLink,
                        has_toc=bool(oeb_book.toc.count()), toc=toc,
                        tocUrl=tocUrl, head_content=head_content,
                        meta=meta, cssLink=cssLink,
                        firstContentPageLink=firstContentPageLink)

                # write html to file
                with open(path, 'wb') as f:
                    f.write(t)
                item.unload_data_from_memory(memory=path)

        zfile = ZipFile(output_path, "w")
        zfile.add_dir(output_dir, basename(output_dir))
        zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)

        if opts.extract_to:
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            os.makedirs(opts.extract_to)
            zfile.extractall(opts.extract_to)
            self.log('Zip file extracted to', opts.extract_to)

        zfile.close()

        # cleanup temp dir
        shutil.rmtree(tempdir)
@@ -9,11 +9,13 @@ Fetch metadata using Amazon AWS
 import sys, re

 from lxml import html
+from lxml.html import soupparser

 from calibre import browser
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html

 def find_asin(br, isbn):
     q = 'http://www.amazon.com/s?field-keywords='+isbn

@@ -47,13 +49,12 @@ def get_social_metadata(title, authors, publisher, isbn):
         return mi
     br = browser()
     asin = to_asin(br, isbn)
-    if asin:
-        if get_metadata(br, asin, mi):
-            return mi
+    if asin and get_metadata(br, asin, mi):
+        return mi
     from calibre.ebooks.metadata.xisbn import xisbn
     for i in xisbn.get_associated_isbns(isbn):
         asin = to_asin(br, i)
-        if get_metadata(br, asin, mi):
+        if asin and get_metadata(br, asin, mi):
             return mi
     return mi

@@ -70,7 +71,10 @@ def get_metadata(br, asin, mi):
         return False
     raw = xml_to_unicode(raw, strip_encoding_pats=True,
             resolve_entities=True)[0]
-    root = html.fromstring(raw)
+    try:
+        root = soupparser.fromstring(raw)
+    except:
+        return False
     ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
     if ratings:
         pat = re.compile(r'([0-9.]+) out of (\d+) stars')

@@ -95,13 +99,13 @@ def get_metadata(br, asin, mi):
     # remove all attributes from tags
     desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
     # Collapse whitespace
-    desc = re.sub('\n+', '\n', desc)
-    desc = re.sub(' +', ' ', desc)
+    #desc = re.sub('\n+', '\n', desc)
+    #desc = re.sub(' +', ' ', desc)
     # Remove the notice about text referring to out of print editions
     desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
     # Remove comments
     desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-    mi.comments = desc
+    mi.comments = sanitize_comments_html(desc)

     return True

@@ -112,7 +116,7 @@ def main(args=sys.argv):
     print

     # Test sophisticated comment formatting
-    print get_social_metadata('Swan Thieves', None, None, '9780316065795')
+    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
     print

     # Random tests
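The switch from html.fromstring to soupparser.fromstring trades speed for tolerance: the soupparser backend hands the raw markup to BeautifulSoup first, so it copes with product pages that the stricter parser mangles or rejects, and the new try/except turns any remaining parse failure into a clean False. A minimal illustration, assuming BeautifulSoup 3.0.x is installed as the windows build notes earlier in this commit require:

    from lxml.html import soupparser

    root = soupparser.fromstring('<p><b>unclosed markup')
    print root.tag   # parsed anyway, despite the unterminated tags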
@@ -109,9 +109,11 @@ class OCFZipReader(OCFReader):
             raise EPubException("not a ZIP .epub OCF container")
         self.root = root
         if self.root is None:
-            self.root = os.getcwdu()
-            if hasattr(stream, 'name'):
-                self.root = os.path.abspath(os.path.dirname(stream.name))
+            name = getattr(stream, 'name', False)
+            if name:
+                self.root = os.path.abspath(os.path.dirname(name))
+            else:
+                self.root = os.getcwdu()
         super(OCFZipReader, self).__init__()

     def open(self, name, mode='r'):
@ -12,6 +12,7 @@ from calibre.utils.logging import default_log
|
|||||||
from calibre.utils.titlecase import titlecase
|
from calibre.utils.titlecase import titlecase
|
||||||
from calibre.customize import Plugin
|
from calibre.customize import Plugin
|
||||||
from calibre.ebooks.metadata.covers import check_for_cover
|
from calibre.ebooks.metadata.covers import check_for_cover
|
||||||
|
from calibre.utils.html2text import html2text
|
||||||
|
|
||||||
metadata_config = None
|
metadata_config = None
|
||||||
|
|
||||||
@ -48,6 +49,11 @@ class MetadataSource(Plugin): # {{{
|
|||||||
#: member.
|
#: member.
|
||||||
string_customization_help = None
|
string_customization_help = None
|
||||||
|
|
||||||
|
#: Set this to true if your plugin returns HTML markup in comments.
|
||||||
|
#: Then if the user disables HTML, calibre will automagically convert
|
||||||
|
#: the HTML to Markdown.
|
||||||
|
has_html_comments = False
|
||||||
|
|
||||||
type = _('Metadata download')
|
type = _('Metadata download')
|
||||||
|
|
||||||
def __call__(self, title, author, publisher, isbn, verbose, log=None,
|
def __call__(self, title, author, publisher, isbn, verbose, log=None,
|
||||||
@ -79,6 +85,13 @@ class MetadataSource(Plugin): # {{{
|
|||||||
mi.comments = None
|
mi.comments = None
|
||||||
if not c.get('tags', True):
|
if not c.get('tags', True):
|
||||||
mi.tags = []
|
mi.tags = []
|
||||||
|
if self.has_html_comments and mi.comments and \
|
||||||
|
c.get('textcomments', False):
|
||||||
|
try:
|
||||||
|
mi.comments = html2text(mi.comments)
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
mi.comments = None
|
||||||
|
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self.exception = e
|
self.exception = e
|
||||||
@@ -132,11 +145,17 @@ class MetadataSource(Plugin): # {{{
             setattr(w, '_'+x, cb)
             cb.setChecked(c.get(x, True))
             w._layout.addWidget(cb)

+        cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
+        setattr(w, '_textcomments', cb)
+        cb.setChecked(c.get('textcomments', False))
+        w._layout.addWidget(cb)
+
         return w

     def save_settings(self, w):
         dl_settings = {}
-        for x in ('rating', 'tags', 'comments'):
+        for x in ('rating', 'tags', 'comments', 'textcomments'):
             dl_settings[x] = getattr(w, '_'+x).isChecked()
         c = self.config_store()
         c.set(self.name, dl_settings)
@@ -210,6 +229,8 @@ class Amazon(MetadataSource): # {{{
     metadata_type = 'social'
     description = _('Downloads social metadata from amazon.com')

+    has_html_comments = True
+
     def fetch(self):
         if not self.isbn:
             return
@@ -182,6 +182,7 @@ class TOC(list):
             except:
                 play_order = 1
             href = fragment = text = None
+            nd = dest
             nl = nl_path(np)
             if nl:
                 nl = nl[0]
@@ -190,17 +191,14 @@ class TOC(list):
                 text += etree.tostring(txt, method='text',
                         encoding=unicode, with_tail=False)
             content = content_path(np)
-            if not content or not text:
-                return
-            content = content[0]
-            src = get_attr(content, attr='src')
-            if src is None:
-                return
-
-            purl = urlparse(unquote(content.get('src')))
-            href, fragment = purl[2], purl[5]
-            nd = dest.add_item(href, fragment, text)
-            nd.play_order = play_order
+            if content and text:
+                content = content[0]
+                src = get_attr(content, attr='src')
+                if src:
+                    purl = urlparse(unquote(content.get('src')))
+                    href, fragment = purl[2], purl[5]
+                    nd = dest.add_item(href, fragment, text)
+                    nd.play_order = play_order

             for c in np_path(np):
                 process_navpoint(c, nd)
@@ -275,7 +275,15 @@ class MobiMLizer(object):
         # <mbp:frame-set/> does not exist lalalala
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return
+            id_ = elem.get('id', None)
+            if id_:
+                # Keep anchors so people can use display:none
+                # to generate hidden TOCs
+                elem.clear()
+                elem.text = None
+                elem.set('id', id_)
+            else:
+                return
         tag = barename(elem.tag)
         istate = copy.copy(istates[-1])
         istate.rendered = False
@@ -355,11 +363,15 @@ class MobiMLizer(object):
                 if value == getattr(self.profile, prop):
                     result = '100%'
                 else:
+                    # Amazon's renderer does not support
+                    # img sizes in units other than px
+                    # See #7520 for test case
                     try:
-                        ems = int(round(float(value) / self.profile.fbase))
+                        pixs = int(round(float(value) / \
+                                (72./self.profile.dpi)))
                     except:
                         continue
-                    result = "%dem" % ems
+                    result = "%d"%pixs
                 istate.attrib[prop] = result
             elif tag == 'hr' and asfloat(style['width']) > 0:
                 prop = style['width'] / self.profile.width
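The arithmetic behind the new branch: CSS lengths arrive in points, 72 pt equal one inch, so dividing by 72/dpi converts points to device pixels. A worked example with an illustrative 168 DPI profile:

    dpi = 168.
    value = 36.                                  # length in points
    pixs = int(round(float(value) / (72./dpi)))  # 36 * 168 / 72 = 84
    assert pixs == 84                            # 36 pt renders as 84 px
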
@@ -378,6 +390,15 @@ class MobiMLizer(object):
             for attr in ('rowspan', 'colspan','width','border','scope'):
                 if attr in elem.attrib:
                     istate.attrib[attr] = elem.attrib[attr]
+        if tag == 'q':
+            t = elem.text
+            if not t:
+                t = ''
+            elem.text = u'\u201c' + t
+            t = elem.tail
+            if not t:
+                t = ''
+            elem.tail = u'\u201d' + t
         text = None
         if elem.text:
             if istate.preserve:
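MOBI has no native <q> element, so the new branch fakes one typographically: an opening curly quote is prepended to the element's text and a closing one to its tail. The same transformation in isolation, using lxml directly:

    # -*- coding: utf-8 -*-
    from lxml import etree

    p = etree.fromstring('<p>She said <q>hello</q> and left.</p>')
    q = p.find('q')
    q.text = u'\u201c' + (q.text or '')   # opening double quote
    q.tail = u'\u201d' + (q.tail or '')   # closing double quote
    print etree.tostring(p, encoding=unicode)
    # -> <p>She said <q>“hello”</q> and left.</p>
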
@@ -406,6 +427,12 @@ class MobiMLizer(object):
             parent = bstate.para if bstate.inline is None else bstate.inline
             if parent is not None:
                 vtag = etree.SubElement(parent, XHTML(vtag))
+                # Add anchors
+                for child in vbstate.body:
+                    if child is not vbstate.para:
+                        vtag.append(child)
+                    else:
+                        break
                 for child in vbstate.para:
                     vtag.append(child)
                 return
@@ -221,7 +221,10 @@ class MetadataHeader(BookHeader):
         else:
             end = self.section_offset(number + 1)
         self.stream.seek(start)
-        return self.stream.read(end - start)
+        try:
+            return self.stream.read(end - start)
+        except OverflowError:
+            return self.stream.read(os.stat(self.stream.name).st_size - start)


 class MobiReader(object):
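The try/except covers MOBI files whose record sizes were stored as a bogus long integer: end - start can then overflow what read() accepts, so the fallback clamps the read to the real file size. The pattern on its own (read_record is a hypothetical helper, not calibre API):

    import os

    def read_record(stream, start, end):
        stream.seek(start)
        try:
            return stream.read(end - start)
        except OverflowError:
            # corrupt size field: read to the end of the actual file instead
            return stream.read(os.stat(stream.name).st_size - start)
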
@@ -398,6 +401,8 @@ class MobiReader(object):
             elem.getparent().remove(elem)
         fname = self.name.encode('ascii', 'replace')
         fname = re.sub(r'[\x08\x15\0]+', '', fname)
+        if not fname:
+            fname = 'dummy'
         htmlfile = os.path.join(output_dir,
                 ascii_filename(fname) + '.html')
         try:
@@ -564,6 +569,10 @@ class MobiReader(object):
             for attr in self.IMAGE_ATTRS:
                 recindex = attrib.pop(attr, None) or recindex
             if recindex is not None:
+                try:
+                    recindex = '%05d'%int(recindex)
+                except:
+                    pass
                 attrib['src'] = 'images/%s.jpg' % recindex
         for attr in ('width', 'height'):
             if attr in attrib:
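The %05d step simply zero-pads the record index so the generated src matches the filenames the image extractor writes out:

    recindex = '42'
    print '%05d' % int(recindex)   # -> '00042', i.e. images/00042.jpg
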
@@ -787,6 +787,8 @@ class Manifest(object):
             # Convert to Unicode and normalize line endings
             data = self.oeb.decode(data)
             data = self.oeb.html_preprocessor(data)
+            # There could be null bytes in data if it had &#0; entities in it
+            data = data.replace('\0', '')

             # Remove DOCTYPE declaration as it messes up parsing
             # In particular, it causes tostring to insert xmlns
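Why the scrub is needed: an &#0; entity decodes to a literal NUL byte, which the XML parser used downstream rejects. Minimal demonstration:

    data = u'<p>bad\x00byte</p>'      # what an &#0; entity decodes to
    data = data.replace('\0', '')
    assert data == u'<p>badbyte</p>'  # now safe to hand to lxml
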
@@ -49,5 +49,3 @@ class OEBOutput(OutputFormatPlugin):
                 with open(path, 'wb') as f:
                     f.write(str(item))
                 item.unload_data_from_memory(memory=path)
-
-
@@ -143,11 +143,17 @@ def render_jacket(mi, output_profile,
     if comments:
         comments = comments_to_html(comments)

+    try:
+        author = mi.format_authors()
+    except:
+        author = ''
+
     def generate_html(comments):
         args = dict(xmlns=XHTML_NS,
                     title_str=title_str,
                     css=css,
                     title=title,
+                    author=author,
                     pubdate_label=_('Published'), pubdate=pubdate,
                     series_label=_('Series'), series=series,
                     rating_label=_('Rating'), rating=rating,
@@ -133,7 +133,11 @@ class DetectStructure(object):


     def elem_to_link(self, item, elem, counter):
-        text = xml2text(elem)
+        text = xml2text(elem).strip()
+        if not text:
+            text = elem.get('title', '')
+        if not text:
+            text = elem.get('alt', '')
         text = text[:100].strip()
         id = elem.get('id', 'calibre_toc_%d'%counter)
         elem.set('id', id)
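With this change a TOC entry built from, say, an image-only heading falls back from the element's text to its title attribute, then to its alt attribute. A quick check with lxml (the tostring call stands in for calibre's xml2text):

    from lxml import etree

    elem = etree.fromstring('<h1 title="Chapter One"><img src="c1.png"/></h1>')
    text = etree.tostring(elem, method='text', encoding=unicode).strip()
    if not text:
        text = elem.get('title', '')
    if not text:
        text = elem.get('alt', '')
    print text   # -> Chapter One
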
@@ -223,7 +223,6 @@ class MessageBox(QMessageBox):
         if default_button is not None:
             self.setDefaultButton(default_button)
-

     def copy_to_clipboard(self):
         QApplication.clipboard().setText('%s: %s\n\n%s' %
                 (self.title, self.msg, self.det_msg))
@@ -715,13 +714,13 @@ def build_forms(srcdir, info=None):
         dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', re.DOTALL).sub(r'_("\1")', dat)
         dat = dat.replace('_("MMM yyyy")', '"MMM yyyy"')
         dat = pat.sub(sub, dat)
+        dat = dat.replace('from QtWebKit.QWebView import QWebView',
+            'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')

         if form.endswith('viewer%smain.ui'%os.sep):
             info('\t\tPromoting WebView')
             dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
             dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(')
-            dat = dat.replace('from QtWebKit.QWebView import QWebView',
-                'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')
             dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'

         open(compiled_form, 'wb').write(dat)
@@ -192,14 +192,15 @@ class EditMetadataAction(InterfaceAction):
                     _('At least two books must be selected for merging'),
                     show=True)
         dest_id, src_books, src_ids = self.books_to_merge(rows)
+        title = self.gui.library_view.model().db.title(dest_id, index_is_id=True)
         if safe_merge:
             if not confirm('<p>'+_(
                 'Book formats and metadata from the selected books '
-                'will be added to the <b>first selected book.</b> '
+                'will be added to the <b>first selected book</b> (%s). '
                 'ISBN will <i>not</i> be merged.<br><br> '
                 'The second and subsequently selected books will not '
                 'be deleted or changed.<br><br>'
-                'Please confirm you want to proceed.')
+                'Please confirm you want to proceed.')%title
                 +'</p>', 'merge_books_safe', self.gui):
                 return
             self.add_formats(dest_id, src_books)
@@ -207,14 +208,14 @@ class EditMetadataAction(InterfaceAction):
         else:
             if not confirm('<p>'+_(
                 'Book formats and metadata from the selected books will be merged '
-                'into the <b>first selected book</b>. '
+                'into the <b>first selected book</b> (%s). '
                 'ISBN will <i>not</i> be merged.<br><br>'
                 'After merger the second and '
                 'subsequently selected books will be <b>deleted</b>. <br><br>'
                 'All book formats of the first selected book will be kept '
                 'and any duplicate formats in the second and subsequently selected books '
                 'will be permanently <b>deleted</b> from your computer.<br><br> '
-                'Are you <b>sure</b> you want to proceed?')
+                'Are you <b>sure</b> you want to proceed?')%title
                 +'</p>', 'merge_books', self.gui):
                 return
         if len(rows)>5:
@@ -233,6 +234,7 @@ class EditMetadataAction(InterfaceAction):
             ci = self.gui.library_view.model().index(dest_row, 0)
             if ci.isValid():
                 self.gui.library_view.setCurrentIndex(ci)
+                self.gui.library_view.model().current_changed(ci, ci)

     def add_formats(self, dest_id, src_books, replace=False):
         for src_book in src_books:
@@ -35,7 +35,6 @@ class ViewAction(InterfaceAction):
         self.qaction.setMenu(self.view_menu)
         ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)
-

     def location_selected(self, loc):
         enabled = loc == 'library'
         for action in list(self.view_menu.actions())[1:]:
@@ -134,6 +133,9 @@ class ViewAction(InterfaceAction):
         rows = self.gui.current_view().selectionModel().selectedRows()
         self._view_books(rows)

+    def view_triggered(self, index):
+        self._view_books([index])
+
     def view_specific_book(self, index):
         self._view_books([index])

@@ -5,11 +5,13 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, collections
+import os, collections, sys
+from Queue import Queue

-from PyQt4.Qt import QLabel, QPixmap, QSize, QWidget, Qt, pyqtSignal, \
-    QVBoxLayout, QScrollArea, QPropertyAnimation, QEasingCurve, \
-    QSizePolicy, QPainter, QRect, pyqtProperty
+from PyQt4.Qt import QPixmap, QSize, QWidget, Qt, pyqtSignal, \
+    QPropertyAnimation, QEasingCurve, QThread, QApplication, QFontInfo, \
+    QSizePolicy, QPainter, QRect, pyqtProperty, QLayout, QPalette
+from PyQt4.QtWebKit import QWebView

 from calibre import fit_image, prepare_string_for_xml
 from calibre.gui2.widgets import IMAGE_EXTENSIONS
@@ -67,10 +69,7 @@ class CoverView(QWidget): # {{{

     def __init__(self, vertical, parent=None):
         QWidget.__init__(self, parent)
-        self.setMaximumSize(QSize(120, 120))
-        self.setMinimumSize(QSize(120 if vertical else 20, 120 if vertical else
-            20))
-        self._current_pixmap_size = self.maximumSize()
+        self._current_pixmap_size = QSize(120, 120)
         self.vertical = vertical

         self.animation = QPropertyAnimation(self, 'current_pixmap_size', self)
@@ -79,8 +78,9 @@ class CoverView(QWidget): # {{{
         self.animation.setStartValue(QSize(0, 0))
         self.animation.valueChanged.connect(self.value_changed)

-        self.setSizePolicy(QSizePolicy.Expanding if vertical else
-                QSizePolicy.Minimum, QSizePolicy.Expanding)
+        self.setSizePolicy(
+                QSizePolicy.Expanding if vertical else QSizePolicy.Minimum,
+                QSizePolicy.Expanding)

         self.default_pixmap = QPixmap(I('book.png'))
         self.pixmap = self.default_pixmap
@@ -109,20 +109,6 @@ class CoverView(QWidget): # {{{
         self.current_pixmap_size = QSize(self.pwidth, self.pheight)
         self.animation.setEndValue(self.current_pixmap_size)

-    def relayout(self, parent_size):
-        if self.vertical:
-            self.setMaximumSize(parent_size.width(),
-                min(int(parent_size.height()/2.),int(4/3. * parent_size.width())+1))
-        else:
-            self.setMaximumSize(1+int(3/4. * parent_size.height()),
-                parent_size.height())
-        self.resize(self.maximumSize())
-        self.animation.stop()
-        self.do_layout()
-
-    def sizeHint(self):
-        return self.maximumSize()
-
     def show_data(self, data):
         self.animation.stop()
         same_item = data.get('id', True) == self.data.get('id', False)
@@ -165,70 +151,187 @@ class CoverView(QWidget): # {{{
 # }}}

 # Book Info {{{
-class Label(QLabel):
+
+class RenderComments(QThread):

-    mr = pyqtSignal(object)
+    rdone = pyqtSignal(object, object)
+
+    def __init__(self, parent):
+        QThread.__init__(self, parent)
+        self.queue = Queue()
+        self.start()
+
+    def run(self):
+        while True:
+            try:
+                rows, comments = self.queue.get()
+            except:
+                break
+            import time
+            time.sleep(0.001)
+            oint = sys.getcheckinterval()
+            sys.setcheckinterval(5)
+            try:
+                self.rdone.emit(rows, comments_to_html(comments))
+            except:
+                pass
+            sys.setcheckinterval(oint)
+
+
+class BookInfo(QWebView):

     link_clicked = pyqtSignal(object)

-    def __init__(self):
-        QLabel.__init__(self)
-        self.setTextFormat(Qt.RichText)
-        self.setText('')
-        self.setWordWrap(True)
-        self.setAlignment(Qt.AlignTop)
-        self.linkActivated.connect(self.link_activated)
+    def __init__(self, vertical, parent=None):
+        QWebView.__init__(self, parent)
+        self.vertical = vertical
+        self.renderer = RenderComments(self)
+        self.renderer.rdone.connect(self._show_data, type=Qt.QueuedConnection)
+        self.page().setLinkDelegationPolicy(self.page().DelegateAllLinks)
+        self.linkClicked.connect(self.link_activated)
         self._link_clicked = False
-        self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)

     def link_activated(self, link):
         self._link_clicked = True
-        link = unicode(link)
+        link = unicode(link.toString())
         self.link_clicked.emit(link)

-    def mouseReleaseEvent(self, ev):
-        QLabel.mouseReleaseEvent(self, ev)
-        if not self._link_clicked:
-            self.mr.emit(ev)
-        self._link_clicked = False
-
-class BookInfo(QScrollArea):
-
-    def __init__(self, vertical, parent=None):
-        QScrollArea.__init__(self, parent)
-        self.vertical = vertical
-        self.setWidgetResizable(True)
-        self.label = Label()
-        self.setWidget(self.label)
-        self.link_clicked = self.label.link_clicked
-        self.mr = self.label.mr
-        self.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
+    def turnoff_scrollbar(self, *args):
+        self.page().mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)

     def show_data(self, data):
-        self.label.setText('')
         rows = render_rows(data)
         rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for
             k, t in rows])
-        comments = ''
-        if data.get(_('Comments'), '') not in ('', u'None'):
-            comments = data[_('Comments')]
-            comments = comments_to_html(comments)
+        comments = data.get(_('Comments'), '')
+        if comments and comments != u'None':
+            self.renderer.queue.put((rows, comments))
+        self._show_data(rows, '')
+
+    def _show_data(self, rows, comments):
+        f = QFontInfo(QApplication.font(self.parent())).pixelSize()
+        p = unicode(QApplication.palette().color(QPalette.Normal,
+            QPalette.Window).name())
+        c = unicode(QApplication.palette().color(QPalette.Normal,
+            QPalette.WindowText).name())
+        templ = u'''\
+        <html>
+            <head>
+            <style type="text/css">
+                body, td {background-color: %s; font-size: %dpx; color: %s }
+                a { text-decoration: none; color: blue }
+            </style>
+            </head>
+            <body>
+            %%s
+            </body>
+        <html>
+        '''%(p, f, c)
         if self.vertical:
             if comments:
                 rows += u'<tr><td colspan="2">%s</td></tr>'%comments
-            self.label.setText(u'<table>%s</table>'%rows)
+            self.setHtml(templ%(u'<table>%s</table>'%rows))
         else:
             left_pane = u'<table>%s</table>'%rows
             right_pane = u'<div>%s</div>'%comments
-            self.label.setText(u'<table><tr><td valign="top" '
+            self.setHtml(templ%(u'<table><tr><td valign="top" '
                 'style="padding-right:2em">%s</td><td valign="top">%s</td></tr></table>'
-                % (left_pane, right_pane))
+                % (left_pane, right_pane)))
+
+    def mouseDoubleClickEvent(self, ev):
+        ev.ignore()
+
+# }}}
+
+class DetailsLayout(QLayout): # {{{
+
+    def __init__(self, vertical, parent):
+        QLayout.__init__(self, parent)
+        self.vertical = vertical
+        self._children = []
+
+        self.min_size = QSize(190, 200) if vertical else QSize(120, 120)
+        self.setContentsMargins(0, 0, 0, 0)
+
+    def minimumSize(self):
+        return QSize(self.min_size)
+
+    def addItem(self, child):
+        if len(self._children) > 2:
+            raise ValueError('This layout can only manage two children')
+        self._children.append(child)
+
+    def itemAt(self, i):
+        try:
+            return self._children[i]
+        except:
+            pass
+        return None
+
+    def takeAt(self, i):
+        try:
+            self._children.pop(i)
+        except:
+            pass
+        return None
+
+    def count(self):
+        return len(self._children)
+
+    def sizeHint(self):
+        return QSize(self.min_size)
+
+    def setGeometry(self, r):
+        QLayout.setGeometry(self, r)
+        self.do_layout(r)
+
+    def cover_height(self, r):
+        mh = min(int(r.height()/2.), int(4/3. * r.width())+1)
+        try:
+            ph = self._children[0].widget().pixmap.height()
+        except:
+            ph = 0
+        if ph > 0:
+            mh = min(mh, ph)
+        return mh
+
+    def cover_width(self, r):
+        mw = 1 + int(3/4. * r.height())
+        try:
+            pw = self._children[0].widget().pixmap.width()
+        except:
+            pw = 0
+        if pw > 0:
+            mw = min(mw, pw)
+        return mw
+
+    def do_layout(self, rect):
+        if len(self._children) != 2:
+            return
+        left, top, right, bottom = self.getContentsMargins()
+        r = rect.adjusted(+left, +top, -right, -bottom)
+        x = r.x()
+        y = r.y()
+        cover, details = self._children
+        if self.vertical:
+            ch = self.cover_height(r)
+            cover.setGeometry(QRect(x, y, r.width(), ch))
+            cover.widget().do_layout()
+            y += ch + 5
+            details.setGeometry(QRect(x, y, r.width(), r.height()-ch-5))
+        else:
+            cw = self.cover_width(r)
+            cover.setGeometry(QRect(x, y, cw, r.height()))
+            cover.widget().do_layout()
+            x += cw + 5
+            details.setGeometry(QRect(x, y, r.width() - cw - 5, r.height()))
 # }}}

 class BookDetails(QWidget): # {{{

-    resized = pyqtSignal(object)
     show_book_info = pyqtSignal()
     open_containing_folder = pyqtSignal(int)
     view_specific_format = pyqtSignal(int, object)
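The new DetailsLayout takes over the sizing that CoverView.relayout used to do: in vertical mode the cover gets at most half the pane height, capped at a 4:3 height-to-width ratio, and the details pane receives whatever is left minus a 5 px gap. The same arithmetic with illustrative numbers:

    # vertical pane 300 px wide, 900 px tall
    width, height = 300, 900
    mh = min(int(height/2.), int(4/3. * width) + 1)   # min(450, 401)
    print mh               # -> 401: the aspect cap wins
    print height - mh - 5  # -> 494 px left for the details pane
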
@@ -269,23 +372,14 @@ class BookDetails(QWidget): # {{{
     def __init__(self, vertical, parent=None):
         QWidget.__init__(self, parent)
         self.setAcceptDrops(True)
-        self._layout = QVBoxLayout()
-        if not vertical:
-            self._layout.setDirection(self._layout.LeftToRight)
+        self._layout = DetailsLayout(vertical, self)
         self.setLayout(self._layout)

         self.cover_view = CoverView(vertical, self)
-        self.cover_view.relayout(self.size())
-        self.resized.connect(self.cover_view.relayout, type=Qt.QueuedConnection)
         self._layout.addWidget(self.cover_view)
         self.book_info = BookInfo(vertical, self)
         self._layout.addWidget(self.book_info)
         self.book_info.link_clicked.connect(self._link_clicked)
-        self.book_info.mr.connect(self.mouseReleaseEvent)
-        if vertical:
-            self.setMinimumSize(QSize(190, 200))
-        else:
-            self.setMinimumSize(120, 120)
         self.setCursor(Qt.PointingHandCursor)

     def _link_clicked(self, link):
@@ -299,17 +393,15 @@ class BookDetails(QWidget): # {{{
             open_local_file(val)


-    def mouseReleaseEvent(self, ev):
+    def mouseDoubleClickEvent(self, ev):
         ev.accept()
         self.show_book_info.emit()

-    def resizeEvent(self, ev):
-        self.resized.emit(self.size())
-
     def show_data(self, data):
-        self.cover_view.show_data(data)
         self.book_info.show_data(data)
-        self.setToolTip('<p>'+_('Click to open Book Details window') +
+        self.cover_view.show_data(data)
+        self._layout.do_layout(self.rect())
+        self.setToolTip('<p>'+_('Double-click to open Book Details window') +
                 '<br><br>' + _('Path') + ': ' + data.get(_('Path'), ''))

     def reset_info(self):
@@ -23,7 +23,9 @@ class PluginWidget(QWidget,Ui_Form):
             ('generate_recently_added', True),
             ('note_tag','*'),
             ('numbers_as_text', False),
-            ('read_tag','+')]
+            ('read_tag','+'),
+            ('wishlist_tag','Wishlist'),
+            ]


     # Output synced to the connected device?
@@ -42,28 +42,28 @@
     </property>
    </widget>
   </item>
-  <item row="2" column="0">
+  <item row="3" column="0">
    <widget class="QLabel" name="label_4">
     <property name="text">
      <string>Additional note tag prefix:</string>
     </property>
    </widget>
   </item>
-  <item row="2" column="1">
+  <item row="3" column="1">
    <widget class="QLineEdit" name="note_tag">
     <property name="toolTip">
      <string extracomment="Default: *"/>
     </property>
    </widget>
   </item>
-  <item row="4" column="1">
+  <item row="5" column="1">
    <widget class="QLineEdit" name="exclude_genre">
     <property name="toolTip">
      <string extracomment="Default: \[[\w]*\]"/>
     </property>
    </widget>
   </item>
-  <item row="4" column="0">
+  <item row="5" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>Regex pattern describing tags to exclude as genres:</string>
@@ -76,7 +76,7 @@
     </property>
    </widget>
   </item>
-  <item row="5" column="1">
+  <item row="6" column="1">
    <widget class="QLabel" name="label_6">
     <property name="text">
      <string>Regex tips:
@@ -88,7 +88,7 @@
     </property>
    </widget>
   </item>
-  <item row="6" column="0">
+  <item row="7" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@@ -101,34 +101,44 @@
     </property>
    </spacer>
   </item>
-  <item row="8" column="0">
+  <item row="9" column="0">
    <widget class="QCheckBox" name="generate_titles">
     <property name="text">
      <string>Include 'Titles' Section</string>
     </property>
    </widget>
   </item>
-  <item row="10" column="0">
+  <item row="11" column="0">
    <widget class="QCheckBox" name="generate_recently_added">
     <property name="text">
      <string>Include 'Recently Added' Section</string>
     </property>
    </widget>
   </item>
-  <item row="11" column="0">
+  <item row="12" column="0">
    <widget class="QCheckBox" name="numbers_as_text">
     <property name="text">
      <string>Sort numbers as text</string>
     </property>
    </widget>
   </item>
-  <item row="9" column="0">
+  <item row="10" column="0">
    <widget class="QCheckBox" name="generate_series">
     <property name="text">
      <string>Include 'Series' Section</string>
     </property>
    </widget>
   </item>
+  <item row="2" column="1">
+   <widget class="QLineEdit" name="wishlist_tag"/>
+  </item>
+  <item row="2" column="0">
+   <widget class="QLabel" name="label_5">
+    <property name="text">
+     <string>Wishlist tag:</string>
+    </property>
+   </widget>
+  </item>
  </layout>
 </widget>
 <resources/>
143
src/calibre/gui2/comments_editor.py
Normal file
@@ -47,6 +47,8 @@ class BulkConfig(Config):
                 self.show_pane)
         self.connect(self.groups, SIGNAL('entered(QModelIndex)'),
                 self.show_group_help)
+        rb = self.buttonBox.button(self.buttonBox.RestoreDefaults)
+        rb.setVisible(False)
         self.groups.setMouseTracking(True)

@@ -28,6 +28,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, conne
     if log is None:
         log = Log()
     from calibre.library import db
+    from calibre.utils.config import prefs
+    prefs.refresh()
     db = db()
     db.catalog_plugin_on_device_temp_mapping = dbspec
