Merge
@ -19,9 +19,93 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
# - title: "Launch of a new website that catalogues DRM free books. http://drmfree.calibre-ebook.com"
|
||||
# description: "A growing catalogue of DRM free books. Books that you actually own after buying instead of renting."
|
||||
# type: major
|
||||
- version: 0.7.48
|
||||
date: 2011-03-04
|
||||
|
||||
new features:
|
||||
- title: "Changes to the internal database structure used by calibre"
|
||||
description: >
|
||||
"These changes will allow calibre, in the future, to support book language, arbitrary book identifiers and keep track of when the metadata for a book was last modified. WARNING: Because of these changes, if you downgrade calibre versions after upgrading to 0.7.48, you will lose any changes you make to the ISBN of book entries in your calibre database, so do not downgrade unless you really have to. Also note that the first time you start calibre after this update, the startup will be slow as the database structure is being changed."
|
||||
|
||||
- title: "Launch of a new website that catalogues DRM free ebooks. http://drmfree.calibre-ebook.com"
|
||||
description: "A growing catalogue of DRM free ebooks. Ebooks that you actually own after paying, instead of just renting."
|
||||
type: major
|
||||
|
||||
- title: "News download: Add an option to keep at most x issues of a particular periodical in the calibre library. Use the Advanced tab in the Fetch news dialog for your news source to set this option."
|
||||
tickets: [9168]
|
||||
|
||||
- title: "You can now right click on the cover in the book details panel to copy/paste a new cover."
|
||||
tickets: [9255]
|
||||
|
||||
- title: "Add an entry to the add books drop down menu to easily add formats to an existing book record"
|
||||
|
||||
- title: "Tag browser: Clicking on a nested category now searches for the category alone. Clicking twice searches for the category and all its descendants and so on."
|
||||
tickets: [9166, 9169]
|
||||
|
||||
- title: "Add a button to the Manage authors dialog to copy author sort values to author"
|
||||
|
||||
- title: "Decrease startup times on large libraries by using a faster algorithm to parse stored dates"
|
||||
|
||||
- title: "Add quick create links to easily create custom columns of commonly used types to the add custom column dialog"
|
||||
|
||||
- title: "Allow drag drop of images to change cover in book details window."
|
||||
tickets: [9226]
|
||||
|
||||
- title: "Device subsystem: Create a drive info file named driveinfo.calibre in the root of each device drive for USB connected devices. This file contains various useful data. API Change: The open method of the device plugins now accepts an extra parameter library_uuid which is the id of the calibre library connected to the device"
|
||||
|
||||
bug fixes:
|
||||
- title: "Conversion pipeline: Fix regression in 0.7.46 that caused loss of some CSS information when converting HTML produced by Microsoft Word. Also remove empty tags from microsoft namespaces when parsing HTML"
|
||||
|
||||
- title: "Try harder to ensure that the worker log temporary files are deleted in windows"
|
||||
|
||||
- title: "CHM Input: Handle CHM files that don't specify a topics file."
|
||||
tickets: [9253]
|
||||
|
||||
- title: "Fix regression that caused memory leak in Tag Browser. This would show up as the memory usage of calibre increasing when switching libraries."
|
||||
tickets: [9246]
|
||||
|
||||
- title: "Fix bug that caused preferences->behavior to not show the output format set by the welcome wizard, and instead default to showing EPUB"
|
||||
|
||||
- title: "Fix bug that caused wrong books to be deleted from library if you choose 'delete from library and device' while the library is sorted by the On device column"
|
||||
|
||||
- title: "MOBI Input: Ignore all ASCII control codes except CR, NL and Tab."
|
||||
tickets: [9219]
|
||||
|
||||
improved recipes:
|
||||
- Credit Slips
|
||||
- Seattle Times
|
||||
- MacWorld
|
||||
- Austin Statesman
|
||||
- EPL Talk
|
||||
- Gawker
|
||||
- Deadspin
|
||||
|
||||
new recipes:
|
||||
- title: "Thai Post Today and Daily Post"
|
||||
author: "Chotechai P."
|
||||
|
||||
- title: "RBC.ru"
|
||||
author: Chewi
|
||||
|
||||
- title: Helsingin Sanomat
|
||||
author: oneillpt
|
||||
|
||||
- title: "LWN Weekly"
|
||||
author: David Cavalca
|
||||
|
||||
- title: "New York Times Sports and Technology Blogs"
|
||||
author: rylsfan
|
||||
|
||||
- title: "Historia and Buctaras"
|
||||
author: Silviu Cotoara
|
||||
|
||||
- title: "Buffalo News"
|
||||
author: ChappyOnIce
|
||||
|
||||
- title: "Dotpod"
|
||||
author: Federico Escalada
|
||||
|
||||
|
||||
|
||||
- version: 0.7.47
|
||||
date: 2011-02-25
|
||||
@ -90,7 +174,7 @@
|
||||
author: Ricardo Jurado
|
||||
|
||||
- title: "Various Romanian news sources"
|
||||
author: Silviu Coatara
|
||||
author: Silviu Cotoara
|
||||
|
||||
- title: "Osnews.pl and SwiatCzytnikow"
|
||||
author: Tomasz Dlugosz
|
||||
|
@ -349,3 +349,9 @@ public_smtp_relay_delay = 301
|
||||
# after a restart of calibre.
|
||||
draw_hidden_section_indicators = True
|
||||
|
||||
#: The maximum width and height for covers saved in the calibre library
|
||||
# All covers in the calibre library will be resized, preserving aspect ratio,
|
||||
# to fit within this size. This is to prevent slowdowns caused by extremely
|
||||
# large covers
|
||||
maximum_cover_size = (1200, 1600)
|
||||
|
||||
|
BIN
resources/images/id_card.png
Normal file
After Width: | Height: | Size: 6.3 KiB |
BIN
resources/images/news/cotidianul.png
Normal file
After Width: | Height: | Size: 495 B |
BIN
resources/images/news/credit_slips.png
Normal file
After Width: | Height: | Size: 4.7 KiB |
BIN
resources/images/news/ele.png
Normal file
After Width: | Height: | Size: 414 B |
BIN
resources/images/news/felicia.png
Normal file
After Width: | Height: | Size: 840 B |
BIN
resources/images/news/financiarul.png
Normal file
After Width: | Height: | Size: 302 B |
BIN
resources/images/news/hitro.png
Normal file
After Width: | Height: | Size: 521 B |
BIN
resources/images/news/imperatortravel.png
Normal file
After Width: | Height: | Size: 556 B |
BIN
resources/images/news/kamikaze.png
Normal file
After Width: | Height: | Size: 262 B |
BIN
resources/images/news/kompiutierra.png
Normal file
After Width: | Height: | Size: 654 B |
BIN
resources/images/news/lwn_weekly.png
Normal file
After Width: | Height: | Size: 387 B |
BIN
resources/images/news/monden.png
Normal file
After Width: | Height: | Size: 437 B |
BIN
resources/images/news/nytimes_sports.png
Normal file
After Width: | Height: | Size: 2.1 KiB |
BIN
resources/images/news/nytimes_tech.png
Normal file
After Width: | Height: | Size: 11 KiB |
BIN
resources/images/news/promotor.png
Normal file
After Width: | Height: | Size: 728 B |
BIN
resources/images/news/rbc_ru.png
Normal file
After Width: | Height: | Size: 371 B |
BIN
resources/images/news/timesnewroman.png
Normal file
After Width: | Height: | Size: 494 B |
BIN
resources/images/news/trombon.png
Normal file
After Width: | Height: | Size: 375 B |
BIN
resources/images/news/wallstreetro.png
Normal file
After Width: | Height: | Size: 768 B |
@ -1,8 +1,8 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Todd Chapman'
|
||||
__copyright__ = 'Todd Chapman'
|
||||
__version__ = 'v0.1'
|
||||
__date__ = '26 February 2011'
|
||||
__version__ = 'v0.2'
|
||||
__date__ = '2 March 2011'
|
||||
|
||||
'''
|
||||
http://www.buffalonews.com/RSS/
|
||||
@ -12,12 +12,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1298680852(BasicNewsRecipe):
|
||||
title = u'Buffalo News'
|
||||
__author__ = 'ChappyOnIce'
|
||||
language = 'en'
|
||||
oldest_article = 2
|
||||
language = 'en'
|
||||
__author__ = 'ChappyOnIce'
|
||||
max_articles_per_feed = 20
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png'
|
||||
remove_javascript = True
|
||||
extra_css = 'body {text-align: justify;}\n \
|
||||
p {text-indent: 20px;}'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['main-content-left']})
|
||||
]
|
||||
@ -28,9 +32,7 @@ class AdvancedUserRecipe1298680852(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class':['body storyContent']})
|
||||
conversion_options = {
|
||||
'base_font_size' : 14,
|
||||
}
|
||||
|
||||
feeds = [(u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'),
|
||||
(u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'),
|
||||
(u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'),
|
||||
|
69
resources/recipes/cotidianul.recipe
Normal file
@ -0,0 +1,69 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
cotidianul.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Cotidianul(BasicNewsRecipe):
    """News recipe for cotidianul.ro, fed from the site's RSS feed."""

    # --- Descriptive metadata -------------------------------------------
    title = u'Cotidianul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u''
    publisher = u'Cotidianul'
    category = 'Ziare,Stiri'
    language = 'ro'
    cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'

    # --- Download settings ----------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # --- Page clean-up rules --------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'titlu'}},
        {'name': 'div', 'attrs': {'class': 'gallery clearfix'}},
        {'name': 'div', 'attrs': {'align': 'justify'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['space']}},
        {'name': 'div', 'attrs': {'id': ['title_desc']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['space']}},
        {'name': 'span', 'attrs': {'class': ['date']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through the inherited ``adeify_images`` helper."""
        return self.adeify_images(soup)
|
@ -1,35 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = 'zotzot'
|
||||
__copyright__ = 'zotzo'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CreditSlips(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'zotzot'
|
||||
language = 'en'
|
||||
version = 1
|
||||
__author__ = 'zotzot'
|
||||
version = 2
|
||||
title = u'Credit Slips.org'
|
||||
publisher = u'Bankr-L'
|
||||
category = u'Economic blog'
|
||||
description = u'All things about credit.'
|
||||
cover_url = 'http://bit.ly/hyZSTr'
|
||||
oldest_article = 50
|
||||
description = u'A discussion on credit and bankruptcy'
|
||||
cover_url = 'http://bit.ly/eAKNCB'
|
||||
oldest_article = 15
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = True
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
conversion_options = {
|
||||
'comments': description,
|
||||
'tags': category,
|
||||
'language': 'en',
|
||||
'publisher': publisher,
|
||||
}
|
||||
|
||||
feeds = [
|
||||
(u'Credit Slips', u'http://www.creditslips.org/creditslips/atom.xml')
|
||||
]
|
||||
conversion_options = {
|
||||
'comments': description,
|
||||
'tags': category,
|
||||
'language': 'en',
|
||||
'publisher': publisher
|
||||
}
|
||||
(u'Credit Slips', u'http://www.creditslips.org/creditslips/atom.xml')
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
|
||||
img {float: left; margin-right: 0.5em;}
|
||||
.author {font-family:Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
p {font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
body {font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Add a "Posted by <author>" line right after the article's first <h2>.

    The first ``<h2>`` in ``soup`` is replaced by its own prettified markup
    followed by a paragraph naming ``article.author``.

    The page is left untouched when it has no ``<h2>`` or when the article
    carries no author; the original code raised in both situations
    (``None.prettify()`` / concatenating ``None`` to a string).

    :param article: feed article object; only ``article.author`` is read.
    :param soup: parsed article page.
    :param first: unused here; part of the hook signature.
    """
    h2 = soup.find('h2')
    if h2 is None or not article.author:
        return
    h2.replaceWith(h2.prettify() + '<p><em>Posted by ' + article.author + '</em></p>')
|
||||
|
@ -24,7 +24,7 @@ class Economist(BasicNewsRecipe):
|
||||
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info']}),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info', 'share_inline_header']}),
|
||||
{'class': lambda x: x and 'share-links-header' in x},
|
||||
]
|
||||
keep_only_tags = [dict(id='ec-article-body')]
|
||||
|
@ -18,7 +18,8 @@ class Economist(BasicNewsRecipe):
|
||||
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info']}),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
|
||||
'share_inline_header']}),
|
||||
{'class': lambda x: x and 'share-links-header' in x},
|
||||
]
|
||||
keep_only_tags = [dict(id='ec-article-body')]
|
||||
|
49
resources/recipes/el_pais_babelia.recipe
Normal file
@ -0,0 +1,49 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElPaisBabelia(BasicNewsRecipe):
    """Recipe for El Pais' Babelia supplement, built by scraping its index page."""

    title = 'El Pais Babelia'
    __author__ = 'oneillpt'
    description = 'El Pais Babelia'
    INDEX = 'http://www.elpais.com/suple/babelia/'
    language = 'es'

    remove_tags_before = dict(name='div', attrs={'class':'estructura_2col'})
    # NOTE(review): the documented BasicNewsRecipe attribute is
    # ``keep_only_tags``; ``keep_tags`` is probably a typo that the framework
    # ignores. Left as-is because renaming it would change the output --
    # confirm before fixing.
    keep_tags = [dict(name='div', attrs={'class':'estructura_2col'})]
    remove_tags = [
        dict(name='div', attrs={'class':'votos estirar'}),
        dict(name='div', attrs={'id':'utilidades'}),
        dict(name='div', attrs={'class':'info_relacionada'}),
        dict(name='div', attrs={'class':'mod_apoyo'}),
        dict(name='div', attrs={'class':'contorno_f'}),
        dict(name='div', attrs={'class':'pestanias'}),
        dict(name='div', attrs={'class':'otros_webs'}),
        dict(name='div', attrs={'id':'pie'}),
    ]
    remove_javascript = True

    def parse_index(self):
        """Build the feed list from the ``contenedor_nuevo`` sections of INDEX.

        Each section becomes one feed titled by its ``<h1>``. Within a
        section, an anchor is kept as an article only when it has its own
        non-empty ``class`` attribute.

        :return: list of ``(section_title, [{'title': ..., 'url': ...}, ...])``
                 tuples; sections without articles are omitted.
        """
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        for section in soup.findAll('div', attrs={'class':'contenedor_nuevo'}):
            section_title = self.tag_to_string(section.find('h1'))
            articles = []
            for post in section.findAll('a', href=True):
                url = post['href']
                # NOTE(review): assumes only site-relative links were treated
                # as articles in the original (its nesting is not recoverable
                # from the diff view) -- confirm against the real file.
                if not url.startswith('/'):
                    continue
                url = 'http://www.elpais.es'+url
                title = self.tag_to_string(post)
                # Read the anchor's own class attribute. The previous
                # substring test on str(post) also matched ``class=`` on
                # nested tags and then failed with KeyError on post['class'].
                klass = post.get('class')
                if not klass:
                    continue
                self.log()
                self.log('--> post: ', post)
                self.log('--> url: ', url)
                self.log('--> title: ', title)
                self.log('--> class: ', klass)
                articles.append({'title':title, 'url':url})
            if articles:
                feeds.append((section_title, articles))
        return feeds
|
||||
|
58
resources/recipes/ele.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
ele.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Ele(BasicNewsRecipe):
    """News recipe for ele.ro, fed from the site's "must read" RSS feed."""

    # --- Descriptive metadata -------------------------------------------
    title = u'Ele'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Dezv\u0103luie ceea ce e\u015fti'
    publisher = u'Ele'
    category = 'Ziare,Femei'
    language = 'ro'
    cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'

    # --- Download settings ----------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Only the article heading and body are kept from each page.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'article_title'}},
        {'name': 'div', 'attrs': {'class': 'article_text'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.ele.ro/rss_must_read'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through the inherited ``adeify_images`` helper."""
        return self.adeify_images(soup)
|
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = 'zotzot'
|
||||
__copyright__ = 'zotzo'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
'''
|
||||
http://www.epltalk.com
|
||||
@ -9,10 +9,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class EPLTalkRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = u'The Gaffer'
|
||||
language = 'en'
|
||||
version = 1
|
||||
version = 2
|
||||
__author__ = 'rylsfan'
|
||||
|
||||
title = u'EPL Talk'
|
||||
publisher = u'The Gaffer'
|
||||
@ -21,17 +20,40 @@ class EPLTalkRecipe(BasicNewsRecipe):
|
||||
description = u'News and Analysis from the English Premier League'
|
||||
cover_url = 'http://bit.ly/hJxZPu'
|
||||
|
||||
oldest_article = 45
|
||||
max_articles_per_feed = 150
|
||||
oldest_article = 3
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = True
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'pd-rating'})]
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
feeds = [(u'EPL Talk', u'http://feeds.feedburner.com/EPLTalk')]
|
||||
remove_tags = [
|
||||
{'class': 'feedflare'},
|
||||
{'class': 'tweetmeme_button'},
|
||||
{'class': 'eplrelated'},
|
||||
{'p': 'Related posts:<ol>'},
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    """Hand the parsed page to the inherited ``adeify_images`` helper and return its result."""
    return self.adeify_images(soup)
|
||||
|
||||
feeds =[
|
||||
(u'EPL Talk', u'http://feeds.feedburner.com/EPLTalk'),
|
||||
(u'MLS Talk', u'http://feeds.feedburner.com/majorleaguesoccertalksite'),
|
||||
#(),
|
||||
#(),
|
||||
#(),
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
|
||||
img {float: left; margin-right: 0.5em;}
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
@ -1,52 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
evz.ro
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EVZ_Ro(BasicNewsRecipe):
|
||||
title = 'evz.ro'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Romania'
|
||||
publisher = 'evz.ro'
|
||||
category = 'news, politics, Romania'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
class EvenimentulZilei(BasicNewsRecipe):
|
||||
title = u'Evenimentul Zilei'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = ''
|
||||
publisher = u'Evenimentul Zilei'
|
||||
oldest_article = 5
|
||||
language = 'ro'
|
||||
masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif'
|
||||
extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare,Stiri'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.evz.ro/fileadmin/images/evzLogo.png'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
|
||||
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'single'})
|
||||
, dict(name='img', attrs={'id':'placeholder'})
|
||||
, dict(name='a', attrs={'id':'holderlink'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['form','embed','iframe','object','base','link','script','noscript'])
|
||||
,dict(attrs={'class':['section','statsInfo','email il']})
|
||||
,dict(attrs={'id' :'gallery'})
|
||||
dict(name='p', attrs={'class':['articleInfo']})
|
||||
, dict(name='div', attrs={'id':['bannerAddoceansArticleJos']})
|
||||
, dict(name='div', attrs={'id':['bannerAddoceansArticle']})
|
||||
]
|
||||
|
||||
remove_tags_after = dict(attrs={'class':'section'})
|
||||
keep_only_tags = [dict(attrs={'class':'single'})]
|
||||
remove_attributes = ['height','width']
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':['bannerAddoceansArticleJos']})
|
||||
]
|
||||
|
||||
feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
|
||||
feeds = [
|
||||
(u'Feeds', u'http://www.evz.ro/rss.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
return self.adeify_images(soup)
|
||||
|
48
resources/recipes/felicia.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
revistafelicia.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Felicia(BasicNewsRecipe):
    """News recipe for revistafelicia.ro, fed from the site's RSS feed."""

    # --- Descriptive metadata -------------------------------------------
    title = u'Revista Felicia'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'O revist\u0103 pentru sufletul t\u0103u'
    publisher = u'Revista Felicia'
    category = 'Ziare,Reviste'
    language = 'ro'
    cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'

    # --- Download settings ----------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Page clean-up rules --------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'header'}},
        {'name': 'div', 'attrs': {'id': 'contentArticol'}},
    ]

    remove_tags = [
        {'name': 'img', 'attrs': {'src': ['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']}},
        {'name': 'div', 'attrs': {'class': ['content']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.revistafelicia.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through the inherited ``adeify_images`` helper."""
        return self.adeify_images(soup)
|
55
resources/recipes/financiarul.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
financiarul.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Financiarul(BasicNewsRecipe):
    """News recipe for financiarul.com, fed from the site's RSS feed."""

    # --- Descriptive metadata -------------------------------------------
    title = u'Financiarul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'FIN.ro'
    publisher = u'Financiarul'
    category = 'Ziare,Stiri'
    language = 'ro'
    cover_url = 'http://www.financiarul.com/templates/default/images/logo.png'

    # --- Download settings ----------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Page clean-up rules --------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'col2ContentLeftL'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['infoArticol']}},
        {'name': 'ul', 'attrs': {'class': 'navSectiuni'}},
        {'name': 'div', 'attrs': {'class': 'separator separatorTop'}},
        {'name': 'div', 'attrs': {'class': 'infoArticol infoArticolBottom'}},
        {'name': 'ul', 'attrs': {'class': ['related']}},
        {'name': 'div', 'attrs': {'class': ['slot panel300 panelGri300 panelGri300s panelGri300sm']}},
    ]

    remove_tags_after = [
        {'name': 'ul', 'attrs': {'class': ['related']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.financiarul.com/rss'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through the inherited ``adeify_images`` helper."""
        return self.adeify_images(soup)
|
31
resources/recipes/helsingin_sanomat.recipe
Normal file
@ -0,0 +1,31 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1298137661(BasicNewsRecipe):
    """Recipe for Helsingin Sanomat (hs.fi) that downloads the print view of each article."""

    title = u'Helsingin Sanomat'
    __author__ = 'oneillpt'
    language = 'fi'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    conversion_options = {'linearize_tables': True}

    remove_tags = [
        {'name': 'a', 'attrs': {'id': 'articleCommentUrl'}},
        {'name': 'p', 'attrs': {'class': 'newsSummary'}},
        {'name': 'div', 'attrs': {'class': 'headerTools'}},
    ]

    feeds = [
        (u'Uutiset - HS.fi', u'http://www.hs.fi/uutiset/rss/'),
        (u'Politiikka - HS.fi', u'http://www.hs.fi/politiikka/rss/'),
        (u'Ulkomaat - HS.fi', u'http://www.hs.fi/ulkomaat/rss/'),
        (u'Kulttuuri - HS.fi', u'http://www.hs.fi/kulttuuri/rss/'),
        (u'Kirjat - HS.fi', u'http://www.hs.fi/kulttuuri/kirjat/rss/'),
        (u'Elokuvat - HS.fi', u'http://www.hs.fi/kulttuuri/elokuvat/rss/'),
    ]

    def print_version(self, url):
        """Return the ``/tulosta`` (print) URL for an article URL.

        Takes everything from the last ``/`` of ``url`` onwards, drops a
        trailing ``?ref=rss`` marker (and anything after it) when present,
        and appends the remainder to ``http://www.hs.fi/tulosta``.
        """
        tail = url[url.rfind("/"):]
        marker_at = tail.rfind("?ref=rss")
        if marker_at > 0:
            tail = tail[:marker_at]
        return "http://www.hs.fi/tulosta" + tail
|
43
resources/recipes/hitro.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
hit.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Hit(BasicNewsRecipe):
    """News recipe for hit.ro, fed from the site's RSS feed."""

    # --- Descriptive metadata -------------------------------------------
    title = u'HIT'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'IT'
    publisher = 'HIT'
    category = 'Ziare,Reviste,IT'
    language = 'ro'
    cover_url = 'http://www.hit.ro/lib/images/frontend/hit_logo.png'

    # --- Download settings ----------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the article heading and body are kept from each page.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'art_titl'}},
        {'name': 'div', 'attrs': {'id': 'continut_articol'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.hit.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through the inherited ``adeify_images`` helper."""
        return self.adeify_images(soup)
|
68
resources/recipes/imperatortravel.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
imperatortravel.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Imperatortravel(BasicNewsRecipe):
|
||||
title = u'Imperator Travel'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'C\u0103l\u0103torii'
|
||||
publisher = u'Imperator Travel'
|
||||
oldest_article = 25
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare,Stiri,Turism,Calatorii'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.imperatortravel.ro/images/header-1.jpg'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
|
||||
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
|
||||
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'article first_main_article'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['meta']})
|
||||
, dict(name='body', attrs={'class':['transparent_widget ff3 win Locale_en_US']})
|
||||
, dict(name='div', attrs={'class':['connect_widget']})
|
||||
, dict(name='ul', attrs={'class':['similar-posts']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='ul', attrs={'class':['similar-posts']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    """Return ``soup`` after handing it to ``self.adeify_images``."""
    adeified = self.adeify_images(soup)
    return adeified
|
53
resources/recipes/kamikaze.recipe
Normal file
@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
kamikazeonline.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Kamikaze(BasicNewsRecipe):
    # Recipe for kamikazeonline.ro, driven by the site's single RSS feed.

    # --- publication metadata ---
    title = u'Kamikaze'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'S\u0103pt\u0103m\u00e2nal sc\u0103pat de sub control'
    publisher = 'Kamikaze'
    language = 'ro'
    category = 'Ziare,Reviste'
    cover_url = 'http://www.kamikazeonline.ro/wp-content/themes/kamikaze/images/kamikazeonline_header.gif'

    # --- download settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # The ebook metadata simply mirrors the attributes declared above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the element with id "content" is kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]

    # Elements dropped from the kept content, matched by class or id.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['connect_confirmation_cell connect_confirmation_cell_no_like']}},
        {'name': 'h3', 'attrs': {'id': ['comments']}},
        {'name': 'ul', 'attrs': {'class': ['addtoany_list']}},
        {'name': 'p', 'attrs': {'class': ['postmetadata']}},
    ]

    remove_tags_after = [
        {'name': 'p', 'attrs': {'class': ['postmetadata']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.kamikazeonline.ro/feed/'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after handing it to ``self.adeify_images``."""
        adeified = self.adeify_images(soup)
        return adeified
|
46
resources/recipes/komchadluek.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class KomChadLuek(BasicNewsRecipe):
    # Recipe for the Thai newspaper Kom Chad Luek (komchadluek.net).
    # Each entry in ``feeds`` maps a Thai section title to that section's RSS URL.

    title = 'KomChadLuek'
    description = 'Komchadluek News'
    __author__ = 'ballsaii and Chotechai'
    __license__ = 'GPL v3'
    publisher = 'Nation Media Group'
    category = 'news, Thai'
    language = 'th'

    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    cover_url = 'http://www.komchadluek.net/images_layout2/komchadluek_headerlogo.png'

    # Keep the headline (h2) and the element holding the article body.
    keep_only_tags = [
        dict(name='h2'),
        dict(name='div', attrs={'id': 'news_detail_news'}),
    ]

    remove_tags_after = [dict(name='hr')]

    feeds = (
        (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', 'http://www.komchadluek.net/rss/politic.xml'),
        # Sport feed: titled "sport" so it no longer duplicates the title of
        # the foreign-news feed below.
        (u'\u0e01\u0e35\u0e2c\u0e32', 'http://www.komchadluek.net/rss/sport.xml'),
        (u'\u0e40\u0e01\u0e29\u0e15\u0e23', 'http://www.komchadluek.net/rss/agriculture.xml'),
        (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', 'http://www.komchadluek.net/rss/foreign.xml'),
        (u'\u0e1a\u0e31\u0e19\u0e40\u0e17\u0e34\u0e07', 'http://www.komchadluek.net/rss/entertainment.xml'),
        (u'\u0e1c\u0e39\u0e49\u0e2b\u0e0d\u0e34\u0e07-\u0e41\u0e1f\u0e0a\u0e31\u0e48\u0e19', 'http://www.komchadluek.net/rss/fashion.xml'),
        (u'\u0e1e\u0e23\u0e30\u0e40\u0e04\u0e23\u0e37\u0e48\u0e2d\u0e07', 'http://www.komchadluek.net/rss/amulet.xml'),
        (u'\u0e20\u0e39\u0e21\u0e34\u0e20\u0e32\u0e04-\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e04\u0e21\u0e17\u0e49\u0e2d\u0e07\u0e16\u0e34\u0e48\u0e19', 'http://www.komchadluek.net/rss/local.xml'),
        (u'\u0e25\u0e38\u0e07\u0e41\u0e08\u0e48\u0e21', 'http://www.komchadluek.net/rss/unclecham.xml'),
        (u'\u0e44\u0e25\u0e1f\u0e4c\u0e2a\u0e44\u0e15\u0e25\u0e4c', 'http://www.komchadluek.net/rss/lifestyle.xml'),
        (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08-\u0e01\u0e32\u0e23\u0e15\u0e25\u0e32\u0e14', 'http://www.komchadluek.net/rss/economic.xml'),
        (u'\u0e2d\u0e32\u0e2b\u0e32\u0e23', 'http://www.komchadluek.net/rss/food.xml'),
        (u'\u0e04\u0e19\u0e23\u0e31\u0e01\u0e1a\u0e49\u0e32\u0e19-\u0e22\u0e32\u0e19\u0e22\u0e19\u0e15\u0e4c', 'http://www.komchadluek.net/rss/homecar.xml'),
        (u'\u0e14\u0e39\u0e14\u0e27\u0e07-\u0e42\u0e2b\u0e23\u0e32\u0e28\u0e32\u0e2a\u0e15\u0e23\u0e4c', 'http://www.komchadluek.net/rss/horoscope.xml'),
        (u'\u0e27\u0e34\u0e17\u0e22\u0e4c\u0e28\u0e32\u0e2a\u0e15\u0e23\u0e4c-\u0e44\u0e2d\u0e17\u0e35', 'http://www.komchadluek.net/rss/scienceit.xml'),
        (u'\u0e28\u0e32\u0e2a\u0e19\u0e32 \u0e28\u0e34\u0e25\u0e1b\u0e30-\u0e27\u0e31\u0e12\u0e19\u0e18\u0e23\u0e23\u0e21 \u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', 'http://www.komchadluek.net/rss/artculture.xml'),
        (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32', 'http://www.komchadluek.net/rss/education.xml'),
        (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21', 'http://www.komchadluek.net/rss/article.xml'),
        (u'\u0e2d\u0e32\u0e0a\u0e0d\u0e32\u0e01\u0e23\u0e23\u0e21', 'http://www.komchadluek.net/rss/crime.xml'),
    )
|
@ -9,28 +9,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Computerra(BasicNewsRecipe):
|
||||
title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
|
||||
recursion = 50
|
||||
oldest_article = 100
|
||||
__author__ = 'Vadim Dyadkin'
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Vadim Dyadkin (edited by A. Chewi)'
|
||||
max_articles_per_feed = 50
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
conversion_options = {'linearize_tables' : True}
|
||||
simultaneous_downloads = 5
|
||||
language = 'ru'
|
||||
description = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u044b, \u043e\u043a\u043e\u043b\u043e\u043d\u0430\u0443\u0447\u043d\u044b\u0435 \u0438 \u043e\u043a\u043e\u043b\u043e\u0444\u0438\u043b\u043e\u0441\u043e\u0444\u0441\u043a\u0438\u0435 \u0441\u0442\u0430\u0442\u044c\u0438, \u0433\u0430\u0434\u0436\u0435\u0442\u044b.'
|
||||
description = u'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'content'}),]
|
||||
|
||||
feeds = [(u'Компьютерра-Онлайн', 'http://feeds.feedburner.com/ct_news/'),]
|
||||
|
||||
feeds = [(u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430', 'http://feeds.feedburner.com/ct_news/'),]
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id': ['fin', 'idc-container', 'idc-noscript',]}),
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id': ['fin', 'idc-container', 'idc-noscript',]}),
|
||||
dict(name='ul', attrs={'class': "related_post"}),
|
||||
dict(name='p', attrs={'class': 'info'}),
|
||||
dict(name='a', attrs={'rel': 'tag', 'class': 'twitter-share-button', 'type': 'button_count'}),
|
||||
dict(name='h2', attrs={}),]
|
||||
|
||||
extra_css = 'body { text-align: justify; }'
|
||||
|
||||
def get_article_url(self, article):
    """Return the article's ``feedburner:origLink`` value, else its ``guid``."""
    fallback = article.get('guid')
    return article.get('feedburner:origLink', fallback)
|
||||
dict(name='a', attrs={'class': 'twitter-share-button'}),
|
||||
dict(name='a', attrs={'type': 'button_count'}),
|
||||
dict(name='h2', attrs={})
|
||||
]
|
||||
|
||||
def print_version(self, url):
    """Return ``url`` with the ``?print=true`` query string appended."""
    print_suffix = '?print=true'
    return url + print_suffix
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
lanacion.com.ar
|
||||
'''
|
||||
@ -19,9 +19,10 @@ class Lanacion(BasicNewsRecipe):
|
||||
language = 'es_AR'
|
||||
publication_type = 'newspaper'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
|
||||
extra_css = """ h1{font-family: Georgia,serif}
|
||||
h2{color: #626262}
|
||||
masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
|
||||
extra_css = """
|
||||
h1{font-family: Georgia,serif}
|
||||
h2{color: #626262; font-weight: normal; font-size: 1.1em}
|
||||
body{font-family: Arial,sans-serif}
|
||||
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
|
||||
.notaFecha{color: #808080}
|
||||
@ -37,47 +38,78 @@ class Lanacion(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['nota floatFix','topNota','nota','post']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
|
||||
,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']})
|
||||
,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] })
|
||||
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
|
||||
,dict(name='div' , attrs={'class':['titulosMultimedia','herramientas noprint','cajaHerramientas noprint','cajaHerramientas floatFix'] })
|
||||
,dict(attrs={'class':['izquierda','espacio17','espacio10','espacio20','floatFix ultimasNoticias','relacionadas','titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
|
||||
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
|
||||
]
|
||||
|
||||
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
|
||||
remove_attributes = ['height','width','visible','onclick','data-count','name']
|
||||
|
||||
feeds = [
|
||||
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
|
||||
,(u'Politica' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=30' )
|
||||
,(u'Economia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=272' )
|
||||
,(u'Deportes' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=131' )
|
||||
,(u'Informacion General' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=21' )
|
||||
,(u'Cultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1' )
|
||||
,(u'Opinion' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=28' )
|
||||
,(u'Espectaculos' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=120' )
|
||||
,(u'Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7' )
|
||||
,(u'Ciencia&Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' )
|
||||
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
|
||||
,(u'Enfoques' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=421' )
|
||||
,(u'Comercio Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=347' )
|
||||
,(u'Tecnologia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=432' )
|
||||
,(u'Arquitectura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=366' )
|
||||
,(u'Turismo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=504' )
|
||||
,(u'Al volante' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=371' )
|
||||
,(u'El Campo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=337' )
|
||||
,(u'Moda y Belleza' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1312' )
|
||||
,(u'Inmuebles Comerciales', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1363' )
|
||||
,(u'Countries' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1348' )
|
||||
,(u'adnCultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6734' )
|
||||
,(u'The Wall Street Journal Americas', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6373' )
|
||||
,(u'Estilo de vida' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7353' )
|
||||
,(u'Management' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7380' )
|
||||
,(u'Bicentenario' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7276' )
|
||||
(u'Ultimas Noticias' , u'http://servicios.lanacion.com.ar/herramientas/rss/origen=2' )
|
||||
,(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
|
||||
,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
|
||||
,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
|
||||
,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
|
||||
,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' )
|
||||
,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' )
|
||||
,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' )
|
||||
,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' )
|
||||
,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' )
|
||||
,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' )
|
||||
,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' )
|
||||
,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' )
|
||||
,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' )
|
||||
,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' )
|
||||
,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' )
|
||||
,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' )
|
||||
,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' )
|
||||
,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312')
|
||||
,(u'Inmuebles Comerciales', u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363')
|
||||
,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348')
|
||||
,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734')
|
||||
,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373')
|
||||
,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344')
|
||||
,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380')
|
||||
,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276')
|
||||
]
|
||||
|
||||
|
||||
def get_browser(self):
    """Return the browser from ``BasicNewsRecipe.get_browser()`` with its
    redirect, response and HTTP debug switches all turned on."""
    browser = BasicNewsRecipe.get_browser()
    # NOTE(review): the debug switches are enabled unconditionally; confirm
    # this is wanted outside of a debugging session.
    switches = (
        browser.set_debug_redirects,
        browser.set_debug_responses,
        browser.set_debug_http,
    )
    for enable in switches:
        enable(True)
    return browser
|
||||
|
||||
def get_article_url(self, article):
    """Return the URL to download for ``article``, or None to skip it.

    The link is obtained from ``BasicNewsRecipe.get_article_url``. Links that
    start with ``http://blogs.lanacion`` and do not end in ``/`` are rejected
    by returning None. An empty or missing link from the base class is passed
    through unchanged instead of raising on ``.startswith``.
    """
    link = BasicNewsRecipe.get_article_url(self, article)
    if not link:
        # Nothing to inspect; hand the empty value back as-is.
        return link
    if link.startswith('http://blogs.lanacion') and not link.endswith('/'):
        return None
    return link
|
||||
|
||||
def preprocess_html(self, soup):
    """Clean up a downloaded page and return the modified ``soup``.

    Three passes are made over the document:
    1. every ``style`` attribute is deleted;
    2. every ``<a>`` is unwrapped: replaced by its text, or, when it has no
       single string but contains an image, turned into an attribute-less
       ``<div>``;
    3. every ``<img>`` lacking an ``alt`` attribute gets ``alt="image"``.
    """
    for item in soup.findAll(style=True):
        del item['style']

    for item in soup.findAll('a'):
        limg = item.find('img')
        if item.string is not None:
            # The link holds a single string: keep just that text.
            item.replaceWith(item.string)
        elif limg:
            # The link wraps an image: keep the content, drop the link itself.
            item.name = 'div'
            item.attrs = []
        else:
            # Mixed content without an image: flatten it to plain text.
            text = self.tag_to_string(item)
            item.replaceWith(text)

    for item in soup.findAll('img'):
        if not item.has_key('alt'):
            item['alt'] = 'image'

    return soup
|
||||
|
@ -3,8 +3,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1292550626(BasicNewsRecipe):
|
||||
title = 'Leduc - Wetaskiwin Pipestone Flyer'
|
||||
__author__ = 'Brian Hahn'
|
||||
description = 'News from Alberta, Canada'
|
||||
oldest_article = 56
|
||||
description = '''Provides news from central Alberta, Canada. This is a
|
||||
weekly publication that provides coverage from the Cities of Leduc and
|
||||
Wetaskiwin, including news from two complete counties, plus the towns and
|
||||
villages within. The counties of Leduc and Wetaskiwin provide news
|
||||
coverage of agriculture, sports, government, family, events and opinion.
|
||||
This publication updated weekly every Thursday.'''
|
||||
oldest_article = 13
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
@ -16,25 +21,32 @@ class AdvancedUserRecipe1292550626(BasicNewsRecipe):
|
||||
cover_url = 'http://www.pipestoneflyer.ca/images/calibre-cover.jpg'
|
||||
remove_tags_before = dict(id='ContentPanel')
|
||||
remove_tags_after = dict(id='ContentPanel')
|
||||
remove_tags = [dict(name='div', attrs={'id':'StoryNav'}),dict(name='div', attrs={'id':'BottomAds'}),dict(name='div', attrs={'id':'MoreStoryLinks'})]
|
||||
remove_tags = [dict(name='div',
|
||||
attrs={'id':'StoryNav'}),dict(name='div',
|
||||
attrs={'id':'BottomAds'}),dict(name='div', attrs={'id':'MoreStoryLinks'})]
|
||||
extra_css = 'img { margin:5px }'
|
||||
feeds = [
|
||||
('Feature', 'http://www.pipestoneflyer.ca/Feature.rss'),
|
||||
('Editors Desk', 'http://www.pipestoneflyer.ca/Editor%27s%20Desk.rss'),
|
||||
('Letters', 'http://www.pipestoneflyer.ca/Letters.rss'),
|
||||
('A Loco Viewpoint', 'http://www.pipestoneflyer.ca/A%20Loco%20Viewpoint.rss'),
|
||||
('Lifes Doorway', 'http://www.pipestoneflyer.ca/Life%27s%20Doorway.rss'),
|
||||
('From the Otherside', 'http://www.pipestoneflyer.ca/From%20the%20Otherside.rss'),
|
||||
('Opinion', 'http://www.pipestoneflyer.ca/Opinion.rss'),
|
||||
('Community', 'http://www.pipestoneflyer.ca/Community.rss'),
|
||||
('Sports', 'http://www.pipestoneflyer.ca/Sports.rss'),
|
||||
('Chambers', 'http://www.pipestoneflyer.ca/Chambers.rss'),
|
||||
('Government', 'http://www.pipestoneflyer.ca/Government.rss'),
|
||||
('Environment', 'http://www.pipestoneflyer.ca/Environment.rss'),
|
||||
('Health', 'http://www.pipestoneflyer.ca/Health.rss'),
|
||||
('Funnies', 'http://www.pipestoneflyer.ca/Funnies.rss'),
|
||||
('Faith', 'http://www.pipestoneflyer.ca/Faith.rss'),
|
||||
('News and Views', 'http://www.pipestoneflyer.ca/News%20and%20Views.rss'),
|
||||
('Obituaries', 'http://www.pipestoneflyer.ca/Obituaries.rss'),
|
||||
('Police Blotter', 'http://www.pipestoneflyer.ca/Police%20Blotter.rss'),
|
||||
]
|
||||
('Feature', 'http://www.pipestoneflyer.ca/Feature.rss'),
|
||||
('Editors Desk', 'http://www.pipestoneflyer.ca/Editor%27s%20Desk.rss'),
|
||||
('Letters', 'http://www.pipestoneflyer.ca/Letters.rss'),
|
||||
('A Loco Viewpoint',
|
||||
'http://www.pipestoneflyer.ca/A%20Loco%20Viewpoint.rss'),
|
||||
('Lifes Doorway', 'http://www.pipestoneflyer.ca/Life%27s%20Doorway.rss'),
|
||||
('From the Otherside',
|
||||
'http://www.pipestoneflyer.ca/From%20the%20Otherside.rss'),
|
||||
('Opinion', 'http://www.pipestoneflyer.ca/Opinion.rss'),
|
||||
('Community', 'http://www.pipestoneflyer.ca/Community.rss'),
|
||||
('Sports', 'http://www.pipestoneflyer.ca/Sports.rss'),
|
||||
('Chambers', 'http://www.pipestoneflyer.ca/Chambers.rss'),
|
||||
('Government', 'http://www.pipestoneflyer.ca/Government.rss'),
|
||||
('Travel ', 'http://www.pipestoneflyer.ca/Travel%20.rss'),
|
||||
('Environment', 'http://www.pipestoneflyer.ca/Environment.rss'),
|
||||
('Health', 'http://www.pipestoneflyer.ca/Health.rss'),
|
||||
('Funnies', 'http://www.pipestoneflyer.ca/Funnies.rss'),
|
||||
('Events', 'http://www.pipestoneflyer.ca/Events.rss'),
|
||||
('Faith', 'http://www.pipestoneflyer.ca/Faith.rss'),
|
||||
('News and Views', 'http://www.pipestoneflyer.ca/News%20and%20Views.rss'),
|
||||
('Obituaries', 'http://www.pipestoneflyer.ca/Obituaries.rss'),
|
||||
('Police Blotter', 'http://www.pipestoneflyer.ca/Police%20Blotter.rss'),
|
||||
('Careers', 'http://www.pipestoneflyer.ca/Careers.rss'),
|
||||
]
|
||||
|
104
resources/recipes/lwn_weekly.recipe
Normal file
@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Davide Cavalca <davide125 at tiscali.it>'
|
||||
'''
|
||||
lwn.net
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class WeeklyLWN(BasicNewsRecipe):
    # Recipe for the LWN.net weekly edition. The index is built by walking the
    # single-page ("bigpage") view; logging in is optional.

    title = 'LWN.net Weekly Edition'
    description = 'Weekly summary of what has happened in the free software world.'
    __author__ = 'Davide Cavalca'
    language = 'en'

    cover_url = 'http://lwn.net/images/lcorner.png'
    #masthead_url = 'http://lwn.net/images/lcorner.png'
    publication_type = 'magazine'

    remove_tags_before = dict(attrs={'class': 'PageHeadline'})
    remove_tags_after = dict(attrs={'class': 'ArticleText'})
    remove_tags = [dict(name=['h2', 'form'])]

    conversion_options = {'linearize_tables': True}

    oldest_article = 7.0
    needs_subscription = 'optional'

    def get_browser(self):
        """Return a browser, logged in to lwn.net when credentials are set.

        The login form is only submitted when both ``self.username`` and
        ``self.password`` are not None.
        """
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://lwn.net/login')
            br.select_form(name='loginform')
            br['Username'] = self.username
            br['Password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the article index from the weekly edition's big page.

        Returns a list of ``(section, articles)`` tuples in the order the
        sections were first encountered. Each article is a dict with
        ``title``, ``url``, ``description``, ``content`` and ``date`` keys.

        Raises ``Exception`` when no section was found.
        """
        # With credentials the current edition is read, otherwise the free one.
        if self.username is not None and self.password is not None:
            index_url = 'http://lwn.net/current/bigpage'
        else:
            index_url = 'http://lwn.net/free/bigpage'
        soup = self.index_to_soup(index_url)
        body = soup.body

        articles = {}
        ans = []
        url_re = re.compile(r'^http://lwn\.net/Articles/')

        while True:
            # Each article starts at a "SummaryHL" paragraph.
            tag_title = body.findNext(name='p', attrs={'class': 'SummaryHL'})
            if tag_title is None:
                break

            # The section is the closest preceding "Cat1HL" heading, if any.
            tag_section = tag_title.findPrevious(name='p', attrs={'class': 'Cat1HL'})
            if tag_section is None:
                section = 'Front Page'
            else:
                section = tag_section.string

            # A "Cat2HL" sub-heading only counts when it sits under the same
            # "Cat1HL" heading as the article.
            tag_section2 = tag_title.findPrevious(name='p', attrs={'class': 'Cat2HL'})
            if tag_section2 is not None:
                if tag_section2.findPrevious(name='p', attrs={'class': 'Cat1HL'}) == tag_section:
                    section = "%s: %s" % (section, tag_section2.string)

            if section not in articles:
                articles[section] = []
            if section not in ans:
                ans.append(section)

            # Scan forward for the article's "Full Story" or "Comments (" link.
            body = tag_title
            while True:
                tag_url = body.findNext(name='a', attrs={'href': url_re})
                if tag_url is None:
                    break
                body = tag_url
                link_text = tag_url.string
                if link_text is None:
                    continue
                if link_text == 'Full Story' or link_text.startswith('Comments ('):
                    break

            if tag_url is None:
                # Ran off the end of the page without finding a link.
                break

            article = dict(
                title=tag_title.string,
                url=tag_url['href'].split('#')[0],
                description='', content='', date='')
            articles[section].append(article)

        ans = [(key, articles[key]) for key in ans if key in articles]
        if not ans:
            raise Exception('Could not find any articles.')

        return ans
|
||||
|
||||
# vim: expandtab:ts=4:sw=4
|
@ -11,7 +11,6 @@ http://www.macworld.co.uk/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
temp_files = []
|
||||
articles_are_obfuscated = True
|
||||
@ -36,26 +35,17 @@ class macWorld(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
def get_obfuscated_article(self, url):
    """Download the ``&print`` variant of ``url`` into a temporary file.

    The file is appended to ``self.temp_files`` and its name is returned.
    """
    browser = self.get_browser()
    browser.open(url + '&print')

    # NOTE(review): follow_link receives the URL string together with nr=0;
    # confirm this is the intended way to reach the printable page.
    page = browser.follow_link(url, nr = 0).read()

    tmp = PersistentTemporaryFile('_fa.html')
    self.temp_files.append(tmp)
    tmp.write(page)
    tmp.close()
    return tmp.name
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'article'})
|
||||
dict(name='div', attrs={'id':'content'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['toolBar','mac_tags','toolBar btmTools','textAds']}),
|
||||
{'class':['toolBar','mac_tags','toolBar btmTools','textAds']},
|
||||
dict(name='p', attrs={'class':'breadcrumbs'}),
|
||||
dict(name='div', attrs={'id':['breadcrumb','sidebar','comments']})
|
||||
dict(id=['breadcrumb','sidebar','comments','topContentWrapper',
|
||||
'rightColumn', 'aboveFootPromo', 'storyCarousel']),
|
||||
{'class':lambda x: x and ('tools' in x or 'toolBar'
|
||||
in x)}
|
||||
|
||||
]
|
||||
|
||||
|
@ -1,7 +1,20 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2011, Eddie Lau'
|
||||
|
||||
# Users of Kindle 3 (with limited system-level CJK support)
|
||||
# please replace the following "True" with "False".
|
||||
__MakePeriodical__ = True
|
||||
# Set this to True if your device can display CJK titles
|
||||
__UseChineseTitle__ = False
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||
2011/02/28: rearrange the sections
|
||||
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
|
||||
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
|
||||
folder in Kindle 3
|
||||
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
|
||||
clean up the indentation
|
||||
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
|
||||
@ -19,21 +32,17 @@ import os, datetime, re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
class MPHKRecipe(BasicNewsRecipe):
|
||||
IsCJKWellSupported = True # Set to False to avoid generating periodical in which CJK characters can't be displayed in section/article view
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Eddie Lau'
|
||||
description = ('Hong Kong Chinese Newspaper (http://news.mingpao.com). If'
|
||||
'you are using a Kindle with firmware < 3.1, customize the'
|
||||
'recipe')
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
publisher = 'MingPao'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
remove_javascript = True
|
||||
@ -48,12 +57,14 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||
dict(attrs={'id':['newscontent']}), # entertainment page content
|
||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['photo']})
|
||||
]
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']})] # for the finance page
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page
|
||||
dict(name='table')] # for content fetched from life.mingpao.com
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||
@ -61,7 +72,12 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '</h1>'),
|
||||
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
||||
lambda match: '')
|
||||
lambda match: ''),
|
||||
# skip <br> after title in life.mingpao.com fetched article
|
||||
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
|
||||
lambda match: "<div id='newscontent'>"),
|
||||
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
|
||||
lambda match: "</b>")
|
||||
]
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
@ -129,28 +145,55 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
def parse_index(self):
    """Build the list of ``(section title, articles)`` tuples for the issue.

    Sections are gathered in a fixed order. Regular sections are parsed from
    news.mingpao.com index pages with ``parse_section``; the editorial,
    finance, entertainment and columns sections each use their own parser.
    A section that yields no articles is left out.
    """
    feeds = []
    dateStr = self.get_fetchdate()
    news_base = 'http://news.mingpao.com/' + dateStr
    life_base = 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr

    def add_feed(title, articles):
        # Empty sections are not added to the index.
        if articles:
            feeds.append((title, articles))

    def add_news_sections(sections):
        # ``sections`` is a list of (title, index page name) pairs.
        for title, page in sections:
            add_feed(title, self.parse_section(news_base + '/' + page))

    add_news_sections([(u'\u8981\u805e Headline', 'gaindex.htm'),
                       (u'\u6e2f\u805e Local', 'gbindex.htm'),
                       (u'\u6559\u80b2 Education', 'gfindex.htm')])

    # special - editorial
    add_feed(u'\u793e\u8a55/\u7b46\u9663 Editorial',
             self.parse_ed_section(life_base + '&Category=nalmr'))

    add_news_sections([(u'\u8ad6\u58c7 Forum', 'faindex.htm'),
                       (u'\u4e2d\u570b China', 'caindex.htm'),
                       (u'\u570b\u969b World', 'taindex.htm')])

    # special - finance (fetched once, from life.mingpao.com)
    add_feed(u'\u7d93\u6fdf Finance',
             self.parse_fin_section(life_base + '&Category=nalea'))

    add_news_sections([('Tech News', 'naindex.htm'),
                       (u'\u9ad4\u80b2 Sport', 'spindex.htm')])

    # special - entertainment
    add_feed(u'\u5f71\u8996 Film/TV',
             self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm'))

    add_news_sections([(u'\u526f\u520a Supplement', 'jaindex.htm'),
                       (u'\u82f1\u6587 English', 'emindex.htm')])

    # special - columns
    add_feed(u'\u5c08\u6b04 Columns',
             self.parse_col_section(life_base + '&Category=ncolumn'))

    return feeds
|
||||
|
||||
def parse_section(self, url):
|
||||
@ -171,15 +214,33 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def parse_ed_section(self, url):
    """Collect the editorial article links found on the index page at *url*.

    Every ``<a href=...>`` on the page is resolved against
    ``http://life.mingpao.com/cfm/``; a link is kept only when the resulting
    URL contains both ``.txt`` and ``nal``.  When the same URL is linked more
    than once, the entry (and title) of its last occurrence on the page is
    the one kept.

    Returns a list of ``{'title', 'url', 'description'}`` dicts in page
    order, or an empty list when nothing matches.
    """
    # The return value is not needed here; the call is kept because the
    # original made it (presumably it refreshes the cached fetch date --
    # confirm against get_fetchdate).
    self.get_fetchdate()
    soup = self.index_to_soup(url)

    seen_urls = set()
    articles = []
    # Walk the anchors from last to first so that, for duplicated links,
    # the last occurrence on the page wins.
    for anchor in reversed(soup.findAll('a', href=True)):
        # href=True above guarantees the attribute is present.
        link = 'http://life.mingpao.com/cfm/' + anchor['href']
        if link in seen_urls or '.txt' not in link or 'nal' not in link:
            continue
        seen_urls.add(link)
        articles.append({'title': self.tag_to_string(anchor),
                         'url': link,
                         'description': ''})

    # Restore page order after the reversed walk.
    articles.reverse()
    return articles
|
||||
|
||||
def parse_fin_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href= True)
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
|
||||
if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
|
||||
title = self.tag_to_string(i)
|
||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||
included_urls.append(url)
|
||||
@ -201,6 +262,22 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def parse_col_section(self, url):
    """Collect the column article links found on the index page at *url*.

    Every ``<a href=...>`` on the page is resolved against
    ``http://life.mingpao.com/cfm/``; a link is kept only when the resulting
    URL contains both ``.txt`` and ``ncl``.  When the same URL is linked more
    than once, the entry (and title) of its last occurrence on the page is
    the one kept.

    Returns a list of ``{'title', 'url', 'description'}`` dicts in page
    order, or an empty list when nothing matches.
    """
    # The return value is not needed here; the call is kept because the
    # original made it (presumably it refreshes the cached fetch date --
    # confirm against get_fetchdate).
    self.get_fetchdate()
    soup = self.index_to_soup(url)

    seen_urls = set()
    articles = []
    # Walk the anchors from last to first so that, for duplicated links,
    # the last occurrence on the page wins.
    for anchor in reversed(soup.findAll('a', href=True)):
        # href=True above guarantees the attribute is present.
        link = 'http://life.mingpao.com/cfm/' + anchor['href']
        if link in seen_urls or '.txt' not in link or 'ncl' not in link:
            continue
        seen_urls.add(link)
        articles.append({'title': self.tag_to_string(anchor),
                         'url': link,
                         'description': ''})

    # Restore page order after the reversed walk.
    articles.reverse()
    return articles
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
@ -213,18 +290,18 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
if self.IsCJKWellSupported == True:
|
||||
# use Chinese title
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
# use English title
|
||||
title = self.short_title() + ' ' + self.get_fetchformatteddate()
|
||||
if True: # force date in title
|
||||
# title += strftime(self.timefmt)
|
||||
title = self.short_title()
|
||||
# if not generating a periodical, force date to apply in title
|
||||
if __MakePeriodical__ == False:
|
||||
title = title + ' ' + self.get_fetchformatteddate()
|
||||
if True:
|
||||
mi = MetaInformation(title, [self.publisher])
|
||||
mi.publisher = self.publisher
|
||||
mi.author_sort = self.publisher
|
||||
if self.IsCJKWellSupported == True:
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
|
66
resources/recipes/monden.recipe
Normal file
@ -0,0 +1,66 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
monden.info
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Monden(BasicNewsRecipe):
    """Recipe for monden.info, built from the site's single RSS feed."""

    # Catalogue metadata.
    title = u'Monden'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102'
    publisher = u'Monden'
    category = 'Ziare,Stiri,Muzica'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg'

    # Feed limits and content-handling flags.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Reuses the metadata above so the two cannot drift apart.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Tag selectors: the main content div, then the author/like widgets.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['postAuthor']}},
        {'name': 'div', 'attrs': {'class': ['postLike']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['postLike']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.monden.info/feed/'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
@ -14,7 +14,7 @@ class NationalGeoRo(BasicNewsRecipe):
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'S\u0103 avem grij\u0103 de planet\u0103'
|
||||
publisher = 'National Geographic'
|
||||
oldest_article = 5
|
||||
oldest_article = 35
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
@ -1,14 +1,14 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: utf-8 -*-
|
||||
#Based on Lars Jacob's Taz Digiabo recipe
|
||||
#Based on veezh's original recipe and Kovid Goyal's New York Times recipe
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, veezh'
|
||||
__copyright__ = '2011, Snaab'
|
||||
|
||||
'''
|
||||
www.nrc.nl
|
||||
'''
|
||||
import os, urllib2, zipfile
|
||||
import os, zipfile
|
||||
import time
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
@ -17,41 +17,59 @@ from calibre.ptempfile import PersistentTemporaryFile
|
||||
class NRCHandelsblad(BasicNewsRecipe):
|
||||
|
||||
title = u'NRC Handelsblad'
|
||||
description = u'De EPUB-versie van NRC'
|
||||
description = u'De ePaper-versie van NRC'
|
||||
language = 'nl'
|
||||
lang = 'nl-NL'
|
||||
needs_subscription = True
|
||||
|
||||
__author__ = 'veezh'
|
||||
__author__ = 'Snaab'
|
||||
|
||||
conversion_options = {
|
||||
'no_default_epub_cover' : True
|
||||
}
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://login.nrc.nl/login')
|
||||
br.select_form(nr=0)
|
||||
br['username'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def build_index(self):
|
||||
|
||||
today = time.strftime("%Y%m%d")
|
||||
|
||||
domain = "http://digitaleeditie.nrc.nl"
|
||||
|
||||
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
|
||||
# print url
|
||||
#print url
|
||||
|
||||
try:
|
||||
f = urllib2.urlopen(url)
|
||||
except urllib2.HTTPError:
|
||||
br = self.get_browser()
|
||||
f = br.open(url)
|
||||
except:
|
||||
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
|
||||
raise ValueError('Krant van vandaag nog niet beschikbaar')
|
||||
|
||||
|
||||
tmp = PersistentTemporaryFile(suffix='.epub')
|
||||
self.report_progress(0,_('downloading epub'))
|
||||
tmp.write(f.read())
|
||||
tmp.close()
|
||||
|
||||
f.close()
|
||||
br.close()
|
||||
if zipfile.is_zipfile(tmp):
|
||||
try:
|
||||
zfile = zipfile.ZipFile(tmp.name, 'r')
|
||||
self.report_progress(0,_('extracting epub'))
|
||||
|
||||
zfile.extractall(self.output_dir)
|
||||
self.report_progress(0,_('extracting epub'))
|
||||
except zipfile.BadZipfile:
|
||||
self.report_progress(0,_('BadZip error, continuing'))
|
||||
|
||||
tmp.close()
|
||||
index = os.path.join(self.output_dir, 'content.opf')
|
||||
index = os.path.join(self.output_dir, 'metadata.opf')
|
||||
|
||||
self.report_progress(1,_('epub downloaded and extracted'))
|
||||
|
||||
|
55
resources/recipes/nytimes_sports.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = 'zotzo'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
"""
|
||||
http://fifthdown.blogs.nytimes.com/
|
||||
http://offthedribble.blogs.nytimes.com/
|
||||
http://thequad.blogs.nytimes.com/
|
||||
http://slapshot.blogs.nytimes.com/
|
||||
http://goal.blogs.nytimes.com/
|
||||
http://bats.blogs.nytimes.com/
|
||||
http://straightsets.blogs.nytimes.com/
|
||||
http://formulaone.blogs.nytimes.com/
|
||||
http://onpar.blogs.nytimes.com/
|
||||
"""
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class NYTimesSports(BasicNewsRecipe):
    """Recipe aggregating the New York Times sports blogs via their feeds."""

    # Catalogue metadata.
    title = 'New York Times Sports Beat'
    __author__ = 'rylsfan'
    description = 'Indepth sports from the New York Times'
    publisher = 'The New York Times'
    category = 'Sports'
    language = 'en'

    # Feed limits and content-handling flags.
    oldest_article = 3
    max_articles_per_feed = 25
    no_stylesheets = True

    # Cover deliberately left disabled, as in the original:
    #cover_url ='http://bit.ly/h8F4DO'

    # One feed per blog, named after the blog.
    feeds = [
        (u'The Fifth Down', u'http://fifthdown.blogs.nytimes.com/feed/'),
        (u'Off The Dribble', u'http://offthedribble.blogs.nytimes.com/feed/'),
        (u'The Quad', u'http://thequad.blogs.nytimes.com/feed/'),
        (u'Slap Shot', u'http://slapshot.blogs.nytimes.com/feed/'),
        (u'Goal', u'http://goal.blogs.nytimes.com/feed/'),
        (u'Bats', u'http://bats.blogs.nytimes.com/feed/'),
        (u'Straight Sets', u'http://straightsets.blogs.nytimes.com/feed/'),
        (u'Formula One', u'http://formulaone.blogs.nytimes.com/feed/'),
        (u'On Par', u'http://onpar.blogs.nytimes.com/feed/'),
    ]

    # Tag selectors: page header, headings and the post body.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'header'}},
        {'name': 'h1'},
        {'name': 'h2'},
        {'name': 'div', 'attrs': {'class': 'entry-content'}},
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
|
46
resources/recipes/nytimes_tech.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = 'zotzo'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
"""
|
||||
http://pogue.blogs.nytimes.com/
|
||||
"""
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class NYTimesTechnology(BasicNewsRecipe):
    """Recipe aggregating the New York Times technology blogs via their feeds."""

    # Catalogue metadata.
    title = 'New York Times Technology Beat'
    __author__ = 'David Pogue'
    description = 'The latest in technology from David Pogue'
    publisher = 'The New York Times'
    category = 'Technology'
    language = 'en'
    cover_url ='http://bit.ly/g0SKJT'

    # Feed limits and content-handling flags.
    oldest_article = 14
    max_articles_per_feed = 25
    no_stylesheets = True

    # One feed per blog.
    feeds = [
        (u'Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
        (u'Bits', u'http://bits.blogs.nytimes.com/feed/'),
        (u'Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
        (u'Open', u'http://open.blogs.nytimes.com/feed/'),
    ]

    # Tag selectors: page header, headings and the post body.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'header'}},
        {'name': 'h1'},
        {'name': 'h2'},
        {'name': 'div', 'attrs': {'class': 'entry-content'}},
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif;
        font-weight:bold;font-size:large;}

        h2{font-family:Arial,Helvetica,sans-serif;
        font-weight:normal;font-size:small;}

        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
|
21
resources/recipes/post_today.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1299061355(BasicNewsRecipe):
    """Recipe for Post Today (posttoday.com), one feed per site section."""

    # Catalogue metadata.
    title = u'Post Today'
    __author__ = "Chotechai P."
    language = 'th'
    cover_url = 'http://upload.wikimedia.org/wikipedia/th/2/2e/Posttoday_Logo.png'

    # Feed limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # (section title, RSS URL) pairs; titles are kept as unicode escapes.
    feeds = [
        (u'Breaking News', u'http://www.posttoday.com/rss/src/breakingnews.xml'),
        (u'\u0e02\u0e48\u0e32\u0e27', u'http://www.posttoday.com/rss/src/news.xml'),
        (u'\u0e27\u0e34\u0e40\u0e04\u0e23\u0e32\u0e30\u0e2b\u0e4c', u'http://www.posttoday.com/rss/src/analyse.xml'),
        (u'\u0e40\u0e21\u0e32\u0e17\u0e4c\u0e01\u0e31\u0e19\u0e43\u0e2b\u0e49 z', u'http://www.posttoday.com/rss/src/mouth.xml'),
        (u'\u0e44\u0e17\u0e22\u0e42\u0e0b\u0e44\u0e0b\u0e15\u0e35\u0e49', u'http://www.posttoday.com/rss/src/thaisociety.xml'),
        (u'\u0e44\u0e25\u0e1f\u0e4c\u0e2a\u0e44\u0e15\u0e25\u0e4c', u'http://www.posttoday.com/rss/src/lifestyle.xml'),
        (u'\u0e0a\u0e35\u0e49\u0e0a\u0e48\u0e2d\u0e07\u0e23\u0e27\u0e22', u'http://www.posttoday.com/rss/src/moneyguide.xml'),
        (u'\u0e1a\u0e49\u0e32\u0e19-\u0e04\u0e2d\u0e19\u0e42\u0e14', u'http://www.posttoday.com/rss/src/homecondo.xml'),
        (u'\u0e22\u0e32\u0e19\u0e22\u0e19\u0e15\u0e4c', u'http://www.posttoday.com/rss/src/motor.xml'),
        (u'\u0e14\u0e34\u0e08\u0e34\u0e15\u0e2d\u0e25\u0e44\u0e25\u0e1f\u0e4c', u'http://www.posttoday.com/rss/src/digitallife.xml'),
        (u'\u0e01\u0e35\u0e2c\u0e32', u'http://www.posttoday.com/rss/src/sport.xml'),
        (u'\u0e23\u0e2d\u0e1a\u0e42\u0e25\u0e01', u'http://www.posttoday.com/rss/src/world.xml'),
        (u'\u0e01\u0e34\u0e19-\u0e40\u0e17\u0e35\u0e48\u0e22\u0e27', u'http://www.posttoday.com/rss/src/eattravel.xml'),
        (u'Mind & Soul', u'http://www.posttoday.com/rss/src/mindsoul.xml'),
        (u'\u0e1a\u0e25\u0e47\u0e2d\u0e01 \u0e1a\u0e01.', u'http://www.posttoday.com/rss/src/blogs.xml'),
    ]

    # Tag selectors: the article body div.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'articleContents'}},
    ]

    # Tag selectors: labels, spans and the bookmark/misc divs.
    remove_tags = [
        {'name': 'label'},
        {'name': 'span'},
        {'name': 'div', 'attrs': {'class': 'socialBookmark'}},
        {'name': 'div', 'attrs': {'class': 'misc'}},
    ]
|
70
resources/recipes/promotor.recipe
Normal file
@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
promotor.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Promotor(BasicNewsRecipe):
    """Recipe for promotor.ro, built from the site's single RSS feed."""

    # Catalogue metadata.
    title = u'Promotor'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Auto-moto'
    publisher = u'Promotor'
    category = 'Ziare,Reviste,TV,Auto'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.promotor.ro/images/logo_promotor.gif'

    # Feed limits and content-handling flags.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Reuses the metadata above so the two cannot drift apart.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Tag selectors: title box, the fixed-size image wrapper (matched by its
    # exact inline style), and the two text containers.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'casetatitluarticol'}},
        {'name': 'div', 'attrs': {'style': 'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'}},
        {'name': 'div', 'attrs': {'class': 'textb'}},
        {'name': 'div', 'attrs': {'class': 'contentarticol'}},
    ]

    remove_tags = [
        {'name': 'td', 'attrs': {'class': ['connect_widget_vertical_center connect_widget_button_cell']}},
        {'name': 'div', 'attrs': {'class': ['etichetagry']}},
        {'name': 'span', 'attrs': {'class': ['textb']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['etichetagry']}},
        {'name': 'span', 'attrs': {'class': ['textb']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.promotor.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
49
resources/recipes/rbc_ru.recipe
Normal file
@ -0,0 +1,49 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1286819935(BasicNewsRecipe):
    """Recipe for RBC.ru, one feed per news section, using the print pages."""

    # Catalogue metadata.
    title = u'RBC.ru'
    __author__ = 'A. Chewi'
    language = 'ru'
    timefmt = ' [%a, %d %b, %Y]'

    # Feed limits and content-handling flags.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_attributes = ['style']
    conversion_options = {'linearize_tables': True}

    # Tag selectors: headings and the known article body containers.
    keep_only_tags = [
        {'name': 'h2', 'attrs': {}},
        {'name': 'div', 'attrs': {'class': 'box _ga1_on_'}},
        {'name': 'h1', 'attrs': {'class': 'news_section'}},
        {'name': 'div', 'attrs': {'class': 'news_body dotted_border_bottom'}},
        {'name': 'table', 'attrs': {'class': 'newsBody'}},
        {'name': 'h2', 'attrs': {'class': 'black'}},
    ]

    feeds = [
        (u'Главные новости', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/mainnews.rss'),
        (u'Политика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/politics.rss'),
        (u'Экономика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/economics.rss'),
        (u'Общество', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/society.rss'),
        (u'Происшествия', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/incidents.rss'),
        (u'Финансовые новости Quote.rbc.ru', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/quote.ru/mainnews.rss'),
    ]

    # Tag selectors: video/photo widgets, notes, publication links and
    # print/"red" anchors.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': "video-frame"}},
        {'name': 'div', 'attrs': {'class': "photo-container videoContainer videoSWFLinks videoPreviewSlideContainer notes"}},
        {'name': 'div', 'attrs': {'class': "notes"}},
        {'name': 'div', 'attrs': {'class': "publinks"}},
        {'name': 'a', 'attrs': {'class': "print"}},
        {'name': 'div', 'attrs': {'class': "photo-report_new notes newslider"}},
        {'name': 'div', 'attrs': {'class': "videoContainer"}},
        {'name': 'div', 'attrs': {'class': "videoPreviewSlideContainer"}},
        {'name': 'a', 'attrs': {'class': "videoPreviewContainer"}},
        {'name': 'a', 'attrs': {'class': "red"}},
    ]

    def preprocess_html(self, soup):
        """Replace every ``<a>`` whose ``.string`` is set with that string.

        Anchors whose ``.string`` is ``None`` are left untouched.  Returns the
        same *soup* object, modified in place.
        """
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup

    def print_version(self, url):
        """Return *url* with ``?print=true`` appended."""
        return url + '?print=true'
|
@ -69,12 +69,16 @@ class SeattleTimes(BasicNewsRecipe):
|
||||
u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(id='content')]
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script'])
|
||||
,dict(name='p', attrs={'class':'permission'})
|
||||
dict(name=['object','link','script']),
|
||||
{'class':['permission', 'note', 'bottomtools',
|
||||
'homedelivery']},
|
||||
dict(id=["rightcolumn", 'footer', 'adbottom']),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url
|
||||
start_url, sep, rest_url = url.rpartition('_')
|
||||
rurl, rsep, article_id = start_url.rpartition('/')
|
||||
return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id
|
||||
|
@ -10,7 +10,9 @@ class AdvancedUserRecipe1278049615(BasicNewsRecipe):
|
||||
|
||||
max_articles_per_feed = 100
|
||||
|
||||
feeds = [(u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
|
||||
feeds = [(u'News',
|
||||
u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
|
||||
(u'Local', u'http://www.statesman.com/section-rss.do?source=local&includeSubSections=true'),
|
||||
(u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
|
||||
(u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
|
||||
(u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
|
||||
@ -28,8 +30,11 @@ class AdvancedUserRecipe1278049615(BasicNewsRecipe):
|
||||
conversion_options = {'linearize_tables':True}
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'cxArticleOptions'}),
|
||||
{'class':['perma', 'comments', 'trail', 'share-buttons',
|
||||
'toggle_show_on']},
|
||||
]
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'cxArticleHeader'}),
|
||||
dict(name='div', attrs={'id':'cxArticleBodyText'}),
|
||||
dict(name='div', attrs={'id':['cxArticleBodyText',
|
||||
'content']}),
|
||||
]
|
||||
|
@ -7,6 +7,7 @@ swiatczytnikow.pl
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class swiatczytnikow(BasicNewsRecipe):
|
||||
title = u'Swiat Czytnikow'
|
||||
|
17
resources/recipes/thai_post_daily.recipe
Normal file
@ -0,0 +1,17 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1299054026(BasicNewsRecipe):
    """Recipe for Thai Post Daily (thaipost.net), one feed per taxonomy term.

    Articles are fetched from the site's ``/print/`` pages; see
    ``print_version``.
    """

    # Catalogue metadata.
    title = u'Thai Post Daily'
    __author__ = 'Chotechai P.'
    # Declared explicitly so the recipe is catalogued like the same author's
    # Post Today recipe, which also uses 'th'.
    language = 'th'
    cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'

    # Feed limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # (section title, RSS URL) pairs; titles are kept as unicode escapes.
    feeds = [
        (u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'),
        (u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'),
        (u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'),
        (u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'),
        (u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'),
        (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'),
        (u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'),
        (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'),
        (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'),
        (u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'),
        (u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'),
        (u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', u'http://thaipost.net/taxonomy/term/65/all/feed'),
        (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'),
        (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'),
        (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'),
        (u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'),
        (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'),
        (u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'),
        (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'),
        (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'),
        (u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed'),
    ]

    def print_version(self, url):
        """Return the print-page URL for the article at *url*.

        Everything after the first 32 characters of *url* is appended to
        ``http://www.thaipost.net/print/``.
        """
        # NOTE(review): 32 is presumably the length of the prefix shared by
        # article URLs in these feeds -- confirm against a live feed item.
        return 'http://www.thaipost.net/print/' + url[32:]

    # Tag selectors: the print-page logo, site name and breadcrumb divs.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'print-logo'}},
        {'name': 'div', 'attrs': {'class': 'print-site_name'}},
        {'name': 'div', 'attrs': {'class': 'print-breadcrumb'}},
    ]
|
52
resources/recipes/timesnewroman.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
timesnewroman.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TimesNewRoman(BasicNewsRecipe):
    """Recipe for timesnewroman.ro, built from the site's single RSS feed."""

    # Catalogue metadata.
    title = u'Times New Roman'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Cotidian independent de umor voluntar'
    publisher = u'Times New Roman'
    category = 'Ziare,Reviste,Fun'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'

    # Feed limits and content-handling flags.
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Reuses the metadata above so the two cannot drift apart.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors: the page div, then article info, like widget and
    # "cleared" divs.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'page'}},
    ]

    remove_tags = [
        {'name': 'p', 'attrs': {'class': ['articleinfo']}},
        {'name': 'div', 'attrs': {'class': ['vergefacebooklike']}},
        {'name': 'div', 'attrs': {'class': 'cleared'}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'cleared'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
51
resources/recipes/trombon.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
trombon.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Trombon(BasicNewsRecipe):
    """Recipe for trombon.ro, built from the site's FeedBurner feed."""

    # Catalogue metadata.
    title = u'Trombon'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Parodii si Pamflete'
    publisher = u'Trombon'
    category = 'Ziare,Reviste,Fun'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.trombon.ro/i/trombon.gif'

    # Feed limits and content-handling flags.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Reuses the metadata above so the two cannot drift apart.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors: the article div, then the info box, non-scrolling
    # iframes and the vote widget.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'articol'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['info_2']}},
        {'name': 'iframe', 'attrs': {'scrolling': ['no']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'id': 'article_vote'}},
    ]

    feeds = [
        (u'Feeds', u'http://feeds.feedburner.com/trombon/ABWb?format=xml'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
54
resources/recipes/wallstreetro.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
wall-street.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class WallStreetRo(BasicNewsRecipe):
    """Recipe for wall-street.ro, built from the site's single RSS feed."""

    # Catalogue metadata (the description is intentionally left empty, as
    # in the original).
    title = u'Wall Street'
    __author__ = u'Silviu Cotoar\u0103'
    description = ''
    publisher = 'Wall Street'
    category = 'Ziare'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://img.wall-street.ro/images/WS_new_logo.jpg'

    # Feed limits and content-handling flags.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Reuses the metadata above so the two cannot drift apart.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors: article header and text, then breadcrumbs, toolbox,
    # comment counter and the left column.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article_header'}},
        {'name': 'div', 'attrs': {'class': 'article_text'}},
    ]

    remove_tags = [
        {'name': 'p', 'attrs': {'class': ['page_breadcrumbs']}},
        {'name': 'div', 'attrs': {'id': ['article_user_toolbox']}},
        {'name': 'p', 'attrs': {'class': ['comments_count_container']}},
        {'name': 'div', 'attrs': {'class': ['article_left_column']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'clearfloat'}},
    ]

    feeds = [
        (u'Feeds', u'http://img.wall-street.ro/rssfeeds/wall-street.xml'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
@ -5,8 +5,9 @@
|
||||
"strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n",
|
||||
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
|
||||
"ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n",
|
||||
"select": "def evaluate(self, formatter, kwargs, mi, locals, val, key):\n if not val:\n return ''\n vals = [v.strip() for v in val.split(',')]\n for v in vals:\n if v.startswith(key+':'):\n return v[len(key)+1:]\n return ''\n",
|
||||
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
|
||||
"capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return capitalize(val)\n",
|
||||
"subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
|
||||
"list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n",
|
||||
"shorten": "def evaluate(self, formatter, kwargs, mi, locals,\n val, leading, center_string, trailing):\n l = max(0, int(leading))\n t = max(0, int(trailing))\n if len(val) > l + len(center_string) + t:\n return val[0:l] + center_string + ('' if t == 0 else val[-t:])\n else:\n return val\n",
|
||||
"re": "def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):\n return re.sub(pattern, replacement, val)\n",
|
||||
@ -19,11 +20,13 @@
|
||||
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
|
||||
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
|
||||
"multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",
|
||||
"subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
|
||||
"format_date": "def evaluate(self, formatter, kwargs, mi, locals, val, format_string):\n if not val:\n return ''\n try:\n dt = parse_date(val)\n s = format_date(dt, format_string)\n except:\n s = 'BAD DATE'\n return s\n",
|
||||
"capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return capitalize(val)\n",
|
||||
"count": "def evaluate(self, formatter, kwargs, mi, locals, val, sep):\n return unicode(len(val.split(sep)))\n",
|
||||
"lowercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.lower()\n",
|
||||
"assign": "def evaluate(self, formatter, kwargs, mi, locals, target, value):\n locals[target] = value\n return value\n",
|
||||
"switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if (len(args) % 2) != 1:\n raise ValueError(_('switch requires an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return args[i]\n if re.search(args[i], val):\n return args[i+1]\n i += 2\n",
|
||||
"strcmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n v = strcmp(x, y)\n if v < 0:\n return lt\n if v == 0:\n return eq\n return gt\n",
|
||||
"raw_field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return unicode(getattr(mi, name, None))\n",
|
||||
"cmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n x = float(x if x else 0)\n y = float(y if y else 0)\n if x < y:\n return lt\n if x == y:\n return eq\n return gt\n"
|
||||
}
|
@ -4,6 +4,7 @@
|
||||
# #
|
||||
# #
|
||||
# copyright 2002 Paul Henry Tremblay #
|
||||
# Copyright 2011 Kovid Goyal
|
||||
# #
|
||||
# This program is distributed in the hope that it will be useful, #
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
||||
@ -293,6 +294,23 @@
|
||||
<h3>Annotation</h3>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
<!-- tables -->
|
||||
<xsl:template match="fb:table">
|
||||
<table>
|
||||
<xsl:apply-templates/>
|
||||
</table>
|
||||
</xsl:template>
|
||||
<xsl:template match="fb:tr">
|
||||
<tr><xsl:apply-templates/></tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="fb:td">
|
||||
<xsl:element name="td">
|
||||
<xsl:if test="@align">
|
||||
<xsl:attribute name="align"><xsl:value-of select="@align"/></xsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
<!-- epigraph -->
|
||||
<xsl:template match="fb:epigraph">
|
||||
<blockquote class="epigraph">
|
||||
|
@ -68,6 +68,10 @@ if isosx:
|
||||
|
||||
extensions = [
|
||||
|
||||
Extension('speedup',
|
||||
['calibre/utils/speedup.c'],
|
||||
),
|
||||
|
||||
Extension('icu',
|
||||
['calibre/utils/icu.c'],
|
||||
libraries=icu_libs,
|
||||
|
@ -61,8 +61,9 @@ def osx_version():
|
||||
if m:
|
||||
return int(m.group(1)), int(m.group(2)), int(m.group(3))
|
||||
|
||||
|
||||
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
|
||||
_filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
|
||||
u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32))))
|
||||
|
||||
def sanitize_file_name(name, substitute='_', as_unicode=False):
|
||||
'''
|
||||
@ -83,8 +84,35 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
|
||||
one = one.decode(filesystem_encoding)
|
||||
one = one.replace('..', substitute)
|
||||
# Windows doesn't like path components that end with a period or space
|
||||
if one.endswith('.'):
|
||||
if one and one[-1] in ('.', ' '):
|
||||
one = one[:-1]+'_'
|
||||
# Names starting with a period are hidden on Unix
|
||||
if one.startswith('.'):
|
||||
one = '_' + one[1:]
|
||||
return one
|
||||
|
||||
def sanitize_file_name_unicode(name, substitute='_'):
|
||||
'''
|
||||
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
|
||||
The set of invalid characters is the union of the invalid characters in Windows,
|
||||
OS X and Linux. Also removes leading and trailing whitespace.
|
||||
**WARNING:** This function also replaces path separators, so only pass file names
|
||||
and not full paths to it.
|
||||
'''
|
||||
if not isinstance(name, unicode):
|
||||
return sanitize_file_name(name, substitute=substitute, as_unicode=True)
|
||||
chars = [substitute if c in _filename_sanitize_unicode else c for c in
|
||||
name]
|
||||
one = u''.join(chars)
|
||||
one = re.sub(r'\s', ' ', one).strip()
|
||||
one = re.sub(r'^\.+$', '_', one)
|
||||
one = one.replace('..', substitute)
|
||||
# Windows doesn't like path components that end with a period or space
|
||||
if one and one[-1] in ('.', ' '):
|
||||
one = one[:-1]+'_'
|
||||
# Names starting with a period are hidden on Unix
|
||||
if one.startswith('.'):
|
||||
one = '_' + one[1:]
|
||||
return one
|
||||
|
||||
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.7.47'
|
||||
__version__ = '0.7.48'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
import re
|
||||
@ -69,6 +69,7 @@ if plugins is None:
|
||||
'chmlib',
|
||||
'chm_extra',
|
||||
'icu',
|
||||
'speedup',
|
||||
] + \
|
||||
(['winutil'] if iswindows else []) + \
|
||||
(['usbobserver'] if isosx else []):
|
||||
|
@ -57,7 +57,7 @@ class ANDROID(USBMS):
|
||||
0x413c : { 0xb007 : [0x0100, 0x0224]},
|
||||
|
||||
# LG
|
||||
0x1004 : { 0x61cc : [0x100] },
|
||||
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100] },
|
||||
|
||||
# Archos
|
||||
0x0e79 : {
|
||||
@ -78,6 +78,9 @@ class ANDROID(USBMS):
|
||||
# Xperia
|
||||
0x13d3 : { 0x3304 : [0x0001, 0x0002] },
|
||||
|
||||
# CREEL?? Also Nextbook
|
||||
0x5e3 : { 0x726 : [0x222] },
|
||||
|
||||
}
|
||||
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
|
||||
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
|
||||
|
@ -701,7 +701,7 @@ class ITUNES(DriverBase):
|
||||
self.log.info("ITUNES.get_file(): exporting '%s'" % path)
|
||||
outfile.write(open(self.cached_books[path]['lib_book'].location().path).read())
|
||||
|
||||
def open(self):
|
||||
def open(self, library_uuid):
|
||||
'''
|
||||
Perform any device specific initialization. Called after the device is
|
||||
detected but before any other functions that communicate with the device.
|
||||
@ -2512,7 +2512,12 @@ class ITUNES(DriverBase):
|
||||
# Refresh epub metadata
|
||||
with open(fpath,'r+b') as zfo:
|
||||
# Touch the OPF timestamp
|
||||
try:
|
||||
zf_opf = ZipFile(fpath,'r')
|
||||
except:
|
||||
raise UserFeedback("'%s' is not a valid EPUB" % metadata.title,
|
||||
None,
|
||||
level=UserFeedback.WARN)
|
||||
fnames = zf_opf.namelist()
|
||||
opf = [x for x in fnames if '.opf' in x][0]
|
||||
if opf:
|
||||
|
@ -61,7 +61,7 @@ class BAMBOOK(DeviceConfig, DevicePlugin):
|
||||
detected_device=None) :
|
||||
self.open()
|
||||
|
||||
def open(self):
|
||||
def open(self, library_uuid):
|
||||
# Make sure the Bambook library is ready
|
||||
if not is_bambook_lib_ready():
|
||||
raise OpenFeedback(_("Unable to connect to Bambook, you need to install Bambook library first."))
|
||||
|
@ -47,6 +47,7 @@ class FOLDER_DEVICE(USBMS):
|
||||
#: Icon for this device
|
||||
icon = I('devices/folder.png')
|
||||
METADATA_CACHE = '.metadata.calibre'
|
||||
DRIVEINFO = '.driveinfo.calibre'
|
||||
|
||||
_main_prefix = ''
|
||||
_card_a_prefix = None
|
||||
@ -77,7 +78,8 @@ class FOLDER_DEVICE(USBMS):
|
||||
only_presence=False):
|
||||
return self.is_connected, self
|
||||
|
||||
def open(self):
|
||||
def open(self, library_uuid):
|
||||
self.current_library_uuid = library_uuid
|
||||
if not self._main_prefix:
|
||||
return False
|
||||
return True
|
||||
|
@ -116,6 +116,7 @@ class BOOX(HANLINV3):
|
||||
author = 'Jesus Manuel Marinho Valcarce'
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
METADATA_CACHE = '.metadata.calibre'
|
||||
DRIVEINFO = '.driveinfo.calibre'
|
||||
|
||||
# Ordered list of supported formats
|
||||
FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi',
|
||||
|
@ -215,7 +215,7 @@ class DevicePlugin(Plugin):
|
||||
|
||||
return True
|
||||
|
||||
def open(self):
|
||||
def open(self, library_uuid):
|
||||
'''
|
||||
Perform any device specific initialization. Called after the device is
|
||||
detected but before any other functions that communicate with the device.
|
||||
@ -260,6 +260,8 @@ class DevicePlugin(Plugin):
|
||||
Ask device for device information. See L{DeviceInfoQuery}.
|
||||
|
||||
:return: (device name, device version, software version on device, mime type)
|
||||
The tuple can optionally have a fifth element, which is a
|
||||
drive information dictionary. See usbms.driver for an example.
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
@ -447,6 +449,15 @@ class DevicePlugin(Plugin):
|
||||
'''
|
||||
pass
|
||||
|
||||
def set_driveinfo_name(self, location_code, name):
|
||||
'''
|
||||
Set the device name in the driveinfo file to 'name'. This setting will
|
||||
persist until the file is re-created or the name is changed again.
|
||||
|
||||
Non-disk devices will ignore this request.
|
||||
'''
|
||||
pass
|
||||
|
||||
class BookList(list):
|
||||
'''
|
||||
A list of books. Each Book object must have the fields
|
||||
|
@ -272,6 +272,7 @@ class NEXTBOOK(USBMS):
|
||||
VENDOR_NAME = 'NEXT2'
|
||||
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '1.0.14'
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
THUMBNAIL_HEIGHT = 120
|
||||
|
||||
'''
|
||||
def upload_cover(self, path, filename, metadata, filepath):
|
||||
|
@ -213,7 +213,7 @@ def main():
|
||||
|
||||
for d in connected_devices:
|
||||
try:
|
||||
d.open()
|
||||
d.open(None)
|
||||
except:
|
||||
continue
|
||||
else:
|
||||
|
@ -240,7 +240,7 @@ class PRS500(DeviceConfig, DevicePlugin):
|
||||
def set_progress_reporter(self, report_progress):
|
||||
self.report_progress = report_progress
|
||||
|
||||
def open(self) :
|
||||
def open(self, library_uuid) :
|
||||
"""
|
||||
Claim an interface on the device for communication.
|
||||
Requires write privileges to the device file.
|
||||
|
@ -153,9 +153,6 @@ class PRS505(USBMS):
|
||||
# updated on every connect
|
||||
self.WANTS_UPDATED_THUMBNAILS = self.settings().extra_customization[2]
|
||||
|
||||
def get_device_information(self, end_session=True):
|
||||
return (self.gui_name, '', '', '')
|
||||
|
||||
def filename_callback(self, fname, mi):
|
||||
if getattr(mi, 'application_id', None) is not None:
|
||||
base = fname.rpartition('.')[0]
|
||||
|
@ -700,7 +700,7 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
|
||||
|
||||
|
||||
def open(self):
|
||||
def open(self, library_uuid):
|
||||
time.sleep(5)
|
||||
self._main_prefix = self._card_a_prefix = self._card_b_prefix = None
|
||||
if islinux:
|
||||
@ -722,6 +722,7 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
time.sleep(7)
|
||||
self.open_osx()
|
||||
|
||||
self.current_library_uuid = library_uuid
|
||||
self.post_open_callback()
|
||||
|
||||
def post_open_callback(self):
|
||||
|
@ -10,17 +10,18 @@ driver. It is intended to be subclassed with the relevant parts implemented
|
||||
for a particular device.
|
||||
'''
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import os, re, time, json, uuid
|
||||
from itertools import cycle
|
||||
|
||||
from calibre.constants import numeric_version
|
||||
from calibre import prints, isbytestring
|
||||
from calibre.constants import filesystem_encoding, DEBUG
|
||||
from calibre.devices.usbms.cli import CLI
|
||||
from calibre.devices.usbms.device import Device
|
||||
from calibre.devices.usbms.books import BookList, Book
|
||||
from calibre.ebooks.metadata.book.json_codec import JsonCodec
|
||||
from calibre.utils.config import from_json, to_json
|
||||
from calibre.utils.date import now, isoformat
|
||||
|
||||
BASE_TIME = None
|
||||
def debug_print(*args):
|
||||
@ -52,10 +53,59 @@ class USBMS(CLI, Device):
|
||||
FORMATS = []
|
||||
CAN_SET_METADATA = []
|
||||
METADATA_CACHE = 'metadata.calibre'
|
||||
DRIVEINFO = 'driveinfo.calibre'
|
||||
|
||||
def _update_driveinfo_record(self, dinfo, prefix, location_code, name=None):
|
||||
if not isinstance(dinfo, dict):
|
||||
dinfo = {}
|
||||
if dinfo.get('device_store_uuid', None) is None:
|
||||
dinfo['device_store_uuid'] = unicode(uuid.uuid4())
|
||||
if dinfo.get('device_name') is None:
|
||||
dinfo['device_name'] = self.get_gui_name()
|
||||
if name is not None:
|
||||
dinfo['device_name'] = name
|
||||
dinfo['location_code'] = location_code
|
||||
dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None)
|
||||
dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version])
|
||||
dinfo['date_last_connected'] = isoformat(now())
|
||||
dinfo['prefix'] = prefix.replace('\\', '/')
|
||||
return dinfo
|
||||
|
||||
def _update_driveinfo_file(self, prefix, location_code, name=None):
|
||||
if os.path.exists(os.path.join(prefix, self.DRIVEINFO)):
|
||||
with open(os.path.join(prefix, self.DRIVEINFO), 'rb') as f:
|
||||
try:
|
||||
driveinfo = json.loads(f.read(), object_hook=from_json)
|
||||
except:
|
||||
driveinfo = None
|
||||
driveinfo = self._update_driveinfo_record(driveinfo, prefix,
|
||||
location_code, name)
|
||||
with open(os.path.join(prefix, self.DRIVEINFO), 'wb') as f:
|
||||
f.write(json.dumps(driveinfo, default=to_json))
|
||||
else:
|
||||
driveinfo = self._update_driveinfo_record({}, prefix, location_code, name)
|
||||
with open(os.path.join(prefix, self.DRIVEINFO), 'wb') as f:
|
||||
f.write(json.dumps(driveinfo, default=to_json))
|
||||
return driveinfo
|
||||
|
||||
def get_device_information(self, end_session=True):
|
||||
self.report_progress(1.0, _('Get device information...'))
|
||||
return (self.get_gui_name(), '', '', '')
|
||||
self.driveinfo = {}
|
||||
if self._main_prefix is not None:
|
||||
self.driveinfo['main'] = self._update_driveinfo_file(self._main_prefix, 'main')
|
||||
if self._card_a_prefix is not None:
|
||||
self.driveinfo['A'] = self._update_driveinfo_file(self._card_a_prefix, 'A')
|
||||
if self._card_b_prefix is not None:
|
||||
self.driveinfo['B'] = self._update_driveinfo_file(self._card_b_prefix, 'B')
|
||||
return (self.get_gui_name(), '', '', '', self.driveinfo)
|
||||
|
||||
def set_driveinfo_name(self, location_code, name):
|
||||
if location_code == 'main':
|
||||
self._update_driveinfo_file(self._main_prefix, location_code, name)
|
||||
elif location_code == 'A':
|
||||
self._update_driveinfo_file(self._card_a_prefix, location_code, name)
|
||||
elif location_code == 'B':
|
||||
self._update_driveinfo_file(self._card_b_prefix, location_code, name)
|
||||
|
||||
def books(self, oncard=None, end_session=True):
|
||||
from calibre.ebooks.metadata.meta import path_to_ext
|
||||
|
@ -25,10 +25,10 @@ class DRMError(ValueError):
|
||||
class ParserError(ValueError):
|
||||
pass
|
||||
|
||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm',
|
||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
|
||||
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
||||
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb']
|
||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb']
|
||||
|
||||
class HTMLRenderer(object):
|
||||
|
||||
|
@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
|
||||
def _chmtohtml(self, output_dir, chm_path, no_images, log):
|
||||
from calibre.ebooks.chm.reader import CHMReader
|
||||
log.debug('Opening CHM file')
|
||||
rdr = CHMReader(chm_path, log)
|
||||
rdr = CHMReader(chm_path, log, self.opts)
|
||||
log.debug('Extracting CHM to %s' % output_dir)
|
||||
rdr.extract_content(output_dir)
|
||||
self._chm_reader = rdr
|
||||
@ -32,13 +32,13 @@ class CHMInput(InputFormatPlugin):
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from calibre.ebooks.chm.metadata import get_metadata_from_reader
|
||||
from calibre.customize.ui import plugin_for_input_format
|
||||
self.opts = options
|
||||
|
||||
log.debug('Processing CHM...')
|
||||
with TemporaryDirectory('_chm2oeb') as tdir:
|
||||
html_input = plugin_for_input_format('html')
|
||||
for opt in html_input.options:
|
||||
setattr(options, opt.option.name, opt.recommended_value)
|
||||
options.input_encoding = 'utf-8'
|
||||
no_images = False #options.no_images
|
||||
chm_name = stream.name
|
||||
#chm_data = stream.read()
|
||||
@ -54,6 +54,7 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
odi = options.debug_pipeline
|
||||
options.debug_pipeline = None
|
||||
options.input_encoding = 'utf-8'
|
||||
# try a custom conversion:
|
||||
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
|
||||
# try using html converter:
|
||||
|
@ -40,13 +40,14 @@ class CHMError(Exception):
|
||||
pass
|
||||
|
||||
class CHMReader(CHMFile):
|
||||
def __init__(self, input, log):
|
||||
def __init__(self, input, log, opts):
|
||||
CHMFile.__init__(self)
|
||||
if isinstance(input, unicode):
|
||||
input = input.encode(filesystem_encoding)
|
||||
if not self.LoadCHM(input):
|
||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||
self.log = log
|
||||
self.opts = opts
|
||||
self._sourcechm = input
|
||||
self._contents = None
|
||||
self._playorder = 0
|
||||
@ -54,6 +55,10 @@ class CHMReader(CHMFile):
|
||||
self._extracted = False
|
||||
|
||||
# location of '.hhc' file, which is the CHM TOC.
|
||||
if self.topics is None:
|
||||
self.root, ext = os.path.splitext(self.home.lstrip('/'))
|
||||
self.hhc_path = self.root + ".hhc"
|
||||
else:
|
||||
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
|
||||
self.hhc_path = self.root + ".hhc"
|
||||
|
||||
@ -147,6 +152,8 @@ class CHMReader(CHMFile):
|
||||
break
|
||||
|
||||
def _reformat(self, data, htmlpath):
|
||||
if self.opts.input_encoding:
|
||||
data = data.decode(self.opts.input_encoding)
|
||||
try:
|
||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||
soup = BeautifulSoup(data)
|
||||
|
@ -131,6 +131,9 @@ class PageProcessor(list): # {{{
|
||||
newsizey = int(newsizex / aspect)
|
||||
deltax = 0
|
||||
deltay = (SCRHEIGHT - newsizey) / 2
|
||||
if newsizex < 20000 and newsizey < 20000:
|
||||
# Too large and resizing fails, so better
|
||||
# to leave it as original size
|
||||
wand.size = (newsizex, newsizey)
|
||||
wand.set_border_color(pw)
|
||||
wand.add_border(pw, deltax, deltay)
|
||||
@ -152,10 +155,14 @@ class PageProcessor(list): # {{{
|
||||
newsizey = int(newsizex / aspect)
|
||||
deltax = 0
|
||||
deltay = (wscreeny - newsizey) / 2
|
||||
if newsizex < 20000 and newsizey < 20000:
|
||||
# Too large and resizing fails, so better
|
||||
# to leave it as original size
|
||||
wand.size = (newsizex, newsizey)
|
||||
wand.set_border_color(pw)
|
||||
wand.add_border(pw, deltax, deltay)
|
||||
else:
|
||||
if SCRWIDTH < 20000 and SCRHEIGHT < 20000:
|
||||
wand.size = (SCRWIDTH, SCRHEIGHT)
|
||||
|
||||
if not self.opts.dont_sharpen:
|
||||
|
@ -265,16 +265,28 @@ class CSSPreProcessor(object):
|
||||
|
||||
PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
|
||||
# Remove some of the broken CSS Microsoft products
|
||||
# create, slightly dangerous as it removes to end of line
|
||||
# rather than semi-colon
|
||||
MS_PAT = re.compile(r'^\s*(mso-|panose-).+?$',
|
||||
re.MULTILINE|re.IGNORECASE)
|
||||
# create
|
||||
MS_PAT = re.compile(r'''
|
||||
(?P<start>^|;|\{)\s* # The end of the previous rule or block start
|
||||
(%s).+? # The invalid selectors
|
||||
(?P<end>$|;|\}) # The end of the declaration
|
||||
'''%'mso-|panose-|text-underline|tab-interval',
|
||||
re.MULTILINE|re.IGNORECASE|re.VERBOSE)
|
||||
|
||||
def ms_sub(self, match):
|
||||
end = match.group('end')
|
||||
try:
|
||||
start = match.group('start')
|
||||
except:
|
||||
start = ''
|
||||
if end == ';':
|
||||
end = ''
|
||||
return start + end
|
||||
|
||||
def __call__(self, data, add_namespace=False):
|
||||
from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
|
||||
data = self.PAGE_PAT.sub('', data)
|
||||
if '\n' in data:
|
||||
data = self.MS_PAT.sub('', data)
|
||||
data = self.MS_PAT.sub(self.ms_sub, data)
|
||||
if not add_namespace:
|
||||
return data
|
||||
ans, namespaced = [], False
|
||||
|
@ -18,14 +18,14 @@ SOCIAL_METADATA_FIELDS = frozenset([
|
||||
'series_index', # A floating point number
|
||||
# Of the form { scheme1:value1, scheme2:value2}
|
||||
# For example: {'isbn':'123456789', 'doi':'xxxx', ... }
|
||||
'classifiers',
|
||||
'identifiers',
|
||||
])
|
||||
|
||||
'''
|
||||
The list of names that convert to classifiers when in get and set.
|
||||
The list of names that convert to identifiers when in get and set.
|
||||
'''
|
||||
|
||||
TOP_LEVEL_CLASSIFIERS = frozenset([
|
||||
TOP_LEVEL_IDENTIFIERS = frozenset([
|
||||
'isbn',
|
||||
])
|
||||
|
||||
@ -108,7 +108,7 @@ STANDARD_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(
|
||||
SC_FIELDS_NOT_COPIED = frozenset(['title', 'title_sort', 'authors',
|
||||
'author_sort', 'author_sort_map',
|
||||
'cover_data', 'tags', 'language',
|
||||
'classifiers'])
|
||||
'identifiers'])
|
||||
|
||||
# Metadata fields that smart update should copy only if the source is not None
|
||||
SC_FIELDS_COPY_NOT_NULL = frozenset(['lpath', 'size', 'comments', 'thumbnail'])
|
||||
|
@ -12,7 +12,7 @@ from calibre.constants import DEBUG
|
||||
from calibre.ebooks.metadata.book import SC_COPYABLE_FIELDS
|
||||
from calibre.ebooks.metadata.book import SC_FIELDS_COPY_NOT_NULL
|
||||
from calibre.ebooks.metadata.book import STANDARD_METADATA_FIELDS
|
||||
from calibre.ebooks.metadata.book import TOP_LEVEL_CLASSIFIERS
|
||||
from calibre.ebooks.metadata.book import TOP_LEVEL_IDENTIFIERS
|
||||
from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
|
||||
from calibre.library.field_metadata import FieldMetadata
|
||||
from calibre.utils.date import isoformat, format_date
|
||||
@ -24,7 +24,7 @@ NULL_VALUES = {
|
||||
'user_metadata': {},
|
||||
'cover_data' : (None, None),
|
||||
'tags' : [],
|
||||
'classifiers' : {},
|
||||
'identifiers' : {},
|
||||
'languages' : [],
|
||||
'device_collections': [],
|
||||
'author_sort_map': {},
|
||||
@ -41,7 +41,7 @@ class SafeFormat(TemplateFormatter):
|
||||
def get_value(self, key, args, kwargs):
|
||||
try:
|
||||
key = key.lower()
|
||||
if key != 'title_sort':
|
||||
if key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS:
|
||||
key = field_metadata.search_term_to_field_key(key)
|
||||
b = self.book.get_user_metadata(key, False)
|
||||
if b and b['datatype'] == 'int' and self.book.get(key, 0) == 0:
|
||||
@ -49,7 +49,7 @@ class SafeFormat(TemplateFormatter):
|
||||
elif b and b['datatype'] == 'float' and self.book.get(key, 0.0) == 0.0:
|
||||
v = ''
|
||||
else:
|
||||
ign, v = self.book.format_field(key, series_with_index=False)
|
||||
v = self.book.format_field(key, series_with_index=False)[1]
|
||||
if v is None:
|
||||
return ''
|
||||
if v == '':
|
||||
@ -96,8 +96,8 @@ class Metadata(object):
|
||||
|
||||
def __getattribute__(self, field):
|
||||
_data = object.__getattribute__(self, '_data')
|
||||
if field in TOP_LEVEL_CLASSIFIERS:
|
||||
return _data.get('classifiers').get(field, None)
|
||||
if field in TOP_LEVEL_IDENTIFIERS:
|
||||
return _data.get('identifiers').get(field, None)
|
||||
if field in STANDARD_METADATA_FIELDS:
|
||||
return _data.get(field, None)
|
||||
try:
|
||||
@ -123,11 +123,14 @@ class Metadata(object):
|
||||
|
||||
def __setattr__(self, field, val, extra=None):
|
||||
_data = object.__getattribute__(self, '_data')
|
||||
if field in TOP_LEVEL_CLASSIFIERS:
|
||||
_data['classifiers'].update({field: val})
|
||||
if field in TOP_LEVEL_IDENTIFIERS:
|
||||
field, val = self._clean_identifier(field, val)
|
||||
_data['identifiers'].update({field: val})
|
||||
elif field == 'identifiers':
|
||||
self.set_identifiers(val)
|
||||
elif field in STANDARD_METADATA_FIELDS:
|
||||
if val is None:
|
||||
val = NULL_VALUES.get(field, None)
|
||||
val = copy.copy(NULL_VALUES.get(field, None))
|
||||
_data[field] = val
|
||||
elif field in _data['user_metadata'].iterkeys():
|
||||
_data['user_metadata'][field]['#value#'] = val
|
||||
@ -176,17 +179,48 @@ class Metadata(object):
|
||||
def set(self, field, val, extra=None):
|
||||
self.__setattr__(field, val, extra)
|
||||
|
||||
def get_classifiers(self):
|
||||
def get_identifiers(self):
|
||||
'''
|
||||
Return a copy of the classifiers dictionary.
|
||||
Return a copy of the identifiers dictionary.
|
||||
The dict is small, and the penalty for using a reference where a copy is
|
||||
needed is large. Also, we don't want any manipulations of the returned
|
||||
dict to show up in the book.
|
||||
'''
|
||||
return copy.deepcopy(object.__getattribute__(self, '_data')['classifiers'])
|
||||
ans = object.__getattribute__(self,
|
||||
'_data')['identifiers']
|
||||
if not ans:
|
||||
ans = {}
|
||||
return copy.deepcopy(ans)
|
||||
|
||||
def set_classifiers(self, classifiers):
|
||||
object.__getattribute__(self, '_data')['classifiers'] = classifiers
|
||||
def _clean_identifier(self, typ, val):
|
||||
typ = icu_lower(typ).strip().replace(':', '').replace(',', '')
|
||||
val = val.strip().replace(',', '|').replace(':', '|')
|
||||
return typ, val
|
||||
|
||||
def set_identifiers(self, identifiers):
|
||||
'''
|
||||
Set all identifiers. Note that if you previously set ISBN, calling
|
||||
this method will delete it.
|
||||
'''
|
||||
cleaned = {}
|
||||
for key, val in identifiers.iteritems():
|
||||
key, val = self._clean_identifier(key, val)
|
||||
if key and val:
|
||||
cleaned[key] = val
|
||||
object.__getattribute__(self, '_data')['identifiers'] = cleaned
|
||||
|
||||
def set_identifier(self, typ, val):
|
||||
'If val is empty, deletes identifier of type typ'
|
||||
typ, val = self._clean_identifier(typ, val)
|
||||
if not typ:
|
||||
return
|
||||
identifiers = object.__getattribute__(self,
|
||||
'_data')['identifiers']
|
||||
|
||||
if not val and typ in identifiers:
|
||||
identifiers.pop(typ)
|
||||
if val:
|
||||
identifiers[typ] = val
|
||||
|
||||
# field-oriented interface. Intended to be the same as in LibraryDatabase
|
||||
|
||||
@ -229,7 +263,7 @@ class Metadata(object):
|
||||
if v is not None:
|
||||
result[attr] = v
|
||||
# separate these because it uses the self.get(), not _data.get()
|
||||
for attr in TOP_LEVEL_CLASSIFIERS:
|
||||
for attr in TOP_LEVEL_IDENTIFIERS:
|
||||
v = self.get(attr, None)
|
||||
if v is not None:
|
||||
result[attr] = v
|
||||
@ -400,8 +434,8 @@ class Metadata(object):
|
||||
self.set_all_user_metadata(other.get_all_user_metadata(make_copy=True))
|
||||
for x in SC_FIELDS_COPY_NOT_NULL:
|
||||
copy_not_none(self, other, x)
|
||||
if callable(getattr(other, 'get_classifiers', None)):
|
||||
self.set_classifiers(other.get_classifiers())
|
||||
if callable(getattr(other, 'get_identifiers', None)):
|
||||
self.set_identifiers(other.get_identifiers())
|
||||
# language is handled below
|
||||
else:
|
||||
for attr in SC_COPYABLE_FIELDS:
|
||||
@ -456,15 +490,15 @@ class Metadata(object):
|
||||
if len(other_comments.strip()) > len(my_comments.strip()):
|
||||
self.comments = other_comments
|
||||
|
||||
# Copy all the non-none classifiers
|
||||
if callable(getattr(other, 'get_classifiers', None)):
|
||||
d = self.get_classifiers()
|
||||
s = other.get_classifiers()
|
||||
# Copy all the non-none identifiers
|
||||
if callable(getattr(other, 'get_identifiers', None)):
|
||||
d = self.get_identifiers()
|
||||
s = other.get_identifiers()
|
||||
d.update([v for v in s.iteritems() if v[1] is not None])
|
||||
self.set_classifiers(d)
|
||||
self.set_identifiers(d)
|
||||
else:
|
||||
# other structure not Metadata. Copy the top-level classifiers
|
||||
for attr in TOP_LEVEL_CLASSIFIERS:
|
||||
# other structure not Metadata. Copy the top-level identifiers
|
||||
for attr in TOP_LEVEL_IDENTIFIERS:
|
||||
copy_not_none(self, other, attr)
|
||||
|
||||
other_lang = getattr(other, 'language', None)
|
||||
@ -544,9 +578,15 @@ class Metadata(object):
|
||||
res = res/2
|
||||
return (name, unicode(res), orig_res, cmeta)
|
||||
|
||||
# convert top-level ids into their value
|
||||
if key in TOP_LEVEL_IDENTIFIERS:
|
||||
fmeta = field_metadata['identifiers']
|
||||
name = key
|
||||
res = self.get(key, None)
|
||||
return (name, res, res, fmeta)
|
||||
|
||||
# Translate aliases into the standard field name
|
||||
fmkey = field_metadata.search_term_to_field_key(key)
|
||||
|
||||
if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
|
||||
res = self.get(key, None)
|
||||
fmeta = field_metadata[fmkey]
|
||||
@ -561,6 +601,8 @@ class Metadata(object):
|
||||
elif key == 'series_index':
|
||||
res = self.format_series_index(res)
|
||||
elif datatype == 'text' and fmeta['is_multiple']:
|
||||
if isinstance(res, dict):
|
||||
res = [k + ':' + v for k,v in res.items()]
|
||||
res = u', '.join(sorted(res, key=sort_key))
|
||||
elif datatype == 'series' and series_with_index:
|
||||
res = res + ' [%s]'%self.format_series_index()
|
||||
|
@ -123,6 +123,8 @@ class JsonCodec(object):
|
||||
if key == 'user_metadata':
|
||||
book.set_all_user_metadata(meta)
|
||||
else:
|
||||
if key == 'classifiers':
|
||||
key = 'identifiers'
|
||||
setattr(book, key, meta)
|
||||
booklist.append(book)
|
||||
except:
|
||||
@ -130,6 +132,8 @@ class JsonCodec(object):
|
||||
traceback.print_exc()
|
||||
|
||||
def decode_metadata(self, key, value):
|
||||
if key == 'classifiers':
|
||||
key = 'identifiers'
|
||||
if key == 'user_metadata':
|
||||
for k in value:
|
||||
if value[k]['datatype'] == 'datetime':
|
||||
|
@ -596,6 +596,9 @@ class OPF(object): # {{{
|
||||
ans = MetaInformation(self)
|
||||
for n, v in self._user_metadata_.items():
|
||||
ans.set_user_metadata(n, v)
|
||||
|
||||
ans.set_identifiers(self.get_identifiers())
|
||||
|
||||
return ans
|
||||
|
||||
def write_user_metadata(self):
|
||||
@ -855,6 +858,21 @@ class OPF(object): # {{{
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
def get_identifiers(self):
    '''
    Collect the identifiers declared in the metadata section.

    Every ``identifier`` element that has text is examined. The first
    attribute whose name ends with ``scheme`` gives the identifier type
    (lower-cased with icu_lower) and the text of the element gives the
    value. Empty values and the types ``calibre`` and ``uuid`` are left out.

    :return: A dict mapping identifier type to identifier value
    '''
    find_identifiers = self.XPath(
        'descendant::*[local-name() = "identifier" and text()]')
    found = {}
    for elem in find_identifiers(self.metadata):
        for name, scheme in elem.attrib.iteritems():
            if not name.endswith('scheme'):
                continue
            typ = icu_lower(scheme)
            text = etree.tostring(elem, with_tail=False, encoding=unicode,
                    method='text').strip()
            if text and typ not in ('calibre', 'uuid'):
                found[typ] = text
            # Only the first scheme attribute of an element is used.
            # NOTE(review): placement of this break was reconstructed from
            # a listing without indentation -- confirm.
            break
    return found
|
||||
|
||||
@dynamic_property
|
||||
def application_id(self):
|
||||
|
||||
@ -1166,8 +1184,8 @@ class OPFCreator(Metadata):
|
||||
a(DC_ELEM('description', self.comments))
|
||||
if self.publisher:
|
||||
a(DC_ELEM('publisher', self.publisher))
|
||||
if self.isbn:
|
||||
a(DC_ELEM('identifier', self.isbn, opf_attrs={'scheme':'ISBN'}))
|
||||
for key, val in self.get_identifiers().iteritems():
|
||||
a(DC_ELEM('identifier', val, opf_attrs={'scheme':icu_upper(key)}))
|
||||
if self.rights:
|
||||
a(DC_ELEM('rights', self.rights))
|
||||
if self.tags:
|
||||
@ -1291,8 +1309,8 @@ def metadata_to_opf(mi, as_string=True):
|
||||
factory(DC('description'), mi.comments)
|
||||
if mi.publisher:
|
||||
factory(DC('publisher'), mi.publisher)
|
||||
if mi.isbn:
|
||||
factory(DC('identifier'), mi.isbn, scheme='ISBN')
|
||||
for key, val in mi.get_identifiers().iteritems():
|
||||
factory(DC('identifier'), val, scheme=icu_upper(key))
|
||||
if mi.rights:
|
||||
factory(DC('rights'), mi.rights)
|
||||
factory(DC('language'), mi.language if mi.language and mi.language.lower()
|
||||
@ -1342,7 +1360,7 @@ def test_m2o():
|
||||
mi.language = 'en'
|
||||
mi.comments = 'what a fun book\n\n'
|
||||
mi.publisher = 'publisher'
|
||||
mi.isbn = 'boooo'
|
||||
mi.set_identifiers({'isbn':'booo', 'dummy':'dummy'})
|
||||
mi.tags = ['a', 'b']
|
||||
mi.series = 's"c\'l&<>'
|
||||
mi.series_index = 3.34
|
||||
@ -1350,7 +1368,7 @@ def test_m2o():
|
||||
mi.timestamp = nowf()
|
||||
mi.publication_type = 'ooooo'
|
||||
mi.rights = 'yes'
|
||||
mi.cover = 'asd.jpg'
|
||||
mi.cover = os.path.abspath('asd.jpg')
|
||||
opf = metadata_to_opf(mi)
|
||||
print opf
|
||||
newmi = MetaInformation(OPF(StringIO(opf)))
|
||||
@ -1363,6 +1381,9 @@ def test_m2o():
|
||||
o, n = getattr(mi, attr), getattr(newmi, attr)
|
||||
if o != n and o.strip() != n.strip():
|
||||
print 'FAILED:', attr, getattr(mi, attr), '!=', getattr(newmi, attr)
|
||||
if mi.get_identifiers() != newmi.get_identifiers():
|
||||
print 'FAILED:', 'identifiers', mi.get_identifiers(),
|
||||
print '!=', newmi.get_identifiers()
|
||||
|
||||
|
||||
class OPFTest(unittest.TestCase):
|
||||
@ -1378,6 +1399,7 @@ class OPFTest(unittest.TestCase):
|
||||
<creator opf:role="aut">Next</creator>
|
||||
<dc:subject>One</dc:subject><dc:subject>Two</dc:subject>
|
||||
<dc:identifier scheme="ISBN">123456789</dc:identifier>
|
||||
<dc:identifier scheme="dummy">dummy</dc:identifier>
|
||||
<meta name="calibre:series" content="A one book series" />
|
||||
<meta name="calibre:rating" content="4"/>
|
||||
<meta name="calibre:publication_type" content="test"/>
|
||||
@ -1405,6 +1427,8 @@ class OPFTest(unittest.TestCase):
|
||||
self.assertEqual(opf.rating, 4)
|
||||
self.assertEqual(opf.publication_type, 'test')
|
||||
self.assertEqual(list(opf.itermanifest())[0].get('href'), 'a ~ b')
|
||||
self.assertEqual(opf.get_identifiers(), {'isbn':'123456789',
|
||||
'dummy':'dummy'})
|
||||
|
||||
def testWriting(self):
|
||||
for test in [('title', 'New & Title'), ('authors', ['One', 'Two']),
|
||||
@ -1461,5 +1485,5 @@ def test_user_metadata():
|
||||
|
||||
if __name__ == '__main__':
|
||||
#test_user_metadata()
|
||||
#test_m2o()
|
||||
test_m2o()
|
||||
test()
|
||||
|
@ -65,6 +65,7 @@ class Source(Plugin):
|
||||
parts = parts[1:] + parts[:1]
|
||||
for tok in parts:
|
||||
tok = pat.sub('', tok).strip()
|
||||
if len(tok) > 2 and tok.lower() not in ('von', ):
|
||||
yield tok
|
||||
|
||||
|
||||
|
@ -18,6 +18,7 @@ from calibre import xml_entity_to_unicode, CurrentDir, entity_to_unicode, \
|
||||
replace_entities
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.date import parse_date
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre.ebooks.chardet import ENCODING_PATS
|
||||
@ -323,6 +324,7 @@ class MobiReader(object):
|
||||
self.cleanup_html()
|
||||
|
||||
self.log.debug('Parsing HTML...')
|
||||
self.processed_html = clean_ascii_chars(self.processed_html)
|
||||
try:
|
||||
root = html.fromstring(self.processed_html)
|
||||
if len(root.xpath('//html')) > 5:
|
||||
|
@ -827,6 +827,24 @@ class Manifest(object):
|
||||
return None
|
||||
return etree.fromstring(data, parser=RECOVER_PARSER)
|
||||
|
||||
def clean_word_doc(self, data):
    '''
    Remove empty Microsoft namespaced tags from markup.

    Namespace prefixes are taken from ``xmlns:PREFIX="..."`` declarations
    whose value contains ``microsoft``. When at least one is found a
    warning is logged and empty elements that use those prefixes, both
    ``<p:x ...></p:x>`` and ``<p:x .../>``, are deleted.

    :param data: The markup text to clean
    :return: The cleaned text, or ``data`` unchanged when no such
             namespace is declared
    '''
    # The value is matched with [^"] so the search cannot run past the
    # closing quote. Otherwise an unrelated declaration that merely precedes
    # a microsoft one on the same line would have its prefix picked up
    # (and the real microsoft prefix would be skipped).
    prefixes = [match.group(1) for match in
            re.finditer(r'xmlns:(\S+?)="[^"]*?microsoft[^"]*?"', data)]
    if prefixes:
        self.oeb.log.warn('Found microsoft markup, cleaning...')
        # Prefixes are literal text, so escape them before they become
        # part of a regular expression
        alternation = '|'.join(re.escape(prefix) for prefix in prefixes)
        # Remove empty tags as they are not rendered by browsers
        # but can become renderable HTML tags like <p/> if the
        # document is parsed by an HTML parser
        pat = re.compile(
                r'<(%s):([a-zA-Z0-9]+)[^>/]*?></\1:\2>'%alternation,
                re.DOTALL)
        data = pat.sub('', data)
        pat = re.compile(
                r'<(%s):([a-zA-Z0-9]+)[^>/]*?/>'%alternation)
        data = pat.sub('', data)
    return data
|
||||
|
||||
def _parse_xhtml(self, data):
|
||||
self.oeb.log.debug('Parsing', self.href, '...')
|
||||
# Convert to Unicode and normalize line endings
|
||||
@ -884,6 +902,10 @@ class Manifest(object):
|
||||
except etree.XMLSyntaxError:
|
||||
data = etree.fromstring(data, parser=RECOVER_PARSER)
|
||||
return data
|
||||
try:
|
||||
data = self.clean_word_doc(data)
|
||||
except:
|
||||
pass
|
||||
data = first_pass(data)
|
||||
|
||||
# Handle weird (non-HTML/fragment) files
|
||||
@ -907,6 +929,7 @@ class Manifest(object):
|
||||
parent.append(child)
|
||||
data = nroot
|
||||
|
||||
|
||||
# Force into the XHTML namespace
|
||||
if not namespace(data.tag):
|
||||
self.oeb.log.warn('Forcing', self.href, 'into XHTML namespace')
|
||||
|
@ -59,18 +59,32 @@ class OEBOutput(OutputFormatPlugin):
|
||||
def workaround_nook_cover_bug(self, root): # {{{
|
||||
cov = root.xpath('//*[local-name() = "meta" and @name="cover" and'
|
||||
' @content != "cover"]')
|
||||
|
||||
def manifest_items_with_id(id_):
|
||||
return root.xpath('//*[local-name() = "manifest"]/*[local-name() = "item" '
|
||||
' and @id="%s"]'%id_)
|
||||
|
||||
if len(cov) == 1:
|
||||
manpath = ('//*[local-name() = "manifest"]/*[local-name() = "item" '
|
||||
' and @id="%s" and @media-type]')
|
||||
cov = cov[0]
|
||||
covid = cov.get('content')
|
||||
manifest_item = root.xpath(manpath%covid)
|
||||
has_cover = root.xpath(manpath%'cover')
|
||||
if len(manifest_item) == 1 and not has_cover and \
|
||||
covid = cov.get('content', '')
|
||||
|
||||
if covid:
|
||||
manifest_item = manifest_items_with_id(covid)
|
||||
if len(manifest_item) == 1 and \
|
||||
manifest_item[0].get('media-type',
|
||||
'').startswith('image/'):
|
||||
self.log.warn('The cover image has an id != "cover". Renaming'
|
||||
' to work around Nook Color bug')
|
||||
' to work around bug in Nook Color')
|
||||
|
||||
import uuid
|
||||
newid = str(uuid.uuid4())
|
||||
|
||||
for item in manifest_items_with_id('cover'):
|
||||
item.set('id', newid)
|
||||
|
||||
for x in root.xpath('//*[@idref="cover"]'):
|
||||
x.set('idref', newid)
|
||||
|
||||
manifest_item = manifest_item[0]
|
||||
manifest_item.set('id', 'cover')
|
||||
cov.set('content', 'cover')
|
||||
|
@ -8,11 +8,7 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import os
|
||||
import itertools
|
||||
import re
|
||||
import logging
|
||||
import copy
|
||||
import os, itertools, re, logging, copy, unicodedata
|
||||
from weakref import WeakKeyDictionary
|
||||
from xml.dom import SyntaxErr as CSSSyntaxError
|
||||
import cssutils
|
||||
@ -234,8 +230,18 @@ class Stylizer(object):
|
||||
for elem in matches:
|
||||
for x in elem.iter():
|
||||
if x.text:
|
||||
span = E.span(x.text[0])
|
||||
span.tail = x.text[1:]
|
||||
punctuation_chars = []
|
||||
text = unicode(x.text)
|
||||
while text:
|
||||
if not unicodedata.category(text[0]).startswith('P'):
|
||||
break
|
||||
punctuation_chars.append(text[0])
|
||||
text = text[1:]
|
||||
|
||||
special_text = u''.join(punctuation_chars) + \
|
||||
(text[0] if text else u'')
|
||||
span = E.span(special_text)
|
||||
span.tail = text[1:]
|
||||
x.text = None
|
||||
x.insert(0, span)
|
||||
self.style(span)._update_cssdict(cssdict)
|
||||
@ -423,6 +429,7 @@ class Stylizer(object):
|
||||
|
||||
class Style(object):
|
||||
UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')
|
||||
MS_PAT = re.compile(r'^\s*(mso-|panose-|text-underline|tab-interval)')
|
||||
|
||||
def __init__(self, element, stylizer):
|
||||
self._element = element
|
||||
@ -447,6 +454,8 @@ class Style(object):
|
||||
return
|
||||
css = attrib['style'].split(';')
|
||||
css = filter(None, (x.strip() for x in css))
|
||||
css = [x.strip() for x in css]
|
||||
css = [x for x in css if self.MS_PAT.match(x) is None]
|
||||
try:
|
||||
style = CSSStyleDeclaration('; '.join(css))
|
||||
except CSSSyntaxError:
|
||||
|
@ -13,6 +13,7 @@ from urlparse import urlparse
|
||||
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
|
||||
from calibre.ebooks import ConversionError
|
||||
from calibre.utils.ordered_dict import OrderedDict
|
||||
|
||||
def XPath(x):
|
||||
try:
|
||||
@ -95,10 +96,8 @@ class DetectStructure(object):
|
||||
self.log.exception('Failed to mark chapter')
|
||||
|
||||
def create_level_based_toc(self):
|
||||
if self.opts.level1_toc is None:
|
||||
return
|
||||
for item in self.oeb.spine:
|
||||
self.add_leveled_toc_items(item)
|
||||
if self.opts.level1_toc is not None:
|
||||
self.add_leveled_toc_items()
|
||||
|
||||
def create_toc_from_chapters(self):
|
||||
counter = self.oeb.toc.next_play_order()
|
||||
@ -145,49 +144,57 @@ class DetectStructure(object):
|
||||
return text, href
|
||||
|
||||
|
||||
def add_leveled_toc_items(self, item):
|
||||
level1 = XPath(self.opts.level1_toc)(item.data)
|
||||
level1_order = []
|
||||
document = item
|
||||
|
||||
def add_leveled_toc_items(self):
|
||||
added = OrderedDict()
|
||||
added2 = OrderedDict()
|
||||
counter = 1
|
||||
if level1:
|
||||
added = {}
|
||||
for elem in level1:
|
||||
for document in self.oeb.spine:
|
||||
previous_level1 = list(added.itervalues())[-1] if added else None
|
||||
previous_level2 = list(added2.itervalues())[-1] if added2 else None
|
||||
|
||||
for elem in XPath(self.opts.level1_toc)(document.data):
|
||||
text, _href = self.elem_to_link(document, elem, counter)
|
||||
counter += 1
|
||||
if text:
|
||||
node = self.oeb.toc.add(text, _href,
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
level1_order.append(node)
|
||||
added[elem] = node
|
||||
#node.add(_('Top'), _href)
|
||||
if self.opts.level2_toc is not None:
|
||||
added2 = {}
|
||||
level2 = list(XPath(self.opts.level2_toc)(document.data))
|
||||
for elem in level2:
|
||||
|
||||
if self.opts.level2_toc is not None and added:
|
||||
for elem in XPath(self.opts.level2_toc)(document.data):
|
||||
level1 = None
|
||||
for item in document.data.iterdescendants():
|
||||
if item in added.keys():
|
||||
if item in added:
|
||||
level1 = added[item]
|
||||
elif item == elem and level1 is not None:
|
||||
elif item == elem:
|
||||
if level1 is None:
|
||||
if previous_level1 is None:
|
||||
break
|
||||
level1 = previous_level1
|
||||
text, _href = self.elem_to_link(document, elem, counter)
|
||||
counter += 1
|
||||
if text:
|
||||
added2[elem] = level1.add(text, _href,
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
if self.opts.level3_toc is not None:
|
||||
level3 = list(XPath(self.opts.level3_toc)(document.data))
|
||||
for elem in level3:
|
||||
break
|
||||
|
||||
if self.opts.level3_toc is not None and added2:
|
||||
for elem in XPath(self.opts.level3_toc)(document.data):
|
||||
level2 = None
|
||||
for item in document.data.iterdescendants():
|
||||
if item in added2.keys():
|
||||
if item in added2:
|
||||
level2 = added2[item]
|
||||
elif item == elem and level2 is not None:
|
||||
elif item == elem:
|
||||
if level2 is None:
|
||||
if previous_level2 is None:
|
||||
break
|
||||
level2 = previous_level2
|
||||
text, _href = \
|
||||
self.elem_to_link(document, elem, counter)
|
||||
counter += 1
|
||||
if text:
|
||||
level2.add(text, _href,
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
break
|
||||
|
||||
|
@ -887,7 +887,7 @@ vector<char>* Reflow::render_first_page(bool use_crop_box, double x_res,
|
||||
}
|
||||
|
||||
pg_w *= x_res/72.;
|
||||
pg_h *= x_res/72.;
|
||||
pg_h *= y_res/72.;
|
||||
|
||||
int x=0, y=0;
|
||||
this->doc->displayPageSlice(out, pg, x_res, y_res, 0,
|
||||
|
@ -46,7 +46,8 @@ def get_pdf_printer(opts, for_comic=False):
|
||||
printer = QPrinter(QPrinter.HighResolution)
|
||||
custom_size = get_custom_size(opts)
|
||||
|
||||
if opts.output_profile.short_name == 'default':
|
||||
if opts.output_profile.short_name == 'default' or \
|
||||
opts.output_profile.width > 10000:
|
||||
if custom_size is None:
|
||||
printer.setPaperSize(paper_size(opts.paper_size))
|
||||
else:
|
||||
|
@ -75,15 +75,20 @@ class SNBFile:
|
||||
for i in range(self.plainBlock):
|
||||
bzdc = bz2.BZ2Decompressor()
|
||||
if (i < self.plainBlock - 1):
|
||||
bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset;
|
||||
bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset
|
||||
else:
|
||||
bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset;
|
||||
snbFile.seek(self.blocks[self.binBlock + i].Offset);
|
||||
bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset
|
||||
snbFile.seek(self.blocks[self.binBlock + i].Offset)
|
||||
try:
|
||||
data = snbFile.read(bSize)
|
||||
if len(data) < 32768:
|
||||
uncompressedData += bzdc.decompress(data)
|
||||
else:
|
||||
uncompressedData += data
|
||||
except Exception, e:
|
||||
print e
|
||||
if len(uncompressedData) != self.plainStreamSizeUncompressed:
|
||||
raise Exception()
|
||||
f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
|
||||
plainPos += f.fileSize
|
||||
elif f.attr & 0x01000000 == 0x01000000:
|
||||
|
@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
|
||||
name = 'TXT Input'
|
||||
author = 'John Schember'
|
||||
description = 'Convert TXT files to HTML'
|
||||
file_types = set(['txt', 'txtz'])
|
||||
file_types = set(['txt', 'txtz', 'text'])
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='paragraph_type', recommended_value='auto',
|
||||
|
@ -20,9 +20,26 @@ from calibre.ebooks import BOOK_EXTENSIONS
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.constants import preferred_encoding, filesystem_encoding
|
||||
from calibre.gui2.actions import InterfaceAction
|
||||
from calibre.gui2 import config
|
||||
from calibre.gui2 import config, question_dialog
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
def get_filters():
    '''
    Return the list of ``(label, extensions)`` pairs offered as file type
    filters when choosing book files. The catch-all entry covering
    BOOK_EXTENSIONS comes first.
    '''
    filters = [(_('Books'), BOOK_EXTENSIONS)]
    filters.extend([
        (_('EPUB Books'), ['epub']),
        (_('LRF Books'), ['lrf']),
        (_('HTML Books'), ['htm', 'html', 'xhtm', 'xhtml']),
        (_('LIT Books'), ['lit']),
        (_('MOBI Books'), ['mobi', 'prc', 'azw']),
        (_('Topaz books'), ['tpz','azw1']),
        (_('Text books'), ['txt', 'rtf']),
        (_('PDF Books'), ['pdf']),
        (_('SNB Books'), ['snb']),
        (_('Comics'), ['cbz', 'cbr', 'cbc']),
        (_('Archives'), ['zip', 'rar']),
    ])
    return filters
|
||||
|
||||
|
||||
class AddAction(InterfaceAction):
|
||||
|
||||
name = 'Add Books'
|
||||
@ -47,6 +64,10 @@ class AddAction(InterfaceAction):
|
||||
self.add_menu.addAction(_('Add Empty book. (Book entry with no '
|
||||
'formats)'), self.add_empty, _('Shift+Ctrl+E'))
|
||||
self.add_menu.addAction(_('Add from ISBN'), self.add_from_isbn)
|
||||
self.add_menu.addSeparator()
|
||||
self.add_menu.addAction(_('Add files to selected book records'),
|
||||
self.add_formats, _('Shift+A'))
|
||||
|
||||
self.qaction.setMenu(self.add_menu)
|
||||
self.qaction.triggered.connect(self.add_books)
|
||||
|
||||
@ -55,6 +76,39 @@ class AddAction(InterfaceAction):
|
||||
for action in list(self.add_menu.actions())[1:]:
|
||||
action.setEnabled(enabled)
|
||||
|
||||
def add_formats(self, *args):
    '''
    Add files chosen by the user as formats of every book record that is
    selected in the library view.

    Does nothing unless the library view is the visible page and at least
    one row is selected. When more than one book is selected the user is
    asked to confirm first. The format of each file is its upper-cased
    extension; files without an extension are skipped.

    :param args: Ignored, accepted so the method can be used as a slot
    '''
    if self.gui.stack.currentIndex() != 0:
        # The library view is not the visible page
        return
    view = self.gui.library_view
    rows = view.selectionModel().selectedRows()
    if not rows:
        return
    ids = [view.model().id(r) for r in rows]

    # The pieces of the message are joined by the compiler, so each
    # continuation has to carry its own leading space.
    if len(ids) > 1 and not question_dialog(self.gui,
            _('Are you sure'),
            _('Are you sure you want to add the same'
                ' files to all %d books? If the format'
                ' already exists for a book, it will be replaced.')%len(ids)):
        return

    books = choose_files(self.gui, 'add formats dialog dir',
            _('Select book files'), filters=get_filters())
    if not books:
        return

    db = view.model().db
    for id_ in ids:
        for fpath in books:
            fmt = os.path.splitext(fpath)[1][1:].upper()
            if fmt:
                db.add_format_with_hooks(id_, fmt, fpath, index_is_id=True,
                    notify=True)
    # Tell the model about the current book again so anything showing it
    # is refreshed.
    # NOTE(review): assumed to run once after the loops -- confirm.
    current_idx = self.gui.library_view.currentIndex()
    if current_idx.isValid():
        view.model().current_changed(current_idx, current_idx)
|
||||
|
||||
|
||||
def add_recursive(self, single):
|
||||
root = choose_dir(self.gui, 'recursive book import root dir dialog',
|
||||
'Select root folder')
|
||||
@ -150,15 +204,29 @@ class AddAction(InterfaceAction):
|
||||
to_device = self.gui.stack.currentIndex() != 0
|
||||
self._add_books(paths, to_device)
|
||||
|
||||
def files_dropped_on_book(self, event, paths):
|
||||
def remote_file_dropped_on_book(self, url, fname):
    '''
    Download a remote file that was dropped on the current book and hand
    the downloaded file to files_dropped_on_book().

    Nothing happens when the library view is not the current view, when
    there is no valid current index, or when the download reports an
    error.

    :param url: Location of the remote file
    :param fname: File name passed on to the download dialog
    '''
    library_view = self.gui.library_view
    if self.gui.current_view() is not library_view:
        return
    db = self.gui.library_view.model().db
    index = self.gui.library_view.currentIndex()
    if not index.isValid():
        return
    # Resolve the book id before downloading, it is passed on explicitly
    book_id = db.id(index.row())
    from calibre.gui2.dnd import DownloadDialog
    dialog = DownloadDialog(url, fname, self.gui)
    dialog.start_download()
    if dialog.err is not None:
        return
    self.files_dropped_on_book(None, [dialog.fpath], cid=book_id)
|
||||
|
||||
def files_dropped_on_book(self, event, paths, cid=None):
|
||||
accept = False
|
||||
if self.gui.current_view() is not self.gui.library_view:
|
||||
return
|
||||
db = self.gui.library_view.model().db
|
||||
cover_changed = False
|
||||
current_idx = self.gui.library_view.currentIndex()
|
||||
if cid is None:
|
||||
if not current_idx.isValid(): return
|
||||
cid = db.id(current_idx.row())
|
||||
cid = db.id(current_idx.row()) if cid is None else cid
|
||||
for path in paths:
|
||||
ext = os.path.splitext(path)[1].lower()
|
||||
if ext:
|
||||
@ -173,8 +241,9 @@ class AddAction(InterfaceAction):
|
||||
elif ext in BOOK_EXTENSIONS:
|
||||
db.add_format_with_hooks(cid, ext, path, index_is_id=True)
|
||||
accept = True
|
||||
if accept:
|
||||
if accept and event is not None:
|
||||
event.accept()
|
||||
if current_idx.isValid():
|
||||
self.gui.library_view.model().current_changed(current_idx, current_idx)
|
||||
if cover_changed:
|
||||
if self.gui.cover_flow:
|
||||
@ -207,27 +276,14 @@ class AddAction(InterfaceAction):
|
||||
'''
|
||||
Add books from the local filesystem to either the library or the device.
|
||||
'''
|
||||
filters = [
|
||||
(_('Books'), BOOK_EXTENSIONS),
|
||||
(_('EPUB Books'), ['epub']),
|
||||
(_('LRF Books'), ['lrf']),
|
||||
(_('HTML Books'), ['htm', 'html', 'xhtm', 'xhtml']),
|
||||
(_('LIT Books'), ['lit']),
|
||||
(_('MOBI Books'), ['mobi', 'prc', 'azw']),
|
||||
(_('Topaz books'), ['tpz','azw1']),
|
||||
(_('Text books'), ['txt', 'rtf']),
|
||||
(_('PDF Books'), ['pdf']),
|
||||
(_('SNB Books'), ['snb']),
|
||||
(_('Comics'), ['cbz', 'cbr', 'cbc']),
|
||||
(_('Archives'), ['zip', 'rar']),
|
||||
]
|
||||
filters = get_filters()
|
||||
to_device = self.gui.stack.currentIndex() != 0
|
||||
if to_device:
|
||||
fmts = self.gui.device_manager.device.settings().format_map
|
||||
filters = [(_('Supported books'), fmts)]
|
||||
|
||||
books = choose_files(self.gui, 'add books dialog dir', 'Select books',
|
||||
filters=filters)
|
||||
books = choose_files(self.gui, 'add books dialog dir',
|
||||
_('Select books'), filters=filters)
|
||||
if not books:
|
||||
return
|
||||
self._add_books(books, to_device)
|
||||
|
@ -355,6 +355,7 @@ class ChooseLibraryAction(InterfaceAction):
|
||||
print
|
||||
print 'before:', self.before_mem
|
||||
print 'after:', memory()/1024**2
|
||||
print
|
||||
self.dbref = self.before_mem = None
|
||||
|
||||
|
||||
|
@ -19,11 +19,11 @@ single_shot = partial(QTimer.singleShot, 10)
|
||||
|
||||
class MultiDeleter(QObject):
|
||||
|
||||
def __init__(self, gui, rows, callback):
|
||||
def __init__(self, gui, ids, callback):
|
||||
from calibre.gui2.dialogs.progress import ProgressDialog
|
||||
QObject.__init__(self, gui)
|
||||
self.model = gui.library_view.model()
|
||||
self.ids = list(map(self.model.id, rows))
|
||||
self.ids = ids
|
||||
self.gui = gui
|
||||
self.failures = []
|
||||
self.deleted_ids = []
|
||||
@ -231,6 +231,7 @@ class DeleteAction(InterfaceAction):
|
||||
return
|
||||
# Library view is visible.
|
||||
if self.gui.stack.currentIndex() == 0:
|
||||
to_delete_ids = [view.model().id(r) for r in rows]
|
||||
# Ask the user if they want to delete the book from the library or device if it is in both.
|
||||
if self.gui.device_manager.is_device_connected:
|
||||
on_device = False
|
||||
@ -264,10 +265,10 @@ class DeleteAction(InterfaceAction):
|
||||
if ci.isValid():
|
||||
row = ci.row()
|
||||
if len(rows) < 5:
|
||||
ids_deleted = view.model().delete_books(rows)
|
||||
self.library_ids_deleted(ids_deleted, row)
|
||||
view.model().delete_books_by_id(to_delete_ids)
|
||||
self.library_ids_deleted(to_delete_ids, row)
|
||||
else:
|
||||
self.__md = MultiDeleter(self.gui, rows,
|
||||
self.__md = MultiDeleter(self.gui, to_delete_ids,
|
||||
partial(self.library_ids_deleted, current_row=row))
|
||||
# Device view is visible.
|
||||
else:
|
||||
|
@ -67,7 +67,8 @@ class FetchNewsAction(InterfaceAction):
|
||||
keep_issues = 0
|
||||
if keep_issues > 0:
|
||||
ids_with_tag = list(sorted(self.gui.library_view.model().
|
||||
db.tags_older_than(arg['title'], None), reverse=True))
|
||||
db.tags_older_than(arg['title'],
|
||||
None, must_have_tag=_('News')), reverse=True))
|
||||
ids_to_delete = ids_with_tag[keep_issues:]
|
||||
if ids_to_delete:
|
||||
self.gui.library_view.model().delete_books_by_id(ids_to_delete)
|
||||
|
@ -5,20 +5,21 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, collections, sys
|
||||
import collections, sys
|
||||
from Queue import Queue
|
||||
|
||||
from PyQt4.Qt import QPixmap, QSize, QWidget, Qt, pyqtSignal, QUrl, \
|
||||
QPropertyAnimation, QEasingCurve, QThread, QApplication, QFontInfo, \
|
||||
QSizePolicy, QPainter, QRect, pyqtProperty, QLayout, QPalette
|
||||
QSizePolicy, QPainter, QRect, pyqtProperty, QLayout, QPalette, QMenu
|
||||
from PyQt4.QtWebKit import QWebView
|
||||
|
||||
from calibre import fit_image, prepare_string_for_xml
|
||||
from calibre.gui2.widgets import IMAGE_EXTENSIONS
|
||||
from calibre.gui2.dnd import dnd_has_image, dnd_get_image, dnd_get_files, \
|
||||
IMAGE_EXTENSIONS, dnd_has_extension
|
||||
from calibre.ebooks import BOOK_EXTENSIONS
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.library.comments import comments_to_html
|
||||
from calibre.gui2 import config, open_local_file, open_url
|
||||
from calibre.gui2 import config, open_local_file, open_url, pixmap_to_data
|
||||
from calibre.utils.icu import sort_key
|
||||
|
||||
# render_rows(data) {{{
|
||||
@ -70,6 +71,7 @@ def render_rows(data):
|
||||
|
||||
class CoverView(QWidget): # {{{
|
||||
|
||||
cover_changed = pyqtSignal(object, object)
|
||||
|
||||
def __init__(self, vertical, parent=None):
|
||||
QWidget.__init__(self, parent)
|
||||
@ -151,6 +153,36 @@ class CoverView(QWidget): # {{{
|
||||
fset=setCurrentPixmapSize
|
||||
)
|
||||
|
||||
def contextMenuEvent(self, ev):
    '''
    Show a context menu with "Paste Cover" and "Copy Cover" entries at the
    position of the event. The paste entry is disabled when the clipboard
    holds no image.
    '''
    menu = QMenu(self)
    paste_action = menu.addAction(_('Paste Cover'))
    copy_action = menu.addAction(_('Copy Cover'))
    clipboard = QApplication.instance().clipboard()
    have_image = clipboard.mimeData().hasImage()
    if not have_image:
        paste_action.setEnabled(False)
    copy_action.triggered.connect(self.copy_to_clipboard)
    paste_action.triggered.connect(self.paste_from_clipboard)
    menu.exec_(ev.globalPos())
|
||||
|
||||
def copy_to_clipboard(self):
    ''' Put the pixmap held by this widget on the application clipboard. '''
    clipboard = QApplication.instance().clipboard()
    clipboard.setPixmap(self.pixmap)
|
||||
|
||||
def paste_from_clipboard(self, pmap=None):
    '''
    Replace the displayed pixmap and announce the change.

    :param pmap: The pixmap to use. Anything that is not a QPixmap makes
                 the method read the pixmap from the clipboard instead,
                 falling back to the selection clipboard where supported.

    A null pixmap is ignored. Otherwise the widget is laid out and
    repainted again, and cover_changed is emitted with the book id and the
    pixmap data when the current data carries an ``id``.
    '''
    if not isinstance(pmap, QPixmap):
        clipboard = QApplication.instance().clipboard()
        pmap = clipboard.pixmap()
        if pmap.isNull() and clipboard.supportsSelection():
            pmap = clipboard.pixmap(clipboard.Selection)
    if pmap.isNull():
        return
    self.pixmap = pmap
    self.do_layout()
    self.update()
    if not config['disable_animations']:
        self.animation.start()
    book_id = self.data.get('id', None)
    if book_id is None:
        return
    self.cover_changed.emit(book_id, pixmap_to_data(pmap))
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
@ -196,6 +228,7 @@ class BookInfo(QWebView):
|
||||
self._link_clicked = False
|
||||
self.setAttribute(Qt.WA_OpaquePaintEvent, False)
|
||||
palette = self.palette()
|
||||
self.setAcceptDrops(False)
|
||||
palette.setBrush(QPalette.Base, Qt.transparent)
|
||||
self.page().setPalette(palette)
|
||||
|
||||
@ -358,34 +391,50 @@ class BookDetails(QWidget): # {{{
|
||||
show_book_info = pyqtSignal()
|
||||
open_containing_folder = pyqtSignal(int)
|
||||
view_specific_format = pyqtSignal(int, object)
|
||||
remote_file_dropped = pyqtSignal(object, object)
|
||||
files_dropped = pyqtSignal(object, object)
|
||||
cover_changed = pyqtSignal(object, object)
|
||||
|
||||
# Drag 'n drop {{{
|
||||
DROPABBLE_EXTENSIONS = IMAGE_EXTENSIONS+BOOK_EXTENSIONS
|
||||
files_dropped = pyqtSignal(object, object)
|
||||
|
||||
@classmethod
|
||||
def paths_from_event(cls, event):
|
||||
'''
|
||||
Accept a drop event and return a list of paths that can be read from
|
||||
and represent files with extensions.
|
||||
'''
|
||||
if event.mimeData().hasFormat('text/uri-list'):
|
||||
urls = [unicode(u.toLocalFile()) for u in event.mimeData().urls()]
|
||||
urls = [u for u in urls if os.path.splitext(u)[1] and os.access(u, os.R_OK)]
|
||||
return [u for u in urls if os.path.splitext(u)[1][1:].lower() in cls.DROPABBLE_EXTENSIONS]
|
||||
|
||||
def dragEnterEvent(self, event):
|
||||
if int(event.possibleActions() & Qt.CopyAction) + \
|
||||
int(event.possibleActions() & Qt.MoveAction) == 0:
|
||||
return
|
||||
paths = self.paths_from_event(event)
|
||||
if paths:
|
||||
md = event.mimeData()
|
||||
if dnd_has_extension(md, self.DROPABBLE_EXTENSIONS) or \
|
||||
dnd_has_image(md):
|
||||
event.acceptProposedAction()
|
||||
|
||||
def dropEvent(self, event):
|
||||
paths = self.paths_from_event(event)
|
||||
event.setDropAction(Qt.CopyAction)
|
||||
self.files_dropped.emit(event, paths)
|
||||
md = event.mimeData()
|
||||
|
||||
x, y = dnd_get_image(md)
|
||||
if x is not None:
|
||||
# We have an image, set cover
|
||||
event.accept()
|
||||
if y is None:
|
||||
# Local image
|
||||
self.cover_view.paste_from_clipboard(x)
|
||||
else:
|
||||
self.remote_file_dropped.emit(x, y)
|
||||
# We do not support setting cover *and* adding formats for
|
||||
# a remote drop, anyway, so return
|
||||
return
|
||||
|
||||
# Now look for ebook files
|
||||
urls, filenames = dnd_get_files(md, BOOK_EXTENSIONS)
|
||||
if not urls:
|
||||
# Nothing found
|
||||
return
|
||||
|
||||
if not filenames:
|
||||
# Local files
|
||||
self.files_dropped.emit(event, urls)
|
||||
else:
|
||||
# Remote files, use the first file
|
||||
self.remote_file_dropped.emit(urls[0], filenames[0])
|
||||
event.accept()
|
||||
|
||||
|
||||
def dragMoveEvent(self, event):
    ''' Accept the action proposed by every drag move event. '''
    accept = event.acceptProposedAction
    accept()
|
||||
@ -399,6 +448,7 @@ class BookDetails(QWidget): # {{{
|
||||
self.setLayout(self._layout)
|
||||
|
||||
self.cover_view = CoverView(vertical, self)
|
||||
self.cover_view.cover_changed.connect(self.cover_changed.emit)
|
||||
self._layout.addWidget(self.cover_view)
|
||||
self.book_info = BookInfo(vertical, self)
|
||||
self._layout.addWidget(self.book_info)
|
||||
|
@ -43,6 +43,9 @@
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
<property name="sizeAdjustPolicy">
|
||||
<enum>QComboBox::AdjustToMinimumContentsLengthWithIcon</enum>
|
||||
</property>
|
||||
<property name="minimumContentsLength">
|
||||
<number>30</number>
|
||||
</property>
|
||||
|