Merge from trunk

Charles Haley 2011-10-09 13:58:01 +02:00
commit 3ae196df74
53 changed files with 1012 additions and 778 deletions

View File

@ -118,7 +118,7 @@ EBVS
<0x 00 00 00 00>
<0x 00 00 00 10>
...(rest of size of DATA block)
<0x FD EA = PAD? (ýê)>
DATA
<0x 4 bytes = size of <marked text (see 3rd note)> >
<marked text (see 3rd note)>
@ -155,7 +155,7 @@ EBVS
<0x 00 00 00 00>
<0x 00 00 00 10>
...(rest of size of DATA block)
<0x FD EA = PAD? (ýê)>
[fi MARK || BOOKMARK]
//-------------------------------
[if CORRECTION]
@ -174,7 +174,7 @@ EBVS
<0x 00 00 00 00>
<0x 00 00 00 10>
...(rest of size of DATA block)
<0x FD EA = PAD? (ýê)>
DATA
<0x 4 bytes = size of <marked text (see 3rd note)> >
<marked text (see 3rd note)>
@ -246,7 +246,7 @@ EBVS
<0x 00 00 00 00>
<0x 00 00 00 10>
...(size of DATA block - 30)
<0x FD EA = PAD? (ýê)>
[fi DRAWING]
//-------------------------------
[next {NOTE,MARK,CORRECTION,DRAWING}]
@ -308,7 +308,7 @@ EBVS
...4
...4
...4
<0x FD EA = PAD? (ýê)>
//--------------------------------------------------------------------
// CATEGORY (if any)
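A minimal sketch of how a reader might consume one of the size-prefixed DATA records above (illustrative only; this excerpt does not state byte order, so the big-endian '>I' is an assumption):

import struct

def read_marked_text(f):
    # hypothetical reader for the DATA record described above
    assert f.read(4) == 'DATA'
    size = struct.unpack('>I', f.read(4))[0]  # 4-byte size of <marked text>
    text = f.read(size)
    # any 0xFD 0xEA pad bytes ('ýê' when viewed as Latin-1) fill out the block
    return text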

View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Archeowiesci(BasicNewsRecipe):
    title = u'Archeowiesci'
    __author__ = 'fenuks'
    category = 'archeology'
    language = 'pl'
    cover_url = 'http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags = [dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
    feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                if 'subskrypcja' in article.title:
                    feed.articles.remove(article)
        return feeds

View File

@ -9,9 +9,10 @@ class CGM(BasicNewsRecipe):
category = 'music'
language = 'pl'
use_embedded_content = False
remove_empty_feeds= True
max_articles_per_feed = 100
no_stylesheets=True
extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;}'
extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;}'
remove_tags_before=dict(id='mainContent')
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
@ -22,10 +23,12 @@ class CGM(BasicNewsRecipe):
def preprocess_html(self, soup):
ad=soup.findAll('img')
for item in soup.findAll(style=True):
del item['style']
ad=soup.findAll('a')
for r in ad:
if '/_vault/_article_photos/5841.jpg' in r['src'] or '_vault/_article_photos/5807.jpg' in r['src'] or 'article_photos/5841.jpg' in r['src'] or 'article_photos/5825.jpg' in r['src'] or '_article_photos/5920.jpg' in r['src'] or '_article_photos/5919.jpg' in r['src'] or '_article_photos/5918.jpg' in r['src'] or '_article_photos/5914.jpg' in r['src'] or '_article_photos/5911.jpg' in r['src'] or '_article_photos/5923.jpg' in r['src'] or '_article_photos/5921.jpg' in r['src']:
ad[ad.index(r)].extract()
if 'http://www.hustla.pl' in r['href']:
r.extract()
gallery=soup.find('div', attrs={'class':'galleryFlash'})
if gallery:
img=gallery.find('embed')

View File

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1317580312(BasicNewsRecipe):
    title = u'Dark Horizons'
    language = 'en'
    __author__ = 'Jaded'
    description = 'News, images, video clips and reviews of current and upcoming blockbuster films.'
    category = 'movies, tv, news'
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = 'http://a4.sphotos.ak.fbcdn.net/hphotos-ak-ash2/164168_148419801879765_148410081880737_225532_464073_n.jpg'
    masthead_url = 'http://www.darkhorizons.com/graphics/2/logo_print.png'
    auto_cleanup = True

    feeds = [(u'News', u'http://www.darkhorizons.com/feeds/news.atom'),
             (u'Features', u'http://www.darkhorizons.com/feeds/features.atom'),
             (u'Reviews', u'http://www.darkhorizons.com/feeds/reviews.atom')]

View File

@ -22,6 +22,10 @@ class Descopera(BasicNewsRecipe):
category = 'Ziare,Reviste,Descopera'
encoding = 'utf-8'
cover_url = 'http://www.descopera.ro/images/header_images/logo.gif'
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
conversion_options = {
'comments' : description
@ -30,28 +34,6 @@ class Descopera(BasicNewsRecipe):
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'style':'font-family: Arial,Helvetica,sans-serif; font-size: 18px; color: rgb(51, 51, 51); font-weight: bold; margin: 10px 0pt; clear: both; float: left;width: 610px;'})
,dict(name='div', attrs={'style':'margin-right: 15px; margin-bottom: 15px; float: left;'})
, dict(name='p', attrs={'id':'itemDescription'})
,dict(name='div', attrs={'id':'itemBody'})
]
remove_tags = [
dict(name='div', attrs={'class':['tools']})
, dict(name='div', attrs={'class':['share']})
, dict(name='div', attrs={'class':['category']})
, dict(name='div', attrs={'id':['comments']})
]
remove_tags_after = [
dict(name='div', attrs={'id':'comments'})
]
feeds = [
(u'Feeds', u'http://www.descopera.ro/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

recipes/eioba.recipe (new file, 23 lines)
View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class eioba(BasicNewsRecipe):
    title = u'eioba'
    __author__ = 'fenuks'
    cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png'
    language = 'pl'
    oldest_article = 7
    remove_empty_feeds = True
    max_articles_per_feed = 100
    extra_css = '#ctl0_body_Topic {font-weight: bold; font-size:30px;}'
    keep_only_tags = [dict(id=['ctl0_body_Topic', 'articleContent'])]
    feeds = [(u'Wszystkie kategorie', u'http://feeds.eioba.pl/eioba-pl-top'),
             (u'Technologia', u'http://www.eioba.pl/feed/categories/1.xml'),
             (u'Nauka', u'http://www.eioba.pl/feed/categories/12.xml'),
             (u'Finanse', u'http://www.eioba.pl/feed/categories/7.xml'),
             (u'Życie', u'http://www.eioba.pl/feed/categories/5.xml'),
             (u'Zainteresowania', u'http://www.eioba.pl/feed/categories/420.xml'),
             (u'Społeczeństwo', u'http://www.eioba.pl/feed/categories/8.xml'),
             (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
             (u'Różne', u'http://www.eioba.pl/feed/categories/9.xml')
             ]

View File

@ -19,45 +19,20 @@ class FazNet(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
remove_tags = [
dict(name=['object','link','embed','base'])
,dict(name='div',
attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo',
'ArtikelServices', 'ModulLesermeinungenFooter',
'ModulArtikelServices', 'BoxTool Aufklappen_Grau',
'SocialMediaUnten', ]}),
dict(id=['KurzLinkMenu', 'ArtikelServicesMenu']),
]
keep_only_tags = [{'class':'FAZArtikelEinleitung'},
{'id':'ArtikelTabContent_0'}]
feeds = [
('FAZ.NET Aktuell', 'http://www.faz.net/s/RubF3CE08B362D244869BE7984590CB6AC1/Tpl~Epartner~SRss_.xml'),
('Politik', 'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
('Wirtschaft', 'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
('Feuilleton', 'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
('Sport', 'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
('Gesellschaft', 'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
('Finanzen', 'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
('Wissen', 'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
('Reise', 'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
('Technik & Motor', 'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
('Beruf & Chance', 'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml')
('FAZ.NET Aktuell', 'http://www.faz.net/aktuell/?rssview=1'),
('Politik', 'http://www.faz.net/aktuell/politik/?rssview=1'),
('Wirtschaft', 'http://www.faz.net/aktuell/wirtschaft/?rssview=1'),
('Feuilleton', 'http://www.faz.net/aktuell/feuilleton/?rssview=1'),
('Sport', 'http://www.faz.net/aktuell/sport/?rssview=1'),
('Gesellschaft', 'http://www.faz.net/aktuell/gesellschaft/?rssview=1'),
('Finanzen', 'http://www.faz.net/aktuell/finanzen/?rssview=1'),
('Technik & Motor', 'http://www.faz.net/aktuell/technik-motor/?rssview=1'),
('Wissen', 'http://www.faz.net/aktuell/wissen/?rssview=1'),
('Reise', 'http://www.faz.net/aktuell/reise/?rssview=1'),
('Beruf & Chance', 'http://www.faz.net/aktuell/beruf-chance/?rssview=1')
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
soup.head.insert(0,mtag)
del soup.body['onload']
for item in soup.findAll(style=True):
del item['style']
return soup

recipes/focus_pl.recipe (new file, 66 lines)
View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Focus_pl(BasicNewsRecipe):
    title = u'Focus.pl'
    oldest_article = 15
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    language = 'pl'
    description = 'polish scientific monthly magazine'
    category = 'magazine'
    cover_url = ''
    remove_empty_feeds = True
    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'class':'h2 h2f'})
    remove_tags_after = dict(name='div', attrs={'class':'clear'})
    feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
             (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
             (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
             (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
             (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
             (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
             (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
             (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
             (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
             ]

    def skip_ad_pages(self, soup):
        tag = soup.find(name='a')
        if tag:
            new_soup = self.index_to_soup(tag['href'] + 'do-druku/1/', raw=True)
            return new_soup

    def append_page(self, appendtag):
        tag = appendtag.find(name='div', attrs={'class':'arrows'})
        if tag:
            nexturl = 'http://www.focus.pl/' + tag.a['href']
            for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
                rem.extract()
            while nexturl:
                soup2 = self.index_to_soup(nexturl)
                nexturl = None
                pagetext = soup2.find(name='div', attrs={'class':'txt'})
                tag = pagetext.find(name='div', attrs={'class':'arrows'})
                for r in tag.findAll(name='a'):
                    if u'Następne' in r.string:
                        nexturl = 'http://www.focus.pl/' + r['href']
                for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
                    rem.extract()
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
        tag = soup.find(name='div', attrs={'class':'clr fl'})
        if tag:
            self.cover_url = 'http://www.focus.pl/' + tag.a['href']
        return getattr(self, 'cover_url', self.cover_url)

    def preprocess_html(self, soup):
        self.append_page(soup.body)
        return soup

View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Gazeta_Wyborcza(BasicNewsRecipe):
    title = u'Gazeta Wyborcza'
    __author__ = 'fenuks'
    cover_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    language = 'pl'
    description = 'news from gazeta.pl'
    category = 'newspaper'
    INDEX = 'http://wyborcza.pl'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    remove_tags_before = dict(id='k0')
    remove_tags_after = dict(id='banP4')
    remove_tags = [dict(name='div', attrs={'class':'rel_box'}),
                   dict(attrs={'class':['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}),
                   dict(name='div', attrs={'id':'footer'})]
    feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
             (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
             (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
             (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
             (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
             (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
             (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
             (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
             (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
             (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
             (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
             (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
             (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
             (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
             ]

    def skip_ad_pages(self, soup):
        tag = soup.find(name='a', attrs={'class':'btn'})
        if tag:
            new_soup = self.index_to_soup(tag['href'], raw=True)
            return new_soup

    def append_page(self, soup, appendtag):
        loop = False
        tag = soup.find('div', attrs={'id':'Str'})
        if appendtag.find('div', attrs={'id':'Str'}):
            nexturl = tag.findAll('a')
            appendtag.find('div', attrs={'id':'Str'}).extract()
            loop = True
        if appendtag.find(id='source'):
            appendtag.find(id='source').extract()
        while loop:
            loop = False
            for link in nexturl:
                if u'następne' in link.string:
                    url = self.INDEX + link['href']
                    soup2 = self.index_to_soup(url)
                    pagetext = soup2.find(id='artykul')
                    pos = len(appendtag.contents)
                    appendtag.insert(pos, pagetext)
                    tag = soup2.find('div', attrs={'id':'Str'})
                    nexturl = tag.findAll('a')
                    loop = True

    def gallery_article(self, appendtag):
        tag = appendtag.find(id='container_gal')
        if tag:
            nexturl = appendtag.find(id='gal_btn_next').a['href']
            appendtag.find(id='gal_navi').extract()
            while nexturl:
                soup2 = self.index_to_soup(nexturl)
                pagetext = soup2.find(id='container_gal')
                nexturl = pagetext.find(id='gal_btn_next')
                if nexturl:
                    nexturl = nexturl.a['href']
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
                rem = appendtag.find(id='gal_navi')
                if rem:
                    rem.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup

    def print_version(self, url):
        if 'http://wyborcza.biz/biznes/' not in url:
            return url
        else:
            return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')

View File

@ -9,8 +9,17 @@ class Gram_pl(BasicNewsRecipe):
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets= True
extra_css = 'h2 {font-style: italic; font-size:20px;}'
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
(u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
def parse_feeds (self):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
for article in feed.articles[:]:
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
feed.articles.remove(article)
return feeds

View File

@ -1,7 +1,9 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe(BasicNewsRecipe):
title = 'Heise-online'
title = 'heise online'
description = 'News vom Heise-Verlag'
__author__ = 'schuster'
use_embedded_content = False
@ -12,10 +14,11 @@ class AdvancedUserRecipe(BasicNewsRecipe):
remove_empty_feeds = True
timeout = 5
no_stylesheets = True
encoding = 'utf-8'
remove_tags_after = dict(name ='p', attrs={'class':'editor'})
remove_tags = [dict(id='navi_top_container'),
remove_tags = [{'class':'navi_top_container'},
dict(id='navi_bottom'),
dict(id='mitte_rechts'),
dict(id='navigation'),
@ -25,28 +28,28 @@ class AdvancedUserRecipe(BasicNewsRecipe):
dict(id='content_foren'),
dict(id='seiten_navi'),
dict(id='adbottom'),
dict(id='sitemap')]
dict(id='sitemap'),
dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
]
feeds = [
('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
('iX', 'http://www.heise.de/ix/news/news.rdf'),
('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
('Security', 'http://www.heise.de/security/news/news-atom.xml'),
('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
]
def print_version(self, url):
return url + '?view=print'

View File

@ -18,6 +18,7 @@ class HoustonChronicle(BasicNewsRecipe):
keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
'hst-articletext' in x or 'hst-galleryitem' in x)}
remove_attributes = ['xmlns']
feeds = [
('News', "http://www.chron.com/rss/feed/News-270.php"),

Binary file not shown (new file, 718 B)

Binary file not shown (new file, 399 B)

recipes/icons/eioba.png (new binary file, 908 B)

recipes/icons/focus_pl.png (new binary file, 695 B)

Binary file not shown (new file, 221 B)

Binary file not shown (new file, 320 B)

View File

@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Konflikty(BasicNewsRecipe):
    title = u'Konflikty Zbrojne'
    __author__ = 'fenuks'
    cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg'
    language = 'pl'
    description = 'military news'
    category = 'military, history'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
             (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
             (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
             (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')]

View File

@ -5,30 +5,46 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
description = 'News as provided by The Metro - UK'
__author__ = 'Dave Asbury'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
no_stylesheets = True
oldest_article = 1
max_articles_per_feed = 25
max_articles_per_feed = 20
remove_empty_feeds = True
remove_javascript = True
preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
#preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
preprocess_regexps = [
(re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
preprocess_regexps = [
(re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]
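# Note: in Python this second preprocess_regexps assignment replaces the one
# above, so only the 'tweet' pattern is applied; to run both substitutions the
# two tuples would need to be combined into a single list, e.g.:
# preprocess_regexps = [
#     (re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> '),
#     (re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]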
language = 'en_GB'
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
extra_css = 'h2 {font: sans-serif medium;}'
keep_only_tags = [
dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
dict(attrs={'class':['img-cnt figure']}),
dict(attrs={'class':['art-img']}),
dict(name='div', attrs={'class':'art-lft'})
dict(name='div', attrs={'class':'art-lft'}),
dict(name='p')
]
remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
]
feeds = [
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
extra_css = '''
body {font: sans-serif medium;}'
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
span{ font-size:9.5px; font-weight:bold;font-style:italic}
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''

View File

@ -6,19 +6,24 @@ __Region__ = 'Hong Kong'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
# (HK only) if __UseLife__ is true, turn this on if you want to include the column section
# (HK only) This disables the column section, which is now premium content
__InclCols__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
__ParsePFF__ = False
# (HK only) Turn below to True if you wish hi-res images
__HiResImg__ = False
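# Illustration (hypothetical URLs): with __ParsePFF__ = True, parse_section()
# below rewrites article links to their printer-friendly form, roughly
#   http://news.mingpao.com/20111009/gaa1.htm -> http://news.mingpao.com/20111009/gaa1_print.htm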
'''
Change Log:
2011/09/21: fetching "column" section is made optional. Default is False
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@ -42,7 +47,7 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
@ -56,7 +61,7 @@ class MPRecipe(BasicNewsRecipe):
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
@ -147,43 +152,6 @@ class MPRecipe(BasicNewsRecipe):
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
# minIdx = i0
# i1 = url.find('1')
# if i1 >= 0 and i1 < minIdx:
# minIdx = i1
# i2 = url.find('2')
# if i2 >= 0 and i2 < minIdx:
# minIdx = i2
# i3 = url.find('3')
# if i3 >= 0 and i0 < minIdx:
# minIdx = i3
# i4 = url.find('4')
# if i4 >= 0 and i4 < minIdx:
# minIdx = i4
# i5 = url.find('5')
# if i5 >= 0 and i5 < minIdx:
# minIdx = i5
# i6 = url.find('6')
# if i6 >= 0 and i6 < minIdx:
# minIdx = i6
# i7 = url.find('7')
# if i7 >= 0 and i7 < minIdx:
# minIdx = i7
# i8 = url.find('8')
# if i8 >= 0 and i8 < minIdx:
# minIdx = i8
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
@ -260,15 +228,16 @@ class MPRecipe(BasicNewsRecipe):
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
#ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
#if ed_articles:
# feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@ -279,20 +248,39 @@ class MPRecipe(BasicNewsRecipe):
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
#fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
#if fin_articles:
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
# articles = self.parse_section(url)
# if articles:
# feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
#if ent_articles:
# feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
if __InclCols__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@ -300,11 +288,6 @@ class MPRecipe(BasicNewsRecipe):
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@ -348,6 +331,16 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
# replace the url to the print-friendly version
if __ParsePFF__ == True:
if url.rfind('Redirect') <> -1:
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
url = re.sub('%2F.*%2F', '/', url)
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
url = url.replace('%2Etxt', '_print.htm')
url = url.replace('%5F', '_')
else:
url = url.replace('.htm', '_print.htm')
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
@ -472,11 +465,92 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# preprocess those .txt based files
# preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url):
if url.rfind('ftp') == -1:
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', raw_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
raw_html = raw_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
raw_html = raw_html.replace(img, newimg)
else:
# if not found, insert _ after "
raw_html = raw_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', raw_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
gifimg = img.replace('jpg\'', 'gif\'')
try:
#print 'Original: ', url
#print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
gifurl = re.sub(r'dailynews.*txt', '', url)
#print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
#print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
#br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
raw_html = raw_html.replace(img, gifimg)
except:
#print 'GIF not found'
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'newimg: ', newimg
raw_html = raw_html.replace(img, newimg)
if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
return raw_html
else:
if url.rfind('_print.htm') <> -1:
# javascript based file
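# The *_print.htm pages embed their content in script variables (shape
# inferred from the parsing below; the values here are hypothetical), e.g.:
#   var heading1 = 'Main headline';
#   var heading2 = 'Subheading';
#   var content = 'Article body...';
#   var photocontent = '<tr><td><img ...></td></tr>';
# The loop below unpacks these into plain heading/content/images <div> blocks.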
splitter = re.compile(r'\n')
new_raw_html = '<html><head><title>Untitled</title></head>'
new_raw_html = new_raw_html + '<body>'
for item in splitter.split(raw_html):
if item.startswith('var heading1 ='):
heading = item.replace('var heading1 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
new_raw_html = new_raw_html + '<div class="heading">' + heading
if item.startswith('var heading2 ='):
heading = item.replace('var heading2 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
if heading <> '':
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
else:
new_raw_html = new_raw_html + '</div>'
if item.startswith('var content ='):
content = item.replace("var content = ", '')
content = content.replace('\'', '')
content = content.replace(';', '')
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
if item.startswith('var photocontent ='):
photo = item.replace('var photocontent = \'', '')
photo = photo.replace('\'', '')
photo = photo.replace(';', '')
photo = photo.replace('<tr>', '')
photo = photo.replace('<td>', '')
photo = photo.replace('</tr>', '')
photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
return new_raw_html + '</body></html>'
else:
# .txt based file
splitter = re.compile(r'\n') # split on newlines
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
@ -604,7 +678,7 @@ class MPRecipe(BasicNewsRecipe):
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:

recipes/naczytniki.recipe (new file, 15 lines)
View File

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe

class naczytniki(BasicNewsRecipe):
    title = u'naczytniki.pl'
    __author__ = 'fenuks'
    cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
    language = 'pl'
    description = 'everything about e-readers'
    category = 'readers'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_tags_after = dict(name='div', attrs={'class':'sociable'})
    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
    remove_tags = [dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
    feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]

View File

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Nowa_Fantastyka(BasicNewsRecipe):
    title = u'Nowa Fantastyka'
    oldest_article = 7
    __author__ = 'fenuks'
    language = 'pl'
    description = 'site for fantasy readers'
    category = 'fantasy'
    max_articles_per_feed = 100
    INDEX = 'http://www.fantastyka.pl/'
    remove_tags_before = dict(attrs={'class':'belka1-tlo-md'})
    #remove_tags_after = dict(name='span', attrs={'class':'naglowek-oceny'})
    remove_tags_after = dict(name='td', attrs={'class':'belka1-bot'})
    remove_tags = [dict(attrs={'class':'avatar2'})]
    feeds = []

    def find_articles(self, url):
        articles = []
        soup = self.index_to_soup(url)
        tag = soup.find(attrs={'class':'belka1-tlo-m'})
        art = tag.findAll(name='a', attrs={'class':'a-box'})
        for i in art:
            title = i.string
            url = self.INDEX + i['href']
            #date = soup.find(id='footer').ul.li.string[41:-1]
            articles.append({'title': title,
                             'url': url,
                             'date': '',
                             'description': ''
                             })
        return articles

    def parse_index(self):
        feeds = []
        feeds.append((u"Opowiadania", self.find_articles('http://www.fantastyka.pl/3.html')))
        feeds.append((u"Publicystyka", self.find_articles('http://www.fantastyka.pl/6.html')))
        feeds.append((u"Hype Park", self.find_articles('http://www.fantastyka.pl/9.html')))
        return feeds

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.fantastyka.pl/1.html')
        cover = soup.find(name='img', attrs={'class':'okladka'})
        self.cover_url = self.INDEX + cover['src']
        return getattr(self, 'cover_url', self.cover_url)

View File

@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class RevistaPiaui(BasicNewsRecipe):
    title = u'Revista piau\xed'
    language = 'pt_BR'
    __author__ = u'Eduardo Gustini Simões'
    oldest_article = 31
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [(u'Edi\xe7\xe3o Atual', u'http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                soup = self.index_to_soup('http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')
                itemTitle = article.title.partition('|')[0].rstrip()
                item = soup.find(text=itemTitle)
                articleDescription = item.parent.parent.description.string.partition('<br />')[2]
                article.summary = articleDescription
        return feeds

    def populate_article_metadata(self, article, soup, first):
        h2 = soup.find('h2')
        h2.string.replaceWith(h2.string.partition('|')[0].rstrip())
        h2.replaceWith(h2.prettify() + '<p><em>' + article.summary + '</em></p><p><em>' + ' posted at ' + article.localtime.strftime('%d-%m-%Y') + '</em></p>')

View File

@ -9,285 +9,79 @@ calibre recipe for slate.com
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Comment, Tag
class Slate(BasicNewsRecipe):
# Method variables for customizing downloads
description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
__author__ = 'GRiker, Sujata Raman and Nick Redding'
max_articles_per_feed = 100
oldest_article = 14
recursions = 0
delay = 0
simultaneous_downloads = 5
timeout = 120.0
__author__ = 'Kovid Goyal'
timefmt = ''
feeds = None
no_stylesheets = True
encoding = None
language = 'en'
title = 'Slate'
INDEX = 'http://slate.com'
encoding = 'utf-8'
preprocess_regexps = [
(re.compile(r'<!--.*?-->', re.DOTALL), lambda x: ''),
(re.compile(r'^.*?<html', re.DOTALL), lambda x:'<html'),
(re.compile(r'<meta[^>]+?/>', re.DOTALL), lambda x:''),
]
remove_tags = [
{'name':['link', 'script']},
{'class':['share-box-flank', 'sl-crumbs', 'sl-tbar',
'sl-chunky-tbar']},
]
remove_tags_after = [{'class':'sl-art-creds-cntr'}]
keep_only_tags = {'class':'sl-body-wrapper'}
remove_attributes = ['style']
slate_complete = True
if slate_complete:
title = 'Slate (complete)'
else:
title = 'Slate (weekly)'
def print_version(self, url):
return url.replace('.html', '.single.html')
# Method variables for customizing feed parsing
summary_length = 250
use_embedded_content = None
# Method variables for pre/post processing of HTML
preprocess_regexps = [ (re.compile(r'<p><em>Disclosure: <strong>Slate</strong> is owned by the Washington Post.*</p>',
re.DOTALL|re.IGNORECASE),
lambda match: ''),
(re.compile(r'<p><strong><em>Join the discussion about this story on.*</p>',
re.DOTALL|re.IGNORECASE),
lambda match: '') ]
match_regexps = []
# The second entry is for 'Big Money', which comes from a different site, uses different markup
keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body']}),
dict(attrs={ 'id':['content']}) ]
# The second entry is for 'Big Money', which comes from a different site, uses different markup
remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper',
'article_bottom_tools_cntr','fray_article_discussion','fray_article_links','bottom_sponsored_links','author_bio',
'bizbox_links_bottom','ris_links_wrapper','BOXXLE',
'comments_button','add_comments_button','comments-to-fray','marriott_ad',
'article_bottom_tools','recommend_tab2','fbog_article_bottom_cntr']}),
dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ]
excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast']
excludedTitleKeywords = ['Gabfest','Slate V','on Twitter']
excludedAuthorKeywords = []
excludedContentKeywords = ['http://twitter.com/Slate']
extra_css = '''
.h1_subhead{font-family:Arial; font-size:small; }
h1{font-family:Verdana; font-size:large; }
.byline {font-family:Georgia; margin-bottom: 0px; }
.dateline {font-family:Arial; font-size: smaller; height: 0pt;}
.imagewrapper {font-family:Verdana;font-size:x-small; }
.source {font-family:Verdana; font-size:x-small;}
.credit {font-family:Verdana; font-size: smaller;}
#article_body {font-family:Verdana; }
#content {font-family:Arial; }
.caption{font-family:Verdana;font-style:italic; font-size:x-small;}
h3{font-family:Arial; font-size:small}
'''
# Local variables to extend class
baseURL = 'http://slate.com'
section_dates = []
# class extension methods
def tag_to_strings(self, tag):
if not tag:
return ''
if isinstance(tag, basestring):
return tag
strings = []
for item in tag.contents:
if isinstance(item, (NavigableString, CData)):
strings.append(item.string)
elif isinstance(item, Tag):
res = self.tag_to_string(item,use_alt=False)
if res:
strings.append(res)
return strings
def extract_named_sections(self):
soup = self.index_to_soup( self.baseURL )
soup_nav_bar = soup.find(True, attrs={'id':'nav'})
briefing_nav = soup.find('li')
briefing_url = briefing_nav.a['href']
for section_nav in soup_nav_bar.findAll('li'):
section_name = self.tag_to_string(section_nav,use_alt=False)
self.section_dates.append(section_name)
soup = self.index_to_soup(briefing_url)
self.log("Briefing url = %s " % briefing_url)
section_lists = soup.findAll('ul','view_links_list')
sections = []
for section in section_lists :
sections.append(section)
return sections
def extract_dated_sections(self):
soup = self.index_to_soup( self.baseURL )
soup_top_stories = soup.find(True, attrs={'id':'tap3_cntr'})
if soup_top_stories:
self.section_dates.append("Top Stories")
self.log("SELECTION TOP STORIES %s" % "Top Stories")
soup = soup.find(True, attrs={'id':'toc_links_container'})
todays_section = soup.find(True, attrs={'class':'todaydateline'})
self.section_dates.append(self.tag_to_string(todays_section,use_alt=False))
self.log("SELECTION DATE %s" % self.tag_to_string(todays_section,use_alt=False))
older_section_dates = soup.findAll(True, attrs={'class':'maindateline'})
for older_section in older_section_dates :
self.section_dates.append(self.tag_to_string(older_section,use_alt=False))
self.log("SELECTION DATE %s" % self.tag_to_string(older_section,use_alt=False))
if soup_top_stories:
headline_stories = soup_top_stories
self.log("HAVE top_stories")
else:
headline_stories = None
self.log("NO top_stories")
section_lists = soup.findAll('ul')
# Prepend the headlines to the first section
if headline_stories:
section_lists.insert(0,headline_stories)
sections = []
for section in section_lists :
sections.append(section)
return sections
def extract_section_articles(self, sections_html) :
# Find the containers with section content
sections = sections_html
articles = {}
key = None
def parse_index(self) :
ans = []
for (i,section) in enumerate(sections) :
# Get the section name
if section.has_key('id') :
self.log("PROCESSING SECTION id = %s" % section['id'])
key = self.section_dates[i]
if key.startswith("Pod"):
continue
if key.startswith("Blog"):
continue
articles[key] = []
ans.append(key)
elif self.slate_complete:
key = self.section_dates[i]
if key.startswith("Pod"):
continue
if key.startswith("Blog"):
continue
self.log("PROCESSING SECTION name = %s" % key)
articles[key] = []
ans.append(key)
else :
self.log("SECTION %d HAS NO id" % i);
continue
# Get the section article_list
article_list = section.findAll('li')
# Extract the article attributes
for article in article_list :
bylines = self.tag_to_strings(article)
url = article.a['href']
title = bylines[0]
full_title = self.tag_to_string(article,use_alt=False)
#self.log("ARTICLE TITLE%s" % title)
#self.log("ARTICLE FULL_TITLE%s" % full_title)
#self.log("URL %s" % url)
author = None
description = None
pubdate = None
if len(bylines) == 2 and self.tag_to_string(article).find("Today's Papers") > 0 :
description = "A summary of what's in the major U.S. newspapers."
if len(bylines) == 3 :
author = bylines[2].strip()
author = re.sub('[\r][\n][\t][\t\t]','', author)
author = re.sub(',','', author)
if bylines[1] is not None :
description = bylines[1]
full_byline = self.tag_to_string(article)
if full_byline.find('major U.S. newspapers') > 0 :
description = "A summary of what's in the major U.S. newspapers."
if len(bylines) > 3 and author is not None:
author += " | "
for (i,substring) in enumerate(bylines[3:]) :
#print "substring: %s" % substring.encode('cp1252')
author += substring.strip()
if i < len(bylines[3:]) :
author += " | "
# Skip articles whose descriptions contain excluded keywords
if description is not None and len(self.excludedDescriptionKeywords):
excluded = re.compile('|'.join(self.excludedDescriptionKeywords))
found_excluded = excluded.search(description)
if found_excluded :
self.log(" >>> skipping %s (description keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
continue
# Skip articles whose title contain excluded keywords
if full_title is not None and len(self.excludedTitleKeywords):
excluded = re.compile('|'.join(self.excludedTitleKeywords))
#self.log("evaluating full_title: %s" % full_title)
found_excluded = excluded.search(full_title)
if found_excluded :
self.log(" >>> skipping %s (title keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
continue
# Skip articles whose author contain excluded keywords
if author is not None and len(self.excludedAuthorKeywords):
excluded = re.compile('|'.join(self.excludedAuthorKeywords))
found_excluded = excluded.search(author)
if found_excluded :
self.log(" >>> skipping %s (author keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
continue
skip_this_article = False
# Check to make sure we're not adding a duplicate
for article in articles[key] :
if article['url'] == url :
skip_this_article = True
self.log("SKIPPING DUP %s" % url)
break
if skip_this_article :
continue
# Build the dictionary entry for this article
feed = key
if not articles.has_key(feed) :
articles[feed] = []
articles[feed].append(dict(title=title, url=url, date=pubdate, description=description,
author=author, content=''))
#self.log("KEY %s" % feed)
#self.log("APPENDED %s" % url)
# Promote 'newspapers' to top
for (i,article) in enumerate(articles[feed]) :
if article['description'] is not None :
if article['description'].find('newspapers') > 0 :
articles[feed].insert(0,articles[feed].pop(i))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
for sectitle, url in (
('News & Politics', '/articles/news_and_politics.html'),
('Technology', '/articles/technology.html'),
('Business', '/articles/business.html'),
('Arts', '/articles/arts.html'),
('Life', '/articles/life.html'),
('Health & Science', '/articles/health_and_science.html'),
('Sports', '/articles/sports.html'),
('Double X', '/articles/double_x.html'),
):
url = self.INDEX + url
self.log('Found section:', sectitle)
articles = self.slate_section_articles(self.index_to_soup(url))
if articles:
ans.append((sectitle, articles))
return ans
def print_version(self, url) :
return url + 'pagenum/all/'
# Class methods
def parse_index(self) :
if self.slate_complete:
sections = self.extract_named_sections()
else:
sections = self.extract_dated_sections()
section_list = self.extract_section_articles(sections)
return section_list
def slate_section_articles(self, soup):
cont = soup.find('div', id='most_read')
seen = set()
ans = []
for h4 in cont.findAll('h4'):
a = h4.find('a', href=True)
if a is None: continue
url = a['href']
if url.startswith('/'):
url = self.INDEX + url
if url in seen: continue
seen.add(url)
title = self.tag_to_string(a)
parent = h4.parent
h3 = parent.find('h3')
desc = ''
if h3 is not None:
desc = self.tag_to_string(h3)
a = parent.find('a', rel='author')
if a is not None:
a = self.tag_to_string(a)
art = {'title':title, 'description':desc, 'date':'', 'url':url}
if a:
art['author'] = a
self.log('\tFound article:', title, ' by ', a)
ans.append(art)
return ans
def get_masthead_url(self):
masthead = 'http://img.slate.com/images/redesign2008/slate_logo.gif'
@ -299,153 +93,4 @@ class Slate(BasicNewsRecipe):
masthead = None
return masthead
def stripAnchors(self,soup):
body = soup.find('div',attrs={'id':['article_body','content']})
if body is not None:
paras = body.findAll('p')
if paras is not None:
for para in paras:
aTags = para.findAll('a')
if aTags is not None:
for a in aTags:
if a.img is None:
#print repr(a.renderContents())
a.replaceWith(a.renderContents().decode('utf-8','replace'))
return soup
def preprocess_html(self, soup) :
# Remove 'grayPlus4.png' images
imgs = soup.findAll('img')
if imgs is not None:
for img in imgs:
if re.search("grayPlus4.png",str(img)):
img.extract()
# Delete article based upon content keywords
if len(self.excludedDescriptionKeywords):
excluded = re.compile('|'.join(self.excludedContentKeywords))
found_excluded = excluded.search(str(soup))
if found_excluded :
print "No allowed content found, removing article"
raise Exception('Rejected article')
# Articles from www.thebigmoney.com use different tagging for byline, dateline and body
head = soup.find('head')
if head.link is not None and re.search('www\.thebigmoney\.com', str(head)):
byline = soup.find('div',attrs={'id':'byline'})
if byline is not None:
byline['class'] = byline['id']
dateline = soup.find('div',attrs={'id':'dateline'})
if dateline is not None:
dateline['class'] = dateline['id']
body = soup.find('div',attrs={'id':'content'})
if body is not None:
body['class'] = 'article_body'
# Synthesize a department kicker
h3Tag = Tag(soup,'h3')
emTag = Tag(soup,'em')
emTag.insert(0,NavigableString("the big money: Today's business press"))
h3Tag.insert(0,emTag)
soup.body.insert(0,h3Tag)
# Strip anchors from HTML
return self.stripAnchors(soup)
def postprocess_html(self, soup, first_fetch) :
# Fix up dept_kicker as <h3><em>
dept_kicker = soup.find('div', attrs={'class':'department_kicker'})
if dept_kicker is not None :
kicker_strings = self.tag_to_strings(dept_kicker)
kicker = ''.join(kicker_strings[2:])
kicker = re.sub('\.','',kicker)
h3Tag = Tag(soup, "h3")
emTag = Tag(soup, "em")
emTag.insert(0,NavigableString(kicker))
h3Tag.insert(0, emTag)
dept_kicker.replaceWith(h3Tag)
else:
self.log("No kicker--return null")
return None
# Fix up the concatenated byline and dateline
byline = soup.find(True,attrs={'class':'byline'})
if byline is not None :
bylineTag = Tag(soup,'div')
bylineTag['class'] = 'byline'
#bylineTag['height'] = '0em'
bylineTag.insert(0,self.tag_to_string(byline))
byline.replaceWith(bylineTag)
dateline = soup.find(True, attrs={'class':'dateline'})
if dateline is not None :
datelineTag = Tag(soup, 'div')
datelineTag['class'] = 'dateline'
#datelineTag['margin-top'] = '0em'
datelineTag.insert(0,self.tag_to_string(dateline))
dateline.replaceWith(datelineTag)
# Change captions to italic, add <hr>
for caption in soup.findAll(True, {'class':'caption'}) :
if caption is not None:
emTag = Tag(soup, "em")
emTag.insert(0, '<br />' + self.tag_to_string(caption))
hrTag = Tag(soup, 'hr')
emTag.insert(1, hrTag)
caption.replaceWith(emTag)
# Fix photos
for photo in soup.findAll('span',attrs={'class':'imagewrapper'}):
if photo.a is not None and photo.a.img is not None:
divTag = Tag(soup,'div')
divTag['class'] ='imagewrapper'
divTag.insert(0,photo.a.img)
photo.replaceWith(divTag)
return soup
def postprocess_book(self, oeb, opts, log) :
def extract_byline(href) :
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
byline = soup.find(True,attrs={'class':'byline'})
if byline is not None:
return self.tag_to_string(byline,use_alt=False)
else :
return None
def extract_description(href) :
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
paragraphs = soup.findAll('p')
for p in paragraphs :
if self.tag_to_string(p,use_alt=False).startswith('By ') or \
self.tag_to_string(p,use_alt=False).startswith('Posted '):
continue
comment = p.find(text=lambda text:isinstance(text, Comment))
if comment is not None:
continue
else:
return self.tag_to_string(p,use_alt=False)[:self.summary_length] + '...'
return None
# Method entry point here
# Single section toc looks different than multi-section tocs
if oeb.toc.depth() == 2 :
for article in oeb.toc :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href)
elif oeb.toc.depth() == 3 :
for section in oeb.toc :
for article in section :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href)

View File

@ -9,4 +9,6 @@ class Tablety_pl(BasicNewsRecipe):
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
keep_only_tags=[dict(name='header', attrs={'class':'entry-header'}), dict(name='div', attrs={'class':'entry-content clearfix'})]
remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'}), dict(name='span', attrs={'class':'dsq-postid'})]
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]

View File

@ -278,6 +278,8 @@ def get_proxies(debug=True):
continue
if proxy.startswith(key+'://'):
proxy = proxy[len(key)+3:]
if key == 'https' and proxy.startswith('http://'):
proxy = proxy[7:]
if proxy.endswith('/'):
proxy = proxy[:-1]
if len(proxy) > 4:
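A small standalone sketch of the normalization this hunk performs (an illustrative re-implementation, not the calibre function itself); it shows why the new check matters when an environment sets HTTPS_PROXY with an http:// scheme:

def normalize_proxy(key, proxy):
    # strip a 'key://' scheme prefix, an 'http://' prefix on https proxies,
    # and a trailing slash, mirroring the logic above
    if proxy.startswith(key + '://'):
        proxy = proxy[len(key) + 3:]
    if key == 'https' and proxy.startswith('http://'):
        proxy = proxy[7:]
    if proxy.endswith('/'):
        proxy = proxy[:-1]
    return proxy

print normalize_proxy('https', 'http://proxy.example.com:3128/')
# -> proxy.example.com:3128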

View File

@ -437,8 +437,8 @@ class TabletOutput(iPadOutput):
short_name = 'tablet'
description = _('Intended for generic tablet devices, does no resizing of images')
screen_size = (sys.maxint, sys.maxint)
comic_screen_size = (sys.maxint, sys.maxint)
screen_size = (10000, 10000)
comic_screen_size = (10000, 10000)
class SamsungGalaxy(TabletOutput):
name = 'Samsung Galaxy'

View File

@ -414,7 +414,8 @@ class DevicePlugin(Plugin):
@classmethod
def config_widget(cls):
'''
Should return a QWidget. The QWidget contains the settings for the device interface
Should return a QWidget. The QWidget contains the settings for the
device interface
'''
raise NotImplementedError()
@ -429,8 +430,9 @@ class DevicePlugin(Plugin):
@classmethod
def settings(cls):
'''
Should return an opts object. The opts object should have at least one attribute
`format_map` which is an ordered list of formats for the device.
Should return an opts object. The opts object should have at least one
attribute `format_map` which is an ordered list of formats for the
device.
'''
raise NotImplementedError()
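A minimal sketch of a conforming implementation (the shape is assumed from the docstring above; a real driver would build the opts object from its saved configuration):

class ExampleDriver(DevicePlugin):

    @classmethod
    def settings(cls):
        class Opts(object):
            # ordered list of formats the device accepts, most preferred first
            format_map = ['epub', 'pdf', 'txt']
        return Opts()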

View File

@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
import os
import sqlite3 as sqlite
from contextlib import closing
from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book
from calibre.devices.kobo.books import ImageWrapper
@ -16,6 +15,7 @@ from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS, debug_print
from calibre import prints
from calibre.devices.usbms.books import CollectionsBookList
from calibre.utils.magick.draw import save_cover_data_to
class KOBO(USBMS):
@@ -53,11 +53,23 @@ class KOBO(USBMS):
_('The Kobo supports several collections including ')+\
'Read, Closed, Im_Reading. ' +\
_('Create tags for automatic management'),
_('Upload covers for books (newer readers)') +
':::'+_('Normally, the KOBO readers get the cover image from the'
' ebook file itself. With this option, calibre will send a '
'separate cover image to the reader, useful if you '
'have modified the cover.'),
_('Upload Black and White Covers')
]
EXTRA_CUSTOMIZATION_DEFAULT = [', '.join(['tags'])]
EXTRA_CUSTOMIZATION_DEFAULT = [
', '.join(['tags']),
True,
True
]
OPT_COLLECTIONS = 0
OPT_UPLOAD_COVERS = 1
OPT_UPLOAD_GRAYSCALE_COVERS = 2
def initialize(self):
USBMS.initialize(self)
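
The three defaults above line up positionally with the OPT_* indices; a sketch of how the driver reads them back (this mirrors the upload_cover code later in this diff):

opts = KOBO.settings()
collections_spec = opts.extra_customization[KOBO.OPT_COLLECTIONS]              # ', '.join(['tags'])
upload_covers    = opts.extra_customization[KOBO.OPT_UPLOAD_COVERS]            # True
upload_grayscale = opts.extra_customization[KOBO.OPT_UPLOAD_GRAYSCALE_COVERS]  # True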
@@ -593,7 +605,7 @@ class KOBO(USBMS):
raise
else:
connection.commit()
debug_print(' Commit: Reset ReadStatus list')
# debug_print(' Commit: Reset ReadStatus list')
cursor.close()
@@ -616,7 +628,7 @@ class KOBO(USBMS):
raise
else:
connection.commit()
debug_print(' Commit: Setting ReadStatus List')
# debug_print(' Commit: Setting ReadStatus List')
cursor.close()
def reset_favouritesindex(self, connection, oncard):
@@ -635,7 +647,7 @@ class KOBO(USBMS):
raise
else:
connection.commit()
debug_print(' Commit: Reset FavouritesIndex list')
# debug_print(' Commit: Reset FavouritesIndex list')
def set_favouritesindex(self, connection, ContentID):
cursor = connection.cursor()
@@ -650,7 +662,7 @@ class KOBO(USBMS):
raise
else:
connection.commit()
debug_print(' Commit: Set FavouritesIndex')
# debug_print(' Commit: Set FavouritesIndex')
def update_device_database_collections(self, booklists, collections_attributes, oncard):
# Only process categories in this list
@@ -702,9 +714,9 @@ class KOBO(USBMS):
# Process any collections that exist
for category, books in collections.items():
if category in supportedcategories:
debug_print("Category: ", category, " id = ", readstatuslist.get(category))
# debug_print("Category: ", category, " id = ", readstatuslist.get(category))
for book in books:
debug_print(' Title:', book.title, 'category: ', category)
# debug_print(' Title:', book.title, 'category: ', category)
if category not in book.device_collections:
book.device_collections.append(category)
@@ -763,3 +775,93 @@ class KOBO(USBMS):
collections_attributes = []
self.update_device_database_collections(booklist, collections_attributes, oncard)
def upload_cover(self, path, filename, metadata, filepath):
'''
Upload book cover to the device.
:param path: The full path to the directory where the associated book is located.
:param filename: The name of the book file without the extension.
:param metadata: Metadata belonging to the book. Use metadata.thumbnail
for the cover.
:param filepath: The full path to the ebook file
'''
opts = self.settings()
if not opts.extra_customization[self.OPT_UPLOAD_COVERS]:
# Cover uploading disabled
debug_print('KOBO: not uploading cover')
return
if not opts.extra_customization[self.OPT_UPLOAD_GRAYSCALE_COVERS]:
uploadgrayscale = False
else:
uploadgrayscale = True
debug_print('KOBO: uploading cover')
try:
self._upload_cover(path, filename, metadata, filepath, uploadgrayscale)
except:
debug_print('FAILED to upload cover', filepath)
def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale):
if metadata.cover:
cover = self.normalize_path(metadata.cover.replace('/', os.sep))
if os.path.exists(cover):
# Get ContentID for Selected Book
extension = os.path.splitext(filepath)[1]
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(filepath)
ContentID = self.contentid_from_path(filepath, ContentType)
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection:
# return bytestrings if the content cannot be decoded as unicode
connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
cursor = connection.cursor()
t = (ContentID,)
cursor.execute('select ImageId from Content where BookID is Null and ContentID = ?', t)
result = cursor.fetchone()
if result is None:
debug_print("No rows exist in the database - cannot upload")
return
else:
ImageID = result[0]
# debug_print("ImageId: ", result[0])
cursor.close()
if ImageID is not None:
path_prefix = '.kobo/images/'
path = self._main_prefix + path_prefix + ImageID
file_endings = {' - iPhoneThumbnail.parsed':(103,150),
' - bbMediumGridList.parsed':(93,135),
' - NickelBookCover.parsed':(500,725),
' - N3_LIBRARY_FULL.parsed':(355,530),
' - N3_LIBRARY_GRID.parsed':(149,233),
' - N3_LIBRARY_LIST.parsed':(60,90),
' - N3_SOCIAL_CURRENTREAD.parsed':(120,186)}
for ending, resize in file_endings.items():
fpath = path + ending
fpath = self.normalize_path(fpath.replace('/', os.sep))
if os.path.exists(fpath):
with open(cover, 'rb') as f:
data = f.read()
# Return the data resized and in Grayscale if
# required
data = save_cover_data_to(data, 'dummy.jpg',
grayscale=uploadgrayscale,
resize_to=resize, return_data=True)
with open(fpath, 'wb') as f:
f.write(data)
else:
debug_print("ImageID could not be retreived from the database")

View File

@@ -319,7 +319,7 @@ class PRST1(USBMS):
THUMBNAIL_HEIGHT = 217
SCAN_FROM_ROOT = True
EBOOK_DIR_MAIN = __appname__
SUPPORTS_SUB_DIRS = True
def windows_filter_pnp_id(self, pnp_id):
return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id
@@ -329,4 +329,10 @@ class PRST1(USBMS):
return __appname__
return self.EBOOK_DIR_CARD_A
def get_main_ebook_dir(self, for_upload=False):
if for_upload:
return __appname__
return ''

View File

@@ -100,7 +100,7 @@ gprefs.defaults['default_author_link'] = 'http://en.wikipedia.org/w/index.php?se
gprefs.defaults['preserve_date_on_ctl'] = True
gprefs.defaults['cb_fullscreen'] = False
gprefs.defaults['worker_max_time'] = 0
gprefs.defaults['show_files_after_save'] = True
# }}}
NONE = QVariant() #: Null value to return from the data function of item models

View File

@@ -18,11 +18,15 @@ class GenerateCatalogAction(InterfaceAction):
name = 'Generate Catalog'
action_spec = (_('Create catalog'), 'catalog.png', 'Catalog builder', ())
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
def genesis(self):
self.qaction.triggered.connect(self.generate_catalog)
def location_selected(self, loc):
enabled = loc == 'library'
self.qaction.setEnabled(enabled)
def generate_catalog(self):
rows = self.gui.library_view.selectionModel().selectedRows()
if not rows or len(rows) < 2:

View File

@@ -138,7 +138,7 @@ class ChooseLibraryAction(InterfaceAction):
name = 'Choose Library'
action_spec = (_('Choose Library'), 'lt.png',
_('Choose calibre library to work with'), None)
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
action_add_menu = True
action_menu_clone_qaction = _('Switch/create library...')

View File

@@ -20,7 +20,7 @@ class ConvertAction(InterfaceAction):
name = 'Convert Books'
action_spec = (_('Convert books'), 'convert.png', None, _('C'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
action_type = 'current'
action_add_menu = True

View File

@@ -127,7 +127,7 @@ class CopyToLibraryAction(InterfaceAction):
action_spec = (_('Copy to library'), 'lt.png',
_('Copy selected books to the specified library'), None)
popup_type = QToolButton.InstantPopup
dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
action_type = 'current'
action_add_menu = True

View File

@@ -24,7 +24,7 @@ class ShareConnMenu(QMenu): # {{{
config_email = pyqtSignal()
toggle_server = pyqtSignal()
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
def __init__(self, parent=None):
QMenu.__init__(self, parent)

View File

@@ -11,7 +11,7 @@ class NextMatchAction(InterfaceAction):
name = 'Move to next highlighted book'
action_spec = (_('Move to next match'), 'arrow-down.png',
_('Move to next highlighted match'), [_('N'), _('F3')])
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
action_type = 'current'
def genesis(self):

View File

@@ -13,7 +13,7 @@ class OpenFolderAction(InterfaceAction):
name = 'Open Folder'
action_spec = (_('Open containing folder'), 'document_open.png', None,
_('O'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
action_type = 'current'
def genesis(self):

View File

@@ -16,11 +16,15 @@ class PickRandomAction(InterfaceAction):
name = 'Pick Random Book'
action_spec = (_('Pick a random book'), 'random.png',
'Select a random book from your calibre library', ())
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
def genesis(self):
self.qaction.triggered.connect(self.pick_random)
def location_selected(self, loc):
enabled = loc == 'library'
self.qaction.setEnabled(enabled)
def pick_random(self):
pick = random.randint(0, self.gui.library_view.model().rowCount(None))
self.gui.library_view.set_current_row(pick)

View File

@@ -11,8 +11,8 @@ from functools import partial
from PyQt4.Qt import QMenu, pyqtSignal
from calibre.utils.config import prefs
from calibre.gui2 import error_dialog, Dispatcher, \
choose_dir, warning_dialog, open_local_file
from calibre.gui2 import (error_dialog, Dispatcher, gprefs,
choose_dir, warning_dialog, open_local_file)
from calibre.gui2.actions import InterfaceAction
from calibre.ebooks import BOOK_EXTENSIONS
@@ -141,6 +141,7 @@ class SaveToDiskAction(InterfaceAction):
_('Could not save some books') + ', ' +
_('Click the show details button to see which ones.'),
u'\n\n'.join(failures), show=True)
if gprefs['show_files_after_save']:
open_local_file(path)
def books_saved(self, job):

View File

@@ -15,7 +15,7 @@ class ShowBookDetailsAction(InterfaceAction):
name = 'Show Book Details'
action_spec = (_('Show book details'), 'dialog_information.png', None,
_('I'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
action_type = 'current'
def genesis(self):

View File

@@ -14,7 +14,7 @@ class ShowQuickviewAction(InterfaceAction):
name = 'Show quickview'
action_spec = (_('Show quickview'), 'search.png', None, _('Q'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
action_type = 'current'
current_instance = None

View File

@@ -17,7 +17,7 @@ class TweakEpubAction(InterfaceAction):
action_spec = (_('Tweak ePub'), 'trim.png',
_('Make small changes to ePub format books'),
_('T'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
dont_add_to = frozenset(['context-menu-device'])
action_type = 'current'
def genesis(self):

View File

@@ -310,7 +310,7 @@ class CheckLibraryDialog(QDialog):
tl = Item()
tl.setText(0, h)
if fixable:
if fixable and list:
tl.setText(1, _('(fixable)'))
tl.setFlags(Qt.ItemIsEnabled | Qt.ItemIsUserCheckable)
tl.setCheckState(1, False)

View File

@@ -538,14 +538,20 @@ class CoversModel(QAbstractListModel): # {{{
current_cover = QPixmap(I('default_cover.png'))
self.blank = QPixmap(I('blank.png')).scaled(150, 200)
self.cc = current_cover
self.reset_covers(do_reset=False)
self.covers = [self.get_item(_('Current cover'), current_cover)]
def reset_covers(self, do_reset=True):
self.covers = [self.get_item(_('Current cover'), self.cc)]
self.plugin_map = {}
for i, plugin in enumerate(metadata_plugins(['cover'])):
self.covers.append((plugin.name+'\n'+_('Searching...'),
QVariant(self.blank), None, True))
self.plugin_map[plugin] = i+1
if do_reset:
self.reset()
def get_item(self, src, pmap, waiting=False):
sz = '%dx%d'%(pmap.width(), pmap.height())
text = QVariant(src + '\n' + sz)
@@ -654,6 +660,9 @@ class CoversView(QListView): # {{{
self.select(0)
self.delegate.start_animation()
def reset_covers(self):
self.m.reset_covers()
def clear_failed(self):
plugin = self.m.plugin_for_index(self.currentIndex())
self.m.clear_failed()
@@ -683,12 +692,18 @@ class CoversWidget(QWidget): # {{{
l.addWidget(self.covers_view, 1, 0)
self.continue_processing = True
def reset_covers(self):
self.covers_view.reset_covers()
def start(self, book, current_cover, title, authors):
self.continue_processing = True
self.abort.clear()
self.book, self.current_cover = book, current_cover
self.title, self.authors = title, authors
self.log('Starting cover download for:', book.title)
self.log('Query:', title, authors, self.book.identifiers)
self.msg.setText('<p>'+_('Downloading covers for <b>%s</b>, please wait...')%book.title)
self.msg.setText('<p>'+
_('Downloading covers for <b>%s</b>, please wait...')%book.title)
self.covers_view.start()
self.worker = CoverWorker(self.log, self.abort, self.title,
@@ -726,8 +741,9 @@ class CoversWidget(QWidget): # {{{
if num < 2:
txt = _('Could not find any covers for <b>%s</b>')%self.book.title
else:
txt = _('Found <b>%(num)d</b> covers of %(title)s. Pick the one you like'
' best.')%dict(num=num-1, title=self.title)
txt = _('Found <b>%(num)d</b> covers of %(title)s. '
'Pick the one you like best.')%dict(num=num-1,
title=self.title)
self.msg.setText(txt)
self.finished.emit()
@@ -832,10 +848,14 @@ class FullFetch(QDialog): # {{{
self.next_button.clicked.connect(self.next_clicked)
self.ok_button = self.bb.button(self.bb.Ok)
self.ok_button.clicked.connect(self.ok_clicked)
self.prev_button = self.bb.addButton(_('Back'), self.bb.ActionRole)
self.prev_button.setIcon(QIcon(I('back.png')))
self.prev_button.clicked.connect(self.back_clicked)
self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
self.log_button.clicked.connect(self.view_log)
self.log_button.setIcon(QIcon(I('debug.png')))
self.ok_button.setVisible(False)
self.prev_button.setVisible(False)
self.identify_widget = IdentifyWidget(self.log, self)
self.identify_widget.rejected.connect(self.reject)
@@ -857,12 +877,21 @@ class FullFetch(QDialog): # {{{
def book_selected(self, book):
self.next_button.setVisible(False)
self.ok_button.setVisible(True)
self.prev_button.setVisible(True)
self.book = book
self.stack.setCurrentIndex(1)
self.log('\n\n')
self.covers_widget.start(book, self.current_cover,
self.title, self.authors)
def back_clicked(self):
self.next_button.setVisible(True)
self.ok_button.setVisible(False)
self.prev_button.setVisible(False)
self.stack.setCurrentIndex(0)
self.covers_widget.cancel()
self.covers_widget.reset_covers()
def accept(self):
# Prevent the usual dialog accept mechanisms from working
pass

View File

@@ -58,7 +58,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.device_to_formats_map = {}
for device in device_plugins():
n = device_name_for_plugboards(device)
self.device_to_formats_map[n] = set(device.FORMATS)
self.device_to_formats_map[n] = set(device.settings().format_map)
if getattr(device, 'CAN_DO_DEVICE_DB_PLUGBOARD', False):
self.device_to_formats_map[n].add('device_db')
if n not in self.devices:

View File

@@ -12,6 +12,7 @@ from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \
from calibre.gui2.preferences.saving_ui import Ui_Form
from calibre.utils.config import ConfigProxy
from calibre.library.save_to_disk import config
from calibre.gui2 import gprefs
class ConfigWidget(ConfigWidgetBase, Ui_Form):
@@ -24,6 +25,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
for x in ('asciiize', 'update_metadata', 'save_cover', 'write_opf',
'replace_whitespace', 'to_lowercase', 'formats', 'timefmt'):
r(x, self.proxy)
r('show_files_after_save', gprefs)
self.save_template.changed_signal.connect(self.changed_signal.emit)

View File

@@ -95,6 +95,13 @@
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_show_files_after_save">
<property name="text">
<string>&amp;Show files in file browser after saving to disk</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>

View File

@@ -231,6 +231,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def genesis(self, gui):
self.models = {}
self.what.addItem(_('Click to choose toolbar or menu to customize'),
'blank')
for key, text in self.LOCATIONS:
self.what.addItem(text, key)
all_model = AllModel(key, gui)
@@ -247,6 +249,12 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def what_changed(self, idx):
key = unicode(self.what.itemData(idx).toString())
if key == 'blank':
self.actions_widget.setVisible(False)
self.spacer_widget.setVisible(True)
else:
self.actions_widget.setVisible(True)
self.spacer_widget.setVisible(False)
self.all_actions.setModel(self.models[key][0])
self.current_actions.setModel(self.models[key][1])

View File

@@ -13,16 +13,19 @@
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0" colspan="5">
<widget class="QComboBox" name="what">
<property name="font">
<font>
<pointsize>20</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QLabel" name="label">
<property name="text">
<string>&lt;p&gt;The toolbar in calibre is different depending on whether a device is connected or not. Choose &lt;b&gt;which toolbar&lt;/b&gt; you would like to customize:</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="what">
<property name="toolTip">
<string>Choose the toolbar to customize</string>
</property>
@@ -34,7 +37,13 @@
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<item>
<widget class="QWidget" name="actions_widget" native="true">
<layout class="QHBoxLayout" name="horizontalLayout_2">
<property name="margin">
<number>0</number>
</property>
<item>
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>A&amp;vailable actions</string>
@@ -42,6 +51,12 @@
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QListView" name="all_actions">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>100</verstretch>
</sizepolicy>
</property>
<property name="selectionMode">
<enum>QAbstractItemView::MultiSelection</enum>
</property>
@@ -62,7 +77,7 @@
</layout>
</widget>
</item>
<item row="2" column="2">
<item>
<layout class="QVBoxLayout" name="verticalLayout_3">
<item>
<widget class="QToolButton" name="add_action_button">
@@ -122,7 +137,7 @@
</item>
</layout>
</item>
<item row="2" column="3" colspan="2">
<item>
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>&amp;Current actions</string>
@@ -130,6 +145,12 @@
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QListView" name="current_actions">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>100</verstretch>
</sizepolicy>
</property>
<property name="selectionMode">
<enum>QAbstractItemView::MultiSelection</enum>
</property>
@@ -210,14 +231,26 @@
</layout>
</widget>
</item>
<item row="1" column="0" colspan="5">
<widget class="QLabel" name="label">
<property name="text">
<string>&lt;p&gt;The toolbar in calibre is different depending on whether a device is connected or not. To customize the toolbar when a device is connected as well as customizing right click menus, &lt;b&gt;click the dropdown above&lt;/b&gt; and select which toolbar/menu you want to customize.</string>
</layout>
</widget>
</item>
<item>
<widget class="QWidget" name="spacer_widget" native="true">
<layout class="QVBoxLayout" name="verticalLayout_5">
<item>
<spacer name="verticalSpacer_3">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="wordWrap">
<bool>true</bool>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>224</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
</layout>

View File

@@ -112,8 +112,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
self.opts = opts
self.embedded = embedded
self.state_callback = None
try:
self.max_cover_width, self.max_cover_height = \
map(int, self.opts.max_cover.split('x'))
except:
self.max_cover_width = 1200
self.max_cover_height = 1600
path = P('content_server')
self.build_time = fromtimestamp(os.stat(path).st_mtime)
self.default_cover = open(P('content_server/default_cover.jpg'), 'rb').read()
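
The new try block parses the server's max_cover option, a 'WIDTHxHEIGHT' string, falling back to 1200x1600 when it is missing or malformed; a standalone sketch (the option value is made up):

max_cover = '1200x1600'                # e.g. the value of the max_cover server option
try:
    width, height = map(int, max_cover.split('x'))
except:
    width, height = 1200, 1600         # same fallback defaults as the hunk above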

View File

@@ -47,7 +47,8 @@ def normalize_format_name(fmt):
return fmt
def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
return_data=False, compression_quality=90, minify_to=None):
return_data=False, compression_quality=90, minify_to=None,
grayscale=False):
'''
Saves image in data to path, in the format specified by the path
extension. Removes any transparency. If there is no transparency and no
@@ -60,7 +61,8 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
compression (lossless).
:param bgcolor: The color for transparent pixels. Must be specified in hex.
:param resize_to: A tuple (width, height) or None for no resizing
:param minify_to: A tuple (width, height) to specify target size. The image
:param minify_to: A tuple (width, height) to specify maximum target size. The image
will be resized to fit into this target size. If None the value from the
tweak is used.
:param grayscale: If True, the image is converted to grayscale
@@ -71,6 +73,10 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
fmt = os.path.splitext(path)[1]
fmt = normalize_format_name(fmt[1:])
if grayscale:
img.type = "GrayscaleType"
changed = True
if resize_to is not None:
img.size = (resize_to[0], resize_to[1])
changed = True
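
A hedged usage sketch of the extended signature, matching one of the Kobo thumbnail sizes from earlier in this diff; the file names are invented:

from calibre.utils.magick.draw import save_cover_data_to

with open('cover.jpg', 'rb') as f:
    data = f.read()
# Resize to 355x530, convert to grayscale, and get the encoded bytes back
thumb = save_cover_data_to(data, 'thumb.jpg', resize_to=(355, 530),
                           grayscale=True, return_data=True)
with open('thumb.jpg', 'wb') as f:
    f.write(thumb)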