Merge from trunk

Charles Haley 2011-10-09 13:58:01 +02:00
commit 3ae196df74
53 changed files with 1012 additions and 778 deletions


@@ -118,7 +118,7 @@ EBVS
 <0x 00 00 00 00>
 <0x 00 00 00 10>
 ...(rest of size of DATA block)
 <0x FD EA = PAD? (ýê)>
 DATA
 <0x 4 bytes = size of <marked text (see 3rd note)> >
 <marked text (see 3rd note)>
@@ -155,7 +155,7 @@ EBVS
 <0x 00 00 00 00>
 <0x 00 00 00 10>
 ...(rest of size of DATA block)
 <0x FD EA = PAD? (ýê)>
 [fi MARK || BOOKMARK]
 //-------------------------------
 [if CORRECTION]
@@ -174,7 +174,7 @@ EBVS
 <0x 00 00 00 00>
 <0x 00 00 00 10>
 ...(rest of size of DATA block)
 <0x FD EA = PAD? (ýê)>
 DATA
 <0x 4 bytes = size of <marked text (see 3rd note)> >
 <marked text (see 3rd note)>
@@ -246,7 +246,7 @@ EBVS
 <0x 00 00 00 00>
 <0x 00 00 00 10>
 ...(size of DATA block - 30)
 <0x FD EA = PAD? (ýê)>
 [fi DRAWING]
 //-------------------------------
 [next {NOTE,MARK,CORRECTION,DRAWING}]
@@ -308,7 +308,7 @@ EBVS
 ...4
 ...4
 ...4
 <0x FD EA = PAD? (ýê)>
 //--------------------------------------------------------------------
 // CATEGORY (if any)
@@ -411,4 +411,4 @@ BKMK
 // END OF FILE
 // by idleloop@yahoo.com, v0.2.e, 12/2009
 // http://www.angelfire.com/ego2/idleloop
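
The size-prefixed records documented above are straightforward to walk in code. A minimal Python sketch follows; the 4-byte size field and its big-endian byte order are assumptions read off these notes, not a verified spec:

    import struct

    def read_marked_text(buf, offset):
        # <0x 4 bytes = size of marked text>, assumed big-endian
        size, = struct.unpack_from('>I', buf, offset)
        start = offset + 4
        # return the marked text and the offset just past it
        return buf[start:start + size], start + size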


@@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Archeowiesci(BasicNewsRecipe):
    title = u'Archeowiesci'
    __author__ = 'fenuks'
    category = 'archeology'
    language = 'pl'
    cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
    feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]

    def parse_feeds (self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                if 'subskrypcja' in article.title:
                    feed.articles.remove(article)
        return feeds


@@ -9,9 +9,10 @@ class CGM(BasicNewsRecipe):
     category = 'music'
     language = 'pl'
     use_embedded_content = False
+    remove_empty_feeds= True
     max_articles_per_feed = 100
     no_stylesheers=True
-    extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;}'
+    extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;}'
     remove_tags_before=dict(id='mainContent')
     remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
     remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
@@ -22,10 +23,12 @@ class CGM(BasicNewsRecipe):
     def preprocess_html(self, soup):
-        ad=soup.findAll('img')
+        for item in soup.findAll(style=True):
+            del item['style']
+        ad=soup.findAll('a')
         for r in ad:
-            if '/_vault/_article_photos/5841.jpg' in r['src'] or '_vault/_article_photos/5807.jpg' in r['src'] or 'article_photos/5841.jpg' in r['src'] or 'article_photos/5825.jpg' in r['src'] or '_article_photos/5920.jpg' in r['src'] or '_article_photos/5919.jpg' in r['src'] or '_article_photos/5918.jpg' in r['src'] or '_article_photos/5914.jpg' in r['src'] or '_article_photos/5911.jpg' in r['src'] or '_article_photos/5923.jpg' in r['src'] or '_article_photos/5921.jpg' in r['src']:
-                ad[ad.index(r)].extract()
+            if 'http://www.hustla.pl' in r['href']:
+                r.extract()
         gallery=soup.find('div', attrs={'class':'galleryFlash'})
         if gallery:
             img=gallery.find('embed')


@@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1317580312(BasicNewsRecipe):
    title = u'Dark Horizons'
    language = 'en'
    __author__ = 'Jaded'
    description = 'News, images, video clips and reviews of current and upcoming blockbuster films.'
    category = 'movies, tv, news'
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = 'http://a4.sphotos.ak.fbcdn.net/hphotos-ak-ash2/164168_148419801879765_148410081880737_225532_464073_n.jpg'
    masthead_url = 'http://www.darkhorizons.com/graphics/2/logo_print.png'
    auto_cleanup = True

    feeds = [(u'News', u'http://www.darkhorizons.com/feeds/news.atom'), (u'Features', u'http://www.darkhorizons.com/feeds/features.atom'), (u'Reviews', u'http://www.darkhorizons.com/feeds/reviews.atom')]


@@ -22,6 +22,10 @@ class Descopera(BasicNewsRecipe):
     category = 'Ziare,Reviste,Descopera'
     encoding = 'utf-8'
     cover_url = 'http://www.descopera.ro/images/header_images/logo.gif'
+    use_embedded_content = False
+    no_stylesheets = True
+    auto_cleanup = True
+
     conversion_options = {
         'comments' : description
@@ -30,28 +34,6 @@ class Descopera(BasicNewsRecipe):
         ,'publisher' : publisher
     }

-    keep_only_tags = [
-        dict(name='h1', attrs={'style':'font-family: Arial,Helvetica,sans-serif; font-size: 18px; color: rgb(51, 51, 51); font-weight: bold; margin: 10px 0pt; clear: both; float: left;width: 610px;'})
-        ,dict(name='div', attrs={'style':'margin-right: 15px; margin-bottom: 15px; float: left;'})
-        ,dict(name='p', attrs={'id':'itemDescription'})
-        ,dict(name='div', attrs={'id':'itemBody'})
-    ]
-
-    remove_tags = [
-        dict(name='div', attrs={'class':['tools']})
-        ,dict(name='div', attrs={'class':['share']})
-        ,dict(name='div', attrs={'class':['category']})
-        ,dict(name='div', attrs={'id':['comments']})
-    ]
-
-    remove_tags_after = [
-        dict(name='div', attrs={'id':'comments'})
-    ]

     feeds = [
         (u'Feeds', u'http://www.descopera.ro/rss')
     ]

-    def preprocess_html(self, soup):
-        return self.adeify_images(soup)

recipes/eioba.recipe (new file)

@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class eioba(BasicNewsRecipe):
    title = u'eioba'
    __author__ = 'fenuks'
    cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png'
    language = 'pl'
    oldest_article = 7
    remove_empty_feeds= True
    max_articles_per_feed = 100
    extra_css = '#ctl0_body_Topic {font-weight: bold; font-size:30px;}'
    keep_only_tags=[dict(id=['ctl0_body_Topic', 'articleContent'])]
    feeds = [(u'Wszyskie kategorie', u'http://feeds.eioba.pl/eioba-pl-top'),
             (u'Technologia', u'http://www.eioba.pl/feed/categories/1.xml'),
             (u'Nauka', u'http://www.eioba.pl/feed/categories/12.xml'),
             (u'Finanse', u'http://www.eioba.pl/feed/categories/7.xml'),
             (u'Życie', u'http://www.eioba.pl/feed/categories/5.xml'),
             (u'Zainteresowania', u'http://www.eioba.pl/feed/categories/420.xml'),
             (u'Społeczeństwo', u'http://www.eioba.pl/feed/categories/8.xml'),
             (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
             (u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
            ]


@@ -19,45 +19,20 @@ class FazNet(BasicNewsRecipe):
     no_stylesheets = True
     encoding = 'utf-8'
     remove_javascript = True
+    keep_only_tags = [{'class':'FAZArtikelEinleitung'},
+                      {'id':'ArtikelTabContent_0'}]

-    html2lrf_options = [
-        '--comment', description
-        , '--category', category
-        , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
-    remove_tags = [
-        dict(name=['object','link','embed','base'])
-        ,dict(name='div',
-            attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo',
-                'ArtikelServices', 'ModulLesermeinungenFooter',
-                'ModulArtikelServices', 'BoxTool Aufklappen_Grau',
-                'SocialMediaUnten', ]}),
-        dict(id=['KurzLinkMenu', 'ArtikelServicesMenu']),
-    ]

     feeds = [
-        ('FAZ.NET Aktuell', 'http://www.faz.net/s/RubF3CE08B362D244869BE7984590CB6AC1/Tpl~Epartner~SRss_.xml'),
-        ('Politik', 'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
-        ('Wirtschaft', 'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
-        ('Feuilleton', 'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
-        ('Sport', 'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
-        ('Gesellschaft', 'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
-        ('Finanzen', 'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
-        ('Wissen', 'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
-        ('Reise', 'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
-        ('Technik & Motor', 'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
-        ('Beruf & Chance', 'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml')
+        ('FAZ.NET Aktuell', 'http://www.faz.net/aktuell/?rssview=1'),
+        ('Politik', 'http://www.faz.net/aktuell/politik/?rssview=1'),
+        ('Wirtschaft', 'http://www.faz.net/aktuell/wirtschaft/?rssview=1'),
+        ('Feuilleton', 'http://www.faz.net/aktuell/feuilleton/?rssview=1'),
+        ('Sport', 'http://www.faz.net/aktuell/sport/?rssview=1'),
+        ('Gesellschaft', 'http://www.faz.net/aktuell/gesellschaft/?rssview=1'),
+        ('Finanzen', 'http://www.faz.net/aktuell/finanzen/?rssview=1'),
+        ('Technik & Motor', 'http://www.faz.net/aktuell/technik-motor/?rssview=1'),
+        ('Wissen', 'http://www.faz.net/aktuell/wissen/?rssview=1'),
+        ('Reise', 'http://www.faz.net/aktuell/reise/?rssview=1'),
+        ('Beruf & Chance', 'http://www.faz.net/aktuell/beruf-chance/?rssview=1')
     ]
-
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
-        soup.head.insert(0,mtag)
-        del soup.body['onload']
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup

recipes/focus_pl.recipe (new file)

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Focus_pl(BasicNewsRecipe):
    title = u'Focus.pl'
    oldest_article = 15
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    language = 'pl'
    description ='polish scientific monthly magazine'
    category='magazine'
    cover_url=''
    remove_empty_feeds= True
    no_stylesheets=True
    remove_tags_before=dict(name='div', attrs={'class':'h2 h2f'})
    remove_tags_after=dict(name='div', attrs={'class':'clear'})
    feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
             (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
             (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
             (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
             (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
             (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
             (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
             (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
             (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
            ]

    def skip_ad_pages(self, soup):
        tag=soup.find(name='a')
        if tag:
            new_soup=self.index_to_soup(tag['href']+ 'do-druku/1/', raw=True)
            return new_soup

    def append_page(self, appendtag):
        tag=appendtag.find(name='div', attrs={'class':'arrows'})
        if tag:
            nexturl='http://www.focus.pl/'+tag.a['href']
            for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
                rem.extract()
            while nexturl:
                soup2=self.index_to_soup(nexturl)
                nexturl=None
                pagetext=soup2.find(name='div', attrs={'class':'txt'})
                tag=pagetext.find(name='div', attrs={'class':'arrows'})
                for r in tag.findAll(name='a'):
                    if u'Następne' in r.string:
                        nexturl='http://www.focus.pl/'+r['href']
                for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
                    rem.extract()
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)

    def get_cover_url(self):
        soup=self.index_to_soup('http://www.focus.pl/magazyn/')
        tag=soup.find(name='div', attrs={'class':'clr fl'})
        if tag:
            self.cover_url='http://www.focus.pl/' + tag.a['href']
        return getattr(self, 'cover_url', self.cover_url)

    def preprocess_html(self, soup):
        self.append_page(soup.body)
        return soup


@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Gazeta_Wyborcza(BasicNewsRecipe):
    title = u'Gazeta Wyborcza'
    __author__ = 'fenuks'
    cover_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    language = 'pl'
    description ='news from gazeta.pl'
    category='newspaper'
    INDEX='http://wyborcza.pl'
    remove_empty_feeds= True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript=True
    no_stylesheets=True
    remove_tags_before=dict(id='k0')
    remove_tags_after=dict(id='banP4')
    remove_tags=[dict(name='div', attrs={'class':'rel_box'}), dict(attrs={'class':['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}), dict(name='div', attrs={'id':'footer'})]
    feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
             (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
             (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
             (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
             (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
             (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
             (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
             (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
             (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
             (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
             (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
             (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
             (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
             (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
            ]

    def skip_ad_pages(self, soup):
        tag=soup.find(name='a', attrs={'class':'btn'})
        if tag:
            new_soup=self.index_to_soup(tag['href'], raw=True)
            return new_soup

    def append_page(self, soup, appendtag):
        loop=False
        tag = soup.find('div', attrs={'id':'Str'})
        if appendtag.find('div', attrs={'id':'Str'}):
            nexturl=tag.findAll('a')
            appendtag.find('div', attrs={'id':'Str'}).extract()
            loop=True
        if appendtag.find(id='source'):
            appendtag.find(id='source').extract()
        while loop:
            loop=False
            for link in nexturl:
                if u'następne' in link.string:
                    url= self.INDEX + link['href']
                    soup2 = self.index_to_soup(url)
                    pagetext = soup2.find(id='artykul')
                    pos = len(appendtag.contents)
                    appendtag.insert(pos, pagetext)
                    tag = soup2.find('div', attrs={'id':'Str'})
                    nexturl=tag.findAll('a')
                    loop=True

    def gallery_article(self, appendtag):
        tag=appendtag.find(id='container_gal')
        if tag:
            nexturl=appendtag.find(id='gal_btn_next').a['href']
            appendtag.find(id='gal_navi').extract()
            while nexturl:
                soup2=self.index_to_soup(nexturl)
                pagetext=soup2.find(id='container_gal')
                nexturl=pagetext.find(id='gal_btn_next')
                if nexturl:
                    nexturl=nexturl.a['href']
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
                rem=appendtag.find(id='gal_navi')
                if rem:
                    rem.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup

    def print_version(self, url):
        if 'http://wyborcza.biz/biznes/' not in url:
            return url
        else:
            return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')


@@ -9,8 +9,17 @@ class Gram_pl(BasicNewsRecipe):
     oldest_article = 8
     max_articles_per_feed = 100
     no_stylesheets= True
+    extra_css = 'h2 {font-style: italic; font-size:20px;}'
     cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
     remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
     keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
     feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
              (u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
+
+    def parse_feeds (self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
+                    feed.articles.remove(article)
+        return feeds


@@ -1,7 +1,9 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
 class AdvancedUserRecipe(BasicNewsRecipe):
-    title = 'Heise-online'
+    title = 'heise online'
     description = 'News vom Heise-Verlag'
     __author__ = 'schuster'
     use_embedded_content = False
@@ -12,10 +14,11 @@ class AdvancedUserRecipe(BasicNewsRecipe):
     remove_empty_feeds = True
     timeout = 5
     no_stylesheets = True
+    encoding = 'utf-8'

     remove_tags_after = dict(name ='p', attrs={'class':'editor'})
-    remove_tags = [dict(id='navi_top_container'),
+    remove_tags = [{'class':'navi_top_container'},
         dict(id='navi_bottom'),
         dict(id='mitte_rechts'),
         dict(id='navigation'),
@@ -25,28 +28,28 @@ class AdvancedUserRecipe(BasicNewsRecipe):
         dict(id='content_foren'),
         dict(id='seiten_navi'),
         dict(id='adbottom'),
-        dict(id='sitemap')]
+        dict(id='sitemap'),
+        dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
+    ]

     feeds = [
         ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
-        ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
-        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
-        ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
-        ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
-        ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
-        ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
-        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
-        ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
-        ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
         ('iX', 'http://www.heise.de/ix/news/news.rdf'),
-        ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
+        ('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
+        ('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
+        ('Security', 'http://www.heise.de/security/news/news-atom.xml'),
+        ('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
+        ('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
+        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
+        ('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
+        ('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
         ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
         ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
         ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
-        ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
+        ('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
         ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
     ]

     def print_version(self, url):
         return url + '?view=print'


@@ -18,6 +18,7 @@ class HoustonChronicle(BasicNewsRecipe):
     keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
         'hst-articletext' in x or 'hst-galleryitem' in x)}
+    remove_attributes = ['xmlns']

     feeds = [
         ('News', "http://www.chron.com/rss/feed/News-270.php"),

[Binary files: six recipe icons added, not shown — including recipes/icons/eioba.png (908 B) and recipes/icons/focus_pl.png (695 B); the remaining four are 718 B, 399 B, 221 B and 320 B.]


@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Konflikty(BasicNewsRecipe):
    title = u'Konflikty Zbrojne'
    __author__ = 'fenuks'
    cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg'
    language = 'pl'
    description ='military news'
    category='military, history'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')]


@@ -5,30 +5,46 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     description = 'News as provide by The Metro -UK'
     __author__ = 'Dave Asbury'
+    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
     no_stylesheets = True
     oldest_article = 1
-    max_articles_per_feed = 25
+    max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True

-    preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
+    #preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
+    preprocess_regexps = [
+        (re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
+    preprocess_regexps = [
+        (re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]

     language = 'en_GB'
     masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

-    extra_css = 'h2 {font: sans-serif medium;}'
-
     keep_only_tags = [
         dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
         dict(attrs={'class':['img-cnt figure']}),
         dict(attrs={'class':['art-img']}),
-        dict(name='div', attrs={'class':'art-lft'})
+        dict(name='div', attrs={'class':'art-lft'}),
+        dict(name='p')
     ]
     remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
         'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
         dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
-    ]
+        ,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
+    ]
     feeds = [
         (u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
+
+    extra_css = '''
+        body {font: sans-serif medium;}'
+        h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
+        h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
+        span{ font-size:9.5px; font-weight:bold;font-style:italic}
+        p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+    '''


@@ -6,19 +6,24 @@ __Region__ = 'Hong Kong'
 # Users of Kindle 3 with limited system-level CJK support
 # please replace the following "True" with "False".
 __MakePeriodical__ = True
-# Turn below to true if your device supports display of CJK titles
+# Turn below to True if your device supports display of CJK titles
 __UseChineseTitle__ = False
 # Set it to False if you want to skip images
 __KeepImages__ = True
-# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
+# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
 __UseLife__ = True
-# (HK only) if __UseLife__ is true, turn this on if you want to include the column section
+# (HK only) It is to disable the column section which is now a premium content
 __InclCols__ = False
+# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
+__ParsePFF__ = False
+# (HK only) Turn below to True if you wish hi-res images
+__HiResImg__ = False

 '''
 Change Log:
-2011/09/21: fetching "column" section is made optional. Default is False
+2011/10/04: option to get hi-res photos for the articles
+2011/09/21: fetching "column" section is made optional.
 2011/09/18: parse "column" section stuff from source text file directly.
 2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@@ -42,7 +47,7 @@ Change Log:
 2010/10/31: skip repeated articles in section pages
 '''

-import os, datetime, re
+import os, datetime, re, mechanize
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
@@ -56,7 +61,7 @@ class MPRecipe(BasicNewsRecipe):
     title = 'Ming Pao - Hong Kong'
     description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
     category = 'Chinese, News, Hong Kong'
-    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
     keep_only_tags = [dict(name='h1'),
                       dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
@@ -147,43 +152,6 @@ class MPRecipe(BasicNewsRecipe):
     conversion_options = {'linearize_tables':True}
     timefmt = ''

-    def image_url_processor(cls, baseurl, url):
-        # trick: break the url at the first occurance of digit, add an additional
-        # '_' at the front
-        # not working, may need to move this to preprocess_html() method
-#        minIdx = 10000
-#        i0 = url.find('0')
-#        if i0 >= 0 and i0 < minIdx:
-#            minIdx = i0
-#        i1 = url.find('1')
-#        if i1 >= 0 and i1 < minIdx:
-#            minIdx = i1
-#        i2 = url.find('2')
-#        if i2 >= 0 and i2 < minIdx:
-#            minIdx = i2
-#        i3 = url.find('3')
-#        if i3 >= 0 and i0 < minIdx:
-#            minIdx = i3
-#        i4 = url.find('4')
-#        if i4 >= 0 and i4 < minIdx:
-#            minIdx = i4
-#        i5 = url.find('5')
-#        if i5 >= 0 and i5 < minIdx:
-#            minIdx = i5
-#        i6 = url.find('6')
-#        if i6 >= 0 and i6 < minIdx:
-#            minIdx = i6
-#        i7 = url.find('7')
-#        if i7 >= 0 and i7 < minIdx:
-#            minIdx = i7
-#        i8 = url.find('8')
-#        if i8 >= 0 and i8 < minIdx:
-#            minIdx = i8
-#        i9 = url.find('9')
-#        if i9 >= 0 and i9 < minIdx:
-#            minIdx = i9
-        return url

     def get_dtlocal(self):
         dt_utc = datetime.datetime.utcnow()
         if __Region__ == 'Hong Kong':
@@ -260,15 +228,16 @@ class MPRecipe(BasicNewsRecipe):
             else:
                 for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                                    (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                                   (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+                                   (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
+                                   (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                     articles = self.parse_section(url)
                     if articles:
                         feeds.append((title, articles))

             # special- editorial
-            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
-            if ed_articles:
-                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+            #if ed_articles:
+            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

             for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                                (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -279,20 +248,39 @@ class MPRecipe(BasicNewsRecipe):
             # special - finance
             #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
-            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
-            if fin_articles:
-                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+            #if fin_articles:
+            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
+                articles = self.parse_section2(url, keystr)
+                if articles:
+                    feeds.append((title, articles))

-            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
-                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
-                articles = self.parse_section(url)
-                if articles:
-                    feeds.append((title, articles))
+            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+            #    articles = self.parse_section(url)
+            #    if articles:
+            #        feeds.append((title, articles))

             # special - entertainment
-            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-            if ent_articles:
-                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+            #if ent_articles:
+            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
+                                       ]:
+                articles = self.parse_section2(url, keystr)
+                if articles:
+                    feeds.append((title, articles))
+
+            if __InclCols__ == True:
+                # parse column section articles directly from .txt files
+                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+                                           ]:
+                    articles = self.parse_section2_txt(url, keystr)
+                    if articles:
+                        feeds.append((title, articles))

             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -300,11 +288,6 @@ class MPRecipe(BasicNewsRecipe):
                 if articles:
                     feeds.append((title, articles))

-            # special- columns
-            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-            if col_articles:
-                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
-
         elif __Region__ == 'Vancouver':
             for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                                (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -348,6 +331,16 @@ class MPRecipe(BasicNewsRecipe):
                 title = self.tag_to_string(a)
                 url = a.get('href', False)
                 url = 'http://news.mingpao.com/' + dateStr + '/' +url
+                # replace the url to the print-friendly version
+                if __ParsePFF__ == True:
+                    if url.rfind('Redirect') <> -1:
+                        url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
+                        url = re.sub('%2F.*%2F', '/', url)
+                        title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
+                        url = url.replace('%2Etxt', '_print.htm')
+                        url = url.replace('%5F', '_')
+                    else:
+                        url = url.replace('.htm', '_print.htm')
                 if url not in included_urls and url.rfind('Redirect') == -1:
                     current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                     included_urls.append(url)
@@ -472,38 +465,119 @@ class MPRecipe(BasicNewsRecipe):
             current_articles.reverse()
         return current_articles

-    # preprocess those .txt based files
+    # preprocess those .txt and javascript based files
     def preprocess_raw_html(self, raw_html, url):
-        if url.rfind('ftp') == -1:
-            return raw_html
-        else:
-            splitter = re.compile(r'\n')  # Match non-digits
-            new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
-            next_is_img_txt = False
-            title_started = False
-            met_article_start_char = False
-            for item in splitter.split(raw_html):
-                if item.startswith(u'\u3010'):
-                    met_article_start_char = True
-                    new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
-                else:
-                    if next_is_img_txt == False:
-                        if item.startswith('='):
-                            next_is_img_txt = True
-                            new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
-                        else:
-                            if met_article_start_char == False:
-                                if title_started == False:
-                                    new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
-                                    title_started = True
-                                else:
-                                    new_raw_html = new_raw_html + item + '\n'
-                            else:
-                                new_raw_html = new_raw_html + item + '<p>\n'
-                    else:
-                        next_is_img_txt = False
-                        new_raw_html = new_raw_html + item + '\n'
-            return new_raw_html + '</div></body></html>'
+        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
+        if __HiResImg__ == True:
+            # TODO: add a _ in front of an image url
+            if url.rfind('news.mingpao.com') > -1:
+                imglist = re.findall('src="?.*?jpg"', raw_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                for img in imglist:
+                    gifimg = img.replace('jpg"', 'gif"')
+                    try:
+                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        raw_html = raw_html.replace(img, gifimg)
+                    except:
+                        # find the location of the first _
+                        pos = img.find('_')
+                        if pos > -1:
+                            # if found, insert _ after the first _
+                            newimg = img[0:pos] + '_' + img[pos:]
+                            raw_html = raw_html.replace(img, newimg)
+                        else:
+                            # if not found, insert _ after "
+                            raw_html = raw_html.replace(img[1:], '"_' + img[1:])
+            elif url.rfind('life.mingpao.com') > -1:
+                imglist = re.findall('src=\'?.*?jpg\'', raw_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                #print 'Img list: ', imglist, '\n'
+                for img in imglist:
+                    gifimg = img.replace('jpg\'', 'gif\'')
+                    try:
+                        #print 'Original: ', url
+                        #print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
+                        gifurl = re.sub(r'dailynews.*txt', '', url)
+                        #print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
+                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
+                        #print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
+                        #br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        raw_html = raw_html.replace(img, gifimg)
+                    except:
+                        #print 'GIF not found'
+                        pos = img.rfind('/')
+                        newimg = img[0:pos+1] + '_' + img[pos+1:]
+                        #print 'newimg: ', newimg
+                        raw_html = raw_html.replace(img, newimg)
+        if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
+            return raw_html
+        else:
+            if url.rfind('_print.htm') <> -1:
+                # javascript based file
+                splitter = re.compile(r'\n')
+                new_raw_html = '<html><head><title>Untitled</title></head>'
+                new_raw_html = new_raw_html + '<body>'
+                for item in splitter.split(raw_html):
+                    if item.startswith('var heading1 ='):
+                        heading = item.replace('var heading1 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="heading">' + heading
+                    if item.startswith('var heading2 ='):
+                        heading = item.replace('var heading2 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        if heading <> '':
+                            new_raw_html = new_raw_html + '<br>' + heading + '</div>'
+                        else:
+                            new_raw_html = new_raw_html + '</div>'
+                    if item.startswith('var content ='):
+                        content = item.replace("var content = ", '')
+                        content = content.replace('\'', '')
+                        content = content.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
+                    if item.startswith('var photocontent ='):
+                        photo = item.replace('var photocontent = \'', '')
+                        photo = photo.replace('\'', '')
+                        photo = photo.replace(';', '')
+                        photo = photo.replace('<tr>', '')
+                        photo = photo.replace('<td>', '')
+                        photo = photo.replace('</tr>', '')
+                        photo = photo.replace('</td>', '<br>')
+                        photo = photo.replace('class="photo"', '')
+                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
+                return new_raw_html + '</body></html>'
+            else:
+                # .txt based file
+                splitter = re.compile(r'\n')  # Match non-digits
+                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
+                next_is_img_txt = False
+                title_started = False
+                met_article_start_char = False
+                for item in splitter.split(raw_html):
+                    if item.startswith(u'\u3010'):
+                        met_article_start_char = True
+                        new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
+                    else:
+                        if next_is_img_txt == False:
+                            if item.startswith('='):
+                                next_is_img_txt = True
+                                new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
+                            else:
+                                if met_article_start_char == False:
+                                    if title_started == False:
+                                        new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
+                                        title_started = True
+                                    else:
+                                        new_raw_html = new_raw_html + item + '\n'
+                                else:
+                                    new_raw_html = new_raw_html + item + '<p>\n'
+                        else:
+                            next_is_img_txt = False
+                            new_raw_html = new_raw_html + item + '\n'
+                return new_raw_html + '</div></body></html>'

     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
@@ -604,7 +678,7 @@ class MPRecipe(BasicNewsRecipe):
                     if po is None:
                         self.play_order_counter += 1
                         po = self.play_order_counter
-                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
                         play_order=po, author=auth, description=desc)
                 last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                 for sp in a.sub_pages:
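
The hi-res image handling added above reduces to one pattern: probe whether a better variant of each jpg exists (a .gif here) and swap it in, falling back to a '_'-mangled file name when the probe fails. A standalone sketch of that pattern, with a hypothetical URL; mechanize is used as in the recipe:

    import mechanize

    def upgrade_image_url(url):
        # Probe for a hi-res variant without recording browser history;
        # open_novisit() raises on HTTP errors, which signals "not there".
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        candidate = url.replace('.jpg', '.gif')
        try:
            br.open_novisit(candidate)
            return candidate
        except Exception:
            # Fall back: prefix the file name with '_', as the recipe does.
            pos = url.rfind('/')
            return url[:pos+1] + '_' + url[pos+1:]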

recipes/naczytniki.recipe (new file)

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class naczytniki(BasicNewsRecipe):
title = u'naczytniki.pl'
__author__ = 'fenuks'
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
language = 'pl'
description ='everything about e-readers'
category='readers'
oldest_article = 7
max_articles_per_feed = 100
remove_tags_after= dict(name='div', attrs={'class':'sociable'})
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]


@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Nowa_Fantastyka(BasicNewsRecipe):
    title = u'Nowa Fantastyka'
    oldest_article = 7
    __author__ = 'fenuks'
    language = 'pl'
    description ='site for fantasy readers'
    category='fantasy'
    max_articles_per_feed = 100
    INDEX='http://www.fantastyka.pl/'
    remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
    #remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
    remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
    remove_tags=[dict(attrs={'class':'avatar2'})]
    feeds = []

    def find_articles(self, url):
        articles = []
        soup=self.index_to_soup(url)
        tag=soup.find(attrs={'class':'belka1-tlo-m'})
        art=tag.findAll(name='a', attrs={'class':'a-box'})
        for i in art:
            title=i.string
            url=self.INDEX+i['href']
            #date=soup.find(id='footer').ul.li.string[41:-1]
            articles.append({'title' : title,
                             'url' : url,
                             'date' : '',
                             'description' : ''
                             })
        return articles

    def parse_index(self):
        feeds = []
        feeds.append((u"Opowiadania", self.find_articles('http://www.fantastyka.pl/3.html')))
        feeds.append((u"Publicystyka", self.find_articles('http://www.fantastyka.pl/6.html')))
        feeds.append((u"Hype Park", self.find_articles('http://www.fantastyka.pl/9.html')))
        return feeds

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.fantastyka.pl/1.html')
        cover=soup.find(name='img', attrs={'class':'okladka'})
        self.cover_url=self.INDEX+ cover['src']
        return getattr(self, 'cover_url', self.cover_url)


@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class RevistaPiaui(BasicNewsRecipe):
    title = u'Revista piau\xed'
    language = 'pt_BR'
    __author__ = u'Eduardo Gustini Simões'
    oldest_article = 31
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [(u'Edi\xe7\xe3o Atual', u'http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')]

    def parse_feeds (self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                soup = self.index_to_soup('http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')
                itemTitle = article.title.partition('|')[0].rstrip()
                item = soup.find(text=itemTitle)
                articleDescription = item.parent.parent.description.string.partition('<br />')[2]
                article.summary = articleDescription
        return feeds

    def populate_article_metadata(self, article, soup, first):
        h2 = soup.find('h2')
        h2.string.replaceWith(h2.string.partition('|')[0].rstrip())
        h2.replaceWith(h2.prettify() + '<p><em>' + article.summary + '</em></p><p><em>' + ' posted at ' + article.localtime.strftime('%d-%m-%Y') + '</em></p>')


@ -9,285 +9,79 @@ calibre recipe for slate.com
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Comment, Tag
class Slate(BasicNewsRecipe): class Slate(BasicNewsRecipe):
# Method variables for customizing downloads
description = 'A general-interest publication offering analysis and commentary about politics, news and culture.' description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
__author__ = 'GRiker, Sujata Raman and Nick Redding' __author__ = 'Kovid Goyal'
max_articles_per_feed = 100
oldest_article = 14
recursions = 0
delay = 0
simultaneous_downloads = 5
timeout = 120.0
timefmt = '' timefmt = ''
feeds = None
no_stylesheets = True no_stylesheets = True
encoding = None
language = 'en' language = 'en'
title = 'Slate'
INDEX = 'http://slate.com'
encoding = 'utf-8'
preprocess_regexps = [
(re.compile(r'<!--.*?-->', re.DOTALL), lambda x: ''),
(re.compile(r'^.*?<html', re.DOTALL), lambda x:'<html'),
(re.compile(r'<meta[^>]+?/>', re.DOTALL), lambda x:''),
]
remove_tags = [
{'name':['link', 'script']},
{'class':['share-box-flank', 'sl-crumbs', 'sl-tbar',
'sl-chunky-tbar']},
]
remove_tags_after = [{'class':'sl-art-creds-cntr'}]
keep_only_tags = {'class':'sl-body-wrapper'}
remove_attributes = ['style']
slate_complete = True def print_version(self, url):
if slate_complete: return url.replace('.html', '.single.html')
title = 'Slate (complete)'
else:
title = 'Slate (weekly)'
# Method variables for customizing feed parsing def parse_index(self) :
summary_length = 250
use_embedded_content = None
# Method variables for pre/post processing of HTML
preprocess_regexps = [ (re.compile(r'<p><em>Disclosure: <strong>Slate</strong> is owned by the Washington Post.*</p>',
re.DOTALL|re.IGNORECASE),
lambda match: ''),
(re.compile(r'<p><strong><em>Join the discussion about this story on.*</p>',
re.DOTALL|re.IGNORECASE),
lambda match: '') ]
match_regexps = []
# The second entry is for 'Big Money', which comes from a different site, uses different markup
keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body']}),
dict(attrs={ 'id':['content']}) ]
# The second entry is for 'Big Money', which comes from a different site, uses different markup
remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper',
'article_bottom_tools_cntr','fray_article_discussion','fray_article_links','bottom_sponsored_links','author_bio',
'bizbox_links_bottom','ris_links_wrapper','BOXXLE',
'comments_button','add_comments_button','comments-to-fray','marriott_ad',
'article_bottom_tools','recommend_tab2','fbog_article_bottom_cntr']}),
dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ]
excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast']
excludedTitleKeywords = ['Gabfest','Slate V','on Twitter']
excludedAuthorKeywords = []
excludedContentKeywords = ['http://twitter.com/Slate']
extra_css = '''
.h1_subhead{font-family:Arial; font-size:small; }
h1{font-family:Verdana; font-size:large; }
.byline {font-family:Georgia; margin-bottom: 0px; }
.dateline {font-family:Arial; font-size: smaller; height: 0pt;}
.imagewrapper {font-family:Verdana;font-size:x-small; }
.source {font-family:Verdana; font-size:x-small;}
.credit {font-family:Verdana; font-size: smaller;}
#article_body {font-family:Verdana; }
#content {font-family:Arial; }
.caption{font-family:Verdana;font-style:italic; font-size:x-small;}
h3{font-family:Arial; font-size:small}
'''
# Local variables to extend class
baseURL = 'http://slate.com'
section_dates = []
# class extension methods
def tag_to_strings(self, tag):
if not tag:
return ''
if isinstance(tag, basestring):
return tag
strings = []
for item in tag.contents:
if isinstance(item, (NavigableString, CData)):
strings.append(item.string)
elif isinstance(item, Tag):
res = self.tag_to_string(item,use_alt=False)
if res:
strings.append(res)
return strings
def extract_named_sections(self):
soup = self.index_to_soup( self.baseURL )
soup_nav_bar = soup.find(True, attrs={'id':'nav'})
briefing_nav = soup.find('li')
briefing_url = briefing_nav.a['href']
for section_nav in soup_nav_bar.findAll('li'):
section_name = self.tag_to_string(section_nav,use_alt=False)
self.section_dates.append(section_name)
soup = self.index_to_soup(briefing_url)
self.log("Briefing url = %s " % briefing_url)
section_lists = soup.findAll('ul','view_links_list')
sections = []
for section in section_lists :
sections.append(section)
return sections
def extract_dated_sections(self):
soup = self.index_to_soup( self.baseURL )
soup_top_stories = soup.find(True, attrs={'id':'tap3_cntr'})
if soup_top_stories:
self.section_dates.append("Top Stories")
self.log("SELECTION TOP STORIES %s" % "Top Stories")
soup = soup.find(True, attrs={'id':'toc_links_container'})
todays_section = soup.find(True, attrs={'class':'todaydateline'})
self.section_dates.append(self.tag_to_string(todays_section,use_alt=False))
self.log("SELECTION DATE %s" % self.tag_to_string(todays_section,use_alt=False))
older_section_dates = soup.findAll(True, attrs={'class':'maindateline'})
for older_section in older_section_dates :
self.section_dates.append(self.tag_to_string(older_section,use_alt=False))
self.log("SELECTION DATE %s" % self.tag_to_string(older_section,use_alt=False))
if soup_top_stories:
headline_stories = soup_top_stories
self.log("HAVE top_stories")
else:
headline_stories = None
self.log("NO top_stories")
section_lists = soup.findAll('ul')
# Prepend the headlines to the first section
if headline_stories:
section_lists.insert(0,headline_stories)
sections = []
for section in section_lists :
sections.append(section)
return sections
def extract_section_articles(self, sections_html) :
# Find the containers with section content
sections = sections_html
articles = {}
key = None
        ans = []

        for (i,section) in enumerate(sections) :

            # Get the section name
            if section.has_key('id') :
                self.log("PROCESSING SECTION id = %s" % section['id'])
                key = self.section_dates[i]
                if key.startswith("Pod"):
                    continue
                if key.startswith("Blog"):
                    continue
                articles[key] = []
                ans.append(key)
            elif self.slate_complete:
                key = self.section_dates[i]
                if key.startswith("Pod"):
                    continue
                if key.startswith("Blog"):
                    continue
                self.log("PROCESSING SECTION name = %s" % key)
                articles[key] = []
                ans.append(key)
            else :
                self.log("SECTION %d HAS NO id" % i);
                continue

            # Get the section article_list
            article_list = section.findAll('li')

            # Extract the article attributes
            for article in article_list :
                bylines = self.tag_to_strings(article)
                url = article.a['href']
                title = bylines[0]
                full_title = self.tag_to_string(article,use_alt=False)
                #self.log("ARTICLE TITLE%s" % title)
                #self.log("ARTICLE FULL_TITLE%s" % full_title)
                #self.log("URL %s" % url)
                author = None
                description = None
                pubdate = None

                if len(bylines) == 2 and self.tag_to_string(article).find("Today's Papers") > 0 :
                    description = "A summary of what's in the major U.S. newspapers."

                if len(bylines) == 3 :
                    author = bylines[2].strip()
                    author = re.sub('[\r][\n][\t][\t\t]','', author)
                    author = re.sub(',','', author)
                    if bylines[1] is not None :
                        description = bylines[1]
                        full_byline = self.tag_to_string(article)
                        if full_byline.find('major U.S. newspapers') > 0 :
                            description = "A summary of what's in the major U.S. newspapers."

                if len(bylines) > 3 and author is not None:
                    author += " | "
                    for (i,substring) in enumerate(bylines[3:]) :
                        #print "substring: %s" % substring.encode('cp1252')
                        author += substring.strip()
                        if i < len(bylines[3:]) :
                            author += " | "

                # Skip articles whose descriptions contain excluded keywords
                if description is not None and len(self.excludedDescriptionKeywords):
                    excluded = re.compile('|'.join(self.excludedDescriptionKeywords))
                    found_excluded = excluded.search(description)
                    if found_excluded :
                        self.log(" >>> skipping %s (description keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
                        continue

                # Skip articles whose title contain excluded keywords
                if full_title is not None and len(self.excludedTitleKeywords):
                    excluded = re.compile('|'.join(self.excludedTitleKeywords))
                    #self.log("evaluating full_title: %s" % full_title)
                    found_excluded = excluded.search(full_title)
                    if found_excluded :
                        self.log(" >>> skipping %s (title keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
                        continue

                # Skip articles whose author contain excluded keywords
                if author is not None and len(self.excludedAuthorKeywords):
                    excluded = re.compile('|'.join(self.excludedAuthorKeywords))
                    found_excluded = excluded.search(author)
                    if found_excluded :
                        self.log(" >>> skipping %s (author keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
                        continue

                skip_this_article = False
                # Check to make sure we're not adding a duplicate
                for article in articles[key] :
                    if article['url'] == url :
                        skip_this_article = True
                        self.log("SKIPPING DUP %s" % url)
                        break
                if skip_this_article :
                    continue

                # Build the dictionary entry for this article
                feed = key
                if not articles.has_key(feed) :
                    articles[feed] = []
                articles[feed].append(dict(title=title, url=url, date=pubdate, description=description,
                    author=author, content=''))
                #self.log("KEY %s" % feed)
                #self.log("APPENDED %s" % url)

            # Promote 'newspapers' to top
            for (i,article) in enumerate(articles[feed]) :
                if article['description'] is not None :
                    if article['description'].find('newspapers') > 0 :
                        articles[feed].insert(0,articles[feed].pop(i))

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans

    def print_version(self, url) :
        return url + 'pagenum/all/'

    # Class methods
    def parse_index(self) :
        if self.slate_complete:
            sections = self.extract_named_sections()
        else:
            sections = self.extract_dated_sections()
        section_list = self.extract_section_articles(sections)
        return section_list

# --- added ---
        ans = []
        for sectitle, url in (
                ('News & Politics', '/articles/news_and_politics.html'),
                ('Technology', '/articles/technology.html'),
                ('Business', '/articles/business.html'),
                ('Arts', '/articles/arts.html'),
                ('Life', '/articles/life.html'),
                ('Health & Science', '/articles/health_and_science.html'),
                ('Sports', '/articles/sports.html'),
                ('Double X', '/articles/double_x.html'),
                ):
            url = self.INDEX + url
            self.log('Found section:', sectitle)
            articles = self.slate_section_articles(self.index_to_soup(url))
            if articles:
                ans.append((sectitle, articles))
        return ans

    def slate_section_articles(self, soup):
        cont = soup.find('div', id='most_read')
        seen = set()
        ans = []
        for h4 in cont.findAll('h4'):
            a = h4.find('a', href=True)
            if a is None: continue
            url = a['href']
            if url.startswith('/'):
                url = self.INDEX + url
            if url in seen: continue
            seen.add(url)
            title = self.tag_to_string(a)
            parent = h4.parent
            h3 = parent.find('h3')
            desc = ''
            if h3 is not None:
                desc = self.tag_to_string(h3)
            a = parent.find('a', rel='author')
            if a is not None:
                a = self.tag_to_string(a)
            art = {'title':title, 'description':desc, 'date':'', 'url':url}
            if a:
                art['author'] = a
            self.log('\tFound article:', title, ' by ', a)
            ans.append(art)
        return ans

# --- unchanged ---
    def get_masthead_url(self):
        masthead = 'http://img.slate.com/images/redesign2008/slate_logo.gif'
@ -299,153 +93,4 @@ class Slate(BasicNewsRecipe):
        masthead = None
        return masthead
# --- removed ---
    def stripAnchors(self,soup):
        body = soup.find('div',attrs={'id':['article_body','content']})
        if body is not None:
            paras = body.findAll('p')
            if paras is not None:
                for para in paras:
                    aTags = para.findAll('a')
                    if aTags is not None:
                        for a in aTags:
                            if a.img is None:
                                #print repr(a.renderContents())
                                a.replaceWith(a.renderContents().decode('utf-8','replace'))
        return soup

    def preprocess_html(self, soup) :
        # Remove 'grayPlus4.png' images
        imgs = soup.findAll('img')
        if imgs is not None:
            for img in imgs:
                if re.search("grayPlus4.png",str(img)):
                    img.extract()

        # Delete article based upon content keywords
        if len(self.excludedDescriptionKeywords):
            excluded = re.compile('|'.join(self.excludedContentKeywords))
            found_excluded = excluded.search(str(soup))
            if found_excluded :
                print "No allowed content found, removing article"
                raise Exception('Rejected article')

        # Articles from www.thebigmoney.com use different tagging for byline, dateline and body
        head = soup.find('head')
        if head.link is not None and re.search('www\.thebigmoney\.com', str(head)):
            byline = soup.find('div',attrs={'id':'byline'})
            if byline is not None:
                byline['class'] = byline['id']

            dateline = soup.find('div',attrs={'id':'dateline'})
            if dateline is not None:
                dateline['class'] = dateline['id']

            body = soup.find('div',attrs={'id':'content'})
            if body is not None:
                body['class'] = 'article_body'

            # Synthesize a department kicker
            h3Tag = Tag(soup,'h3')
            emTag = Tag(soup,'em')
            emTag.insert(0,NavigableString("the big money: Today's business press"))
            h3Tag.insert(0,emTag)
            soup.body.insert(0,h3Tag)

        # Strip anchors from HTML
        return self.stripAnchors(soup)

    def postprocess_html(self, soup, first_fetch) :
        # Fix up dept_kicker as <h3><em>
        dept_kicker = soup.find('div', attrs={'class':'department_kicker'})
        if dept_kicker is not None :
            kicker_strings = self.tag_to_strings(dept_kicker)
            kicker = ''.join(kicker_strings[2:])
            kicker = re.sub('\.','',kicker)
            h3Tag = Tag(soup, "h3")
            emTag = Tag(soup, "em")
            emTag.insert(0,NavigableString(kicker))
            h3Tag.insert(0, emTag)
            dept_kicker.replaceWith(h3Tag)
        else:
            self.log("No kicker--return null")
            return None

        # Fix up the concatenated byline and dateline
        byline = soup.find(True,attrs={'class':'byline'})
        if byline is not None :
            bylineTag = Tag(soup,'div')
            bylineTag['class'] = 'byline'
            #bylineTag['height'] = '0em'
            bylineTag.insert(0,self.tag_to_string(byline))
            byline.replaceWith(bylineTag)

        dateline = soup.find(True, attrs={'class':'dateline'})
        if dateline is not None :
            datelineTag = Tag(soup, 'div')
            datelineTag['class'] = 'dateline'
            #datelineTag['margin-top'] = '0em'
            datelineTag.insert(0,self.tag_to_string(dateline))
            dateline.replaceWith(datelineTag)

        # Change captions to italic, add <hr>
        for caption in soup.findAll(True, {'class':'caption'}) :
            if caption is not None:
                emTag = Tag(soup, "em")
                emTag.insert(0, '<br />' + self.tag_to_string(caption))
                hrTag = Tag(soup, 'hr')
                emTag.insert(1, hrTag)
                caption.replaceWith(emTag)

        # Fix photos
        for photo in soup.findAll('span',attrs={'class':'imagewrapper'}):
            if photo.a is not None and photo.a.img is not None:
                divTag = Tag(soup,'div')
                divTag['class'] ='imagewrapper'
                divTag.insert(0,photo.a.img)
                photo.replaceWith(divTag)

        return soup

    def postprocess_book(self, oeb, opts, log) :

        def extract_byline(href) :
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            byline = soup.find(True,attrs={'class':'byline'})
            if byline is not None:
                return self.tag_to_string(byline,use_alt=False)
            else :
                return None

        def extract_description(href) :
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            paragraphs = soup.findAll('p')
            for p in paragraphs :
                if self.tag_to_string(p,use_alt=False).startswith('By ') or \
                   self.tag_to_string(p,use_alt=False).startswith('Posted '):
                    continue
                comment = p.find(text=lambda text:isinstance(text, Comment))
                if comment is not None:
                    continue
                else:
                    return self.tag_to_string(p,use_alt=False)[:self.summary_length] + '...'
            return None

        # Method entry point here
        # Single section toc looks different than multi-section tocs
        if oeb.toc.depth() == 2 :
            for article in oeb.toc :
                if article.author is None :
                    article.author = extract_byline(article.href)
                if article.description is None :
                    article.description = extract_description(article.href)
        elif oeb.toc.depth() == 3 :
            for section in oeb.toc :
                for article in section :
                    if article.author is None :
                        article.author = extract_byline(article.href)
                    if article.description is None :
                        article.description = extract_description(article.href)
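Note: whichever implementation is used, BasicNewsRecipe.parse_index() must return a list of (section title, list of article dicts) tuples, which is exactly what the new code above builds. A minimal sketch (the recipe name, section and URL are made up for illustration):

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = u'Example'

        def parse_index(self):
            # Each article dict needs at least 'title' and 'url'; 'date',
            # 'description' and 'author' are optional.
            articles = [{'title': 'Example story', 'date': '',
                         'url': 'http://example.com/story',
                         'description': 'A sample entry.'}]
            return [('Example Section', articles)]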

View File

@ -9,4 +9,6 @@ class Tablety_pl(BasicNewsRecipe):
language = 'pl' language = 'pl'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
keep_only_tags=[dict(name='header', attrs={'class':'entry-header'}), dict(name='div', attrs={'class':'entry-content clearfix'})]
remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'}), dict(name='span', attrs={'class':'dsq-postid'})]
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')] feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
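The two lines added above follow the usual recipe pattern: keep_only_tags first reduces each page to the matched containers, then remove_tags prunes unwanted markup inside what remains. A minimal sketch of the same idea for a hypothetical WordPress-style site (class names and attribute values are assumptions, not taken from tablety.pl):

    from calibre.web.feeds.news import BasicNewsRecipe

    class Example(BasicNewsRecipe):
        title = u'Example'
        keep_only_tags = [dict(name='div', attrs={'class': 'entry-content'})]
        remove_tags = [dict(name='div', attrs={'class': 'sharing'})]
        feeds = [(u'Posts', u'http://example.com/feed/')]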

View File

@ -278,6 +278,8 @@ def get_proxies(debug=True):
continue continue
if proxy.startswith(key+'://'): if proxy.startswith(key+'://'):
proxy = proxy[len(key)+3:] proxy = proxy[len(key)+3:]
        if key == 'https' and proxy.startswith('http://'):
            proxy = proxy[7:]
if proxy.endswith('/'): if proxy.endswith('/'):
proxy = proxy[:-1] proxy = proxy[:-1]
if len(proxy) > 4: if len(proxy) > 4:
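The two added lines cover the common case of an https_proxy environment variable that points at an http:// URL; without them the mismatched scheme would survive the stripping step. A standalone sketch of the resulting normalization (the helper name is mine, not calibre's):

    def normalize_proxy(key, proxy):
        # e.g. key = 'https', proxy = 'http://proxy.example.com:3128/'
        if proxy.startswith(key + '://'):
            proxy = proxy[len(key) + 3:]
        if key == 'https' and proxy.startswith('http://'):
            proxy = proxy[7:]
        if proxy.endswith('/'):
            proxy = proxy[:-1]
        return proxy  # -> 'proxy.example.com:3128'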

View File

@ -437,8 +437,8 @@ class TabletOutput(iPadOutput):
short_name = 'tablet' short_name = 'tablet'
description = _('Intended for generic tablet devices, does no resizing of images') description = _('Intended for generic tablet devices, does no resizing of images')
screen_size = (sys.maxint, sys.maxint) screen_size = (10000, 10000)
comic_screen_size = (sys.maxint, sys.maxint) comic_screen_size = (10000, 10000)
class SamsungGalaxy(TabletOutput): class SamsungGalaxy(TabletOutput):
name = 'Samsung Galaxy' name = 'Samsung Galaxy'

View File

@ -414,7 +414,8 @@ class DevicePlugin(Plugin):
@classmethod @classmethod
def config_widget(cls): def config_widget(cls):
''' '''
Should return a QWidget. The QWidget contains the settings for the device interface Should return a QWidget. The QWidget contains the settings for the
device interface
''' '''
raise NotImplementedError() raise NotImplementedError()
@ -429,8 +430,9 @@ class DevicePlugin(Plugin):
@classmethod @classmethod
def settings(cls): def settings(cls):
''' '''
Should return an opts object. The opts object should have at least one attribute Should return an opts object. The opts object should have at least one
`format_map` which is an ordered list of formats for the device. attribute `format_map` which is an ordered list of formats for the
device.
''' '''
raise NotImplementedError() raise NotImplementedError()
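The rewrapped docstring leaves the contract unchanged: settings() must return an object with a format_map attribute listing formats in priority order. A minimal sketch of a conforming implementation (the Opts and ExampleDevice classes are illustrative, not calibre API):

    from calibre.devices.interface import DevicePlugin

    class Opts(object):
        def __init__(self, format_map):
            # Ordered list of formats the device accepts, preferred first.
            self.format_map = format_map

    class ExampleDevice(DevicePlugin):
        @classmethod
        def settings(cls):
            return Opts(['epub', 'pdf'])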

View File

@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
import os import os
import sqlite3 as sqlite import sqlite3 as sqlite
from contextlib import closing from contextlib import closing
from calibre.devices.usbms.books import BookList from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book from calibre.devices.kobo.books import Book
from calibre.devices.kobo.books import ImageWrapper from calibre.devices.kobo.books import ImageWrapper
@ -16,6 +15,7 @@ from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS, debug_print from calibre.devices.usbms.driver import USBMS, debug_print
from calibre import prints from calibre import prints
from calibre.devices.usbms.books import CollectionsBookList from calibre.devices.usbms.books import CollectionsBookList
from calibre.utils.magick.draw import save_cover_data_to
class KOBO(USBMS): class KOBO(USBMS):
@ -53,11 +53,23 @@ class KOBO(USBMS):
# --- removed ---
            _('The Kobo supports several collections including ')+\
            'Read, Closed, Im_Reading. ' +\
            _('Create tags for automatic management'),
            ]

    EXTRA_CUSTOMIZATION_DEFAULT = [', '.join(['tags'])]

    OPT_COLLECTIONS = 0

# --- added ---
            _('The Kobo supports several collections including ')+\
            'Read, Closed, Im_Reading. ' +\
            _('Create tags for automatic management'),
            _('Upload covers for books (newer readers)') +
            ':::'+_('Normally, the KOBO readers get the cover image from the'
            ' ebook file itself. With this option, calibre will send a '
            'separate cover image to the reader, useful if you '
            'have modified the cover.'),
            _('Upload Black and White Covers')
            ]

    EXTRA_CUSTOMIZATION_DEFAULT = [
            ', '.join(['tags']),
            True,
            True
            ]

    OPT_COLLECTIONS = 0
    OPT_UPLOAD_COVERS = 1
    OPT_UPLOAD_GRAYSCALE_COVERS = 2
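The driver reads these options back by position, so the OPT_* indices must stay in sync with the order of the customization list above. This is how the new upload_cover() further down consults them:

    opts = self.settings()
    upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]          # default True
    grayscale = opts.extra_customization[self.OPT_UPLOAD_GRAYSCALE_COVERS]    # default True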
def initialize(self): def initialize(self):
USBMS.initialize(self) USBMS.initialize(self)
@ -593,7 +605,7 @@ class KOBO(USBMS):
raise raise
else: else:
connection.commit() connection.commit()
debug_print(' Commit: Reset ReadStatus list') # debug_print(' Commit: Reset ReadStatus list')
cursor.close() cursor.close()
@ -616,7 +628,7 @@ class KOBO(USBMS):
raise raise
else: else:
connection.commit() connection.commit()
debug_print(' Commit: Setting ReadStatus List') # debug_print(' Commit: Setting ReadStatus List')
cursor.close() cursor.close()
def reset_favouritesindex(self, connection, oncard): def reset_favouritesindex(self, connection, oncard):
@ -635,7 +647,7 @@ class KOBO(USBMS):
raise raise
else: else:
connection.commit() connection.commit()
debug_print(' Commit: Reset FavouritesIndex list') # debug_print(' Commit: Reset FavouritesIndex list')
def set_favouritesindex(self, connection, ContentID): def set_favouritesindex(self, connection, ContentID):
cursor = connection.cursor() cursor = connection.cursor()
@ -650,7 +662,7 @@ class KOBO(USBMS):
raise raise
else: else:
connection.commit() connection.commit()
debug_print(' Commit: Set FavouritesIndex') # debug_print(' Commit: Set FavouritesIndex')
def update_device_database_collections(self, booklists, collections_attributes, oncard): def update_device_database_collections(self, booklists, collections_attributes, oncard):
# Only process categories in this list # Only process categories in this list
@ -702,9 +714,9 @@ class KOBO(USBMS):
# Process any collections that exist # Process any collections that exist
for category, books in collections.items(): for category, books in collections.items():
if category in supportedcategories: if category in supportedcategories:
debug_print("Category: ", category, " id = ", readstatuslist.get(category)) # debug_print("Category: ", category, " id = ", readstatuslist.get(category))
for book in books: for book in books:
debug_print(' Title:', book.title, 'category: ', category) # debug_print(' Title:', book.title, 'category: ', category)
if category not in book.device_collections: if category not in book.device_collections:
book.device_collections.append(category) book.device_collections.append(category)
@ -763,3 +775,93 @@ class KOBO(USBMS):
collections_attributes = [] collections_attributes = []
self.update_device_database_collections(booklist, collections_attributes, oncard) self.update_device_database_collections(booklist, collections_attributes, oncard)
    def upload_cover(self, path, filename, metadata, filepath):
        '''
        Upload book cover to the device.

        :param path: The full path to the directory where the associated book is located.
        :param filename: The name of the book file without the extension.
        :param metadata: metadata belonging to the book. Use metadata.thumbnail
                         for cover
        :param filepath: The full path to the ebook file
        '''
        opts = self.settings()
        if not opts.extra_customization[self.OPT_UPLOAD_COVERS]:
            # Building thumbnails disabled
            debug_print('KOBO: not uploading cover')
            return

        if not opts.extra_customization[self.OPT_UPLOAD_GRAYSCALE_COVERS]:
            uploadgrayscale = False
        else:
            uploadgrayscale = True

        debug_print('KOBO: uploading cover')
        try:
            self._upload_cover(path, filename, metadata, filepath, uploadgrayscale)
        except:
            debug_print('FAILED to upload cover', filepath)

    def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale):
        if metadata.cover:
            cover = self.normalize_path(metadata.cover.replace('/', os.sep))

            if os.path.exists(cover):
                # Get ContentID for the selected book
                extension = os.path.splitext(filepath)[1]
                ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(filepath)
                ContentID = self.contentid_from_path(filepath, ContentType)

                with closing(sqlite.connect(self.normalize_path(self._main_prefix +
                    '.kobo/KoboReader.sqlite'))) as connection:

                    # return bytestrings if the content cannot be decoded as unicode
                    connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

                    cursor = connection.cursor()
                    t = (ContentID,)
                    cursor.execute('select ImageId from Content where BookID is Null and ContentID = ?', t)
                    result = cursor.fetchone()
                    if result is None:
                        debug_print("No rows exist in the database - cannot upload")
                        return
                    else:
                        ImageID = result[0]
                        # debug_print("ImageId: ", result[0])

                    cursor.close()

                if ImageID != None:
                    path_prefix = '.kobo/images/'
                    path = self._main_prefix + path_prefix + ImageID

                    file_endings = {' - iPhoneThumbnail.parsed':(103,150),
                            ' - bbMediumGridList.parsed':(93,135),
                            ' - NickelBookCover.parsed':(500,725),
                            ' - N3_LIBRARY_FULL.parsed':(355,530),
                            ' - N3_LIBRARY_GRID.parsed':(149,233),
                            ' - N3_LIBRARY_LIST.parsed':(60,90),
                            ' - N3_SOCIAL_CURRENTREAD.parsed':(120,186),
                            }

                    for ending, resize in file_endings.items():
                        fpath = path + ending
                        fpath = self.normalize_path(fpath.replace('/', os.sep))

                        if os.path.exists(fpath):
                            with open(cover, 'rb') as f:
                                data = f.read()

                            # Return the data resized and in grayscale if
                            # required
                            data = save_cover_data_to(data, 'dummy.jpg',
                                    grayscale=uploadgrayscale,
                                    resize_to=resize, return_data=True)

                            with open(fpath, 'wb') as f:
                                f.write(data)
                else:
                    debug_print("ImageID could not be retrieved from the database")

View File

@ -319,7 +319,7 @@ class PRST1(USBMS):
THUMBNAIL_HEIGHT = 217 THUMBNAIL_HEIGHT = 217
SCAN_FROM_ROOT = True SCAN_FROM_ROOT = True
EBOOK_DIR_MAIN = __appname__ EBOOK_DIR_MAIN = __appname__
SUPPORTS_SUB_DIRS = True
def windows_filter_pnp_id(self, pnp_id): def windows_filter_pnp_id(self, pnp_id):
return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id
@ -329,4 +329,10 @@ class PRST1(USBMS):
return __appname__ return __appname__
return self.EBOOK_DIR_CARD_A return self.EBOOK_DIR_CARD_A
    def get_main_ebook_dir(self, for_upload=False):
        if for_upload:
            return __appname__
        return ''
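With this change the PRS-T1 driver uploads new books into a 'calibre' folder (__appname__) but, together with SCAN_FROM_ROOT above, finds existing books anywhere on the device. Roughly (dev standing in for a connected PRST1 instance):

    dev.get_main_ebook_dir(for_upload=True)   # -> 'calibre' (__appname__)
    dev.get_main_ebook_dir()                  # -> '' (scan from the device root)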

View File

@ -100,7 +100,7 @@ gprefs.defaults['default_author_link'] = 'http://en.wikipedia.org/w/index.php?se
gprefs.defaults['preserve_date_on_ctl'] = True gprefs.defaults['preserve_date_on_ctl'] = True
gprefs.defaults['cb_fullscreen'] = False gprefs.defaults['cb_fullscreen'] = False
gprefs.defaults['worker_max_time'] = 0 gprefs.defaults['worker_max_time'] = 0
gprefs.defaults['show_files_after_save'] = True
# }}} # }}}
NONE = QVariant() #: Null value to return from the data function of item models NONE = QVariant() #: Null value to return from the data function of item models

View File

@ -18,11 +18,15 @@ class GenerateCatalogAction(InterfaceAction):
name = 'Generate Catalog' name = 'Generate Catalog'
action_spec = (_('Create catalog'), 'catalog.png', 'Catalog builder', ()) action_spec = (_('Create catalog'), 'catalog.png', 'Catalog builder', ())
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
def genesis(self): def genesis(self):
self.qaction.triggered.connect(self.generate_catalog) self.qaction.triggered.connect(self.generate_catalog)
    def location_selected(self, loc):
        enabled = loc == 'library'
        self.qaction.setEnabled(enabled)
def generate_catalog(self): def generate_catalog(self):
rows = self.gui.library_view.selectionModel().selectedRows() rows = self.gui.library_view.selectionModel().selectedRows()
if not rows or len(rows) < 2: if not rows or len(rows) < 2:

View File

@ -138,7 +138,7 @@ class ChooseLibraryAction(InterfaceAction):
name = 'Choose Library' name = 'Choose Library'
action_spec = (_('Choose Library'), 'lt.png', action_spec = (_('Choose Library'), 'lt.png',
_('Choose calibre library to work with'), None) _('Choose calibre library to work with'), None)
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_add_menu = True action_add_menu = True
action_menu_clone_qaction = _('Switch/create library...') action_menu_clone_qaction = _('Switch/create library...')

View File

@ -20,7 +20,7 @@ class ConvertAction(InterfaceAction):
name = 'Convert Books' name = 'Convert Books'
action_spec = (_('Convert books'), 'convert.png', None, _('C')) action_spec = (_('Convert books'), 'convert.png', None, _('C'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_type = 'current' action_type = 'current'
action_add_menu = True action_add_menu = True

View File

@ -127,7 +127,7 @@ class CopyToLibraryAction(InterfaceAction):
action_spec = (_('Copy to library'), 'lt.png', action_spec = (_('Copy to library'), 'lt.png',
_('Copy selected books to the specified library'), None) _('Copy selected books to the specified library'), None)
popup_type = QToolButton.InstantPopup popup_type = QToolButton.InstantPopup
dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_type = 'current' action_type = 'current'
action_add_menu = True action_add_menu = True

View File

@ -24,7 +24,7 @@ class ShareConnMenu(QMenu): # {{{
config_email = pyqtSignal() config_email = pyqtSignal()
toggle_server = pyqtSignal() toggle_server = pyqtSignal()
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
def __init__(self, parent=None): def __init__(self, parent=None):
QMenu.__init__(self, parent) QMenu.__init__(self, parent)

View File

@ -11,7 +11,7 @@ class NextMatchAction(InterfaceAction):
name = 'Move to next highlighted book' name = 'Move to next highlighted book'
action_spec = (_('Move to next match'), 'arrow-down.png', action_spec = (_('Move to next match'), 'arrow-down.png',
_('Move to next highlighted match'), [_('N'), _('F3')]) _('Move to next highlighted match'), [_('N'), _('F3')])
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_type = 'current' action_type = 'current'
def genesis(self): def genesis(self):

View File

@ -13,7 +13,7 @@ class OpenFolderAction(InterfaceAction):
name = 'Open Folder' name = 'Open Folder'
action_spec = (_('Open containing folder'), 'document_open.png', None, action_spec = (_('Open containing folder'), 'document_open.png', None,
_('O')) _('O'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_type = 'current' action_type = 'current'
def genesis(self): def genesis(self):

View File

@ -16,11 +16,15 @@ class PickRandomAction(InterfaceAction):
name = 'Pick Random Book' name = 'Pick Random Book'
action_spec = (_('Pick a random book'), 'random.png', action_spec = (_('Pick a random book'), 'random.png',
'Select a random book from your calibre library', ()) 'Select a random book from your calibre library', ())
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
def genesis(self): def genesis(self):
self.qaction.triggered.connect(self.pick_random) self.qaction.triggered.connect(self.pick_random)
    def location_selected(self, loc):
        enabled = loc == 'library'
        self.qaction.setEnabled(enabled)
def pick_random(self): def pick_random(self):
pick = random.randint(0, self.gui.library_view.model().rowCount(None)) pick = random.randint(0, self.gui.library_view.model().rowCount(None))
self.gui.library_view.set_current_row(pick) self.gui.library_view.set_current_row(pick)

View File

@ -11,8 +11,8 @@ from functools import partial
from PyQt4.Qt import QMenu, pyqtSignal from PyQt4.Qt import QMenu, pyqtSignal
from calibre.utils.config import prefs from calibre.utils.config import prefs
# --- removed ---
from calibre.gui2 import error_dialog, Dispatcher, \
        choose_dir, warning_dialog, open_local_file
# --- added ---
from calibre.gui2 import (error_dialog, Dispatcher, gprefs,
        choose_dir, warning_dialog, open_local_file)
from calibre.gui2.actions import InterfaceAction from calibre.gui2.actions import InterfaceAction
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
@ -141,7 +141,8 @@ class SaveToDiskAction(InterfaceAction):
                _('Could not save some books') + ', ' +
                _('Click the show details button to see which ones.'),
                u'\n\n'.join(failures), show=True)
# --- removed ---
        open_local_file(path)
# --- added ---
        if gprefs['show_files_after_save']:
            open_local_file(path)
def books_saved(self, job): def books_saved(self, job):
if job.failed: if job.failed:

View File

@ -15,7 +15,7 @@ class ShowBookDetailsAction(InterfaceAction):
name = 'Show Book Details' name = 'Show Book Details'
action_spec = (_('Show book details'), 'dialog_information.png', None, action_spec = (_('Show book details'), 'dialog_information.png', None,
_('I')) _('I'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_type = 'current' action_type = 'current'
def genesis(self): def genesis(self):

View File

@ -14,7 +14,7 @@ class ShowQuickviewAction(InterfaceAction):
name = 'Show quickview' name = 'Show quickview'
action_spec = (_('Show quickview'), 'search.png', None, _('Q')) action_spec = (_('Show quickview'), 'search.png', None, _('Q'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_type = 'current' action_type = 'current'
current_instance = None current_instance = None

View File

@ -17,7 +17,7 @@ class TweakEpubAction(InterfaceAction):
action_spec = (_('Tweak ePub'), 'trim.png', action_spec = (_('Tweak ePub'), 'trim.png',
_('Make small changes to ePub format books'), _('Make small changes to ePub format books'),
_('T')) _('T'))
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_type = 'current' action_type = 'current'
def genesis(self): def genesis(self):

View File

@ -310,7 +310,7 @@ class CheckLibraryDialog(QDialog):
tl = Item() tl = Item()
tl.setText(0, h) tl.setText(0, h)
if fixable: if fixable and list:
tl.setText(1, _('(fixable)')) tl.setText(1, _('(fixable)'))
tl.setFlags(Qt.ItemIsEnabled | Qt.ItemIsUserCheckable) tl.setFlags(Qt.ItemIsEnabled | Qt.ItemIsUserCheckable)
tl.setCheckState(1, False) tl.setCheckState(1, False)

View File

@ -538,14 +538,20 @@ class CoversModel(QAbstractListModel): # {{{
# --- removed ---
        current_cover = QPixmap(I('default_cover.png'))
        self.blank = QPixmap(I('blank.png')).scaled(150, 200)

        self.covers = [self.get_item(_('Current cover'), current_cover)]
        self.plugin_map = {}
        for i, plugin in enumerate(metadata_plugins(['cover'])):
            self.covers.append((plugin.name+'\n'+_('Searching...'),
                QVariant(self.blank), None, True))
            self.plugin_map[plugin] = i+1

# --- added ---
        current_cover = QPixmap(I('default_cover.png'))
        self.blank = QPixmap(I('blank.png')).scaled(150, 200)
        self.cc = current_cover
        self.reset_covers(do_reset=False)

    def reset_covers(self, do_reset=True):
        self.covers = [self.get_item(_('Current cover'), self.cc)]
        self.plugin_map = {}
        for i, plugin in enumerate(metadata_plugins(['cover'])):
            self.covers.append((plugin.name+'\n'+_('Searching...'),
                QVariant(self.blank), None, True))
            self.plugin_map[plugin] = i+1
        if do_reset:
            self.reset()
def get_item(self, src, pmap, waiting=False): def get_item(self, src, pmap, waiting=False):
sz = '%dx%d'%(pmap.width(), pmap.height()) sz = '%dx%d'%(pmap.width(), pmap.height())
text = QVariant(src + '\n' + sz) text = QVariant(src + '\n' + sz)
@ -654,6 +660,9 @@ class CoversView(QListView): # {{{
self.select(0) self.select(0)
self.delegate.start_animation() self.delegate.start_animation()
    def reset_covers(self):
        self.m.reset_covers()
def clear_failed(self): def clear_failed(self):
plugin = self.m.plugin_for_index(self.currentIndex()) plugin = self.m.plugin_for_index(self.currentIndex())
self.m.clear_failed() self.m.clear_failed()
@ -683,12 +692,18 @@ class CoversWidget(QWidget): # {{{
l.addWidget(self.covers_view, 1, 0) l.addWidget(self.covers_view, 1, 0)
self.continue_processing = True self.continue_processing = True
    def reset_covers(self):
        self.covers_view.reset_covers()
    def start(self, book, current_cover, title, authors):
        self.continue_processing = True      # added in this commit
        self.abort.clear()                   # added in this commit
        self.book, self.current_cover = book, current_cover
        self.title, self.authors = title, authors
        self.log('Starting cover download for:', book.title)
        self.log('Query:', title, authors, self.book.identifiers)
        self.msg.setText('<p>'+
            _('Downloading covers for <b>%s</b>, please wait...')%book.title)
self.covers_view.start() self.covers_view.start()
self.worker = CoverWorker(self.log, self.abort, self.title, self.worker = CoverWorker(self.log, self.abort, self.title,
@ -726,8 +741,9 @@ class CoversWidget(QWidget): # {{{
if num < 2: if num < 2:
txt = _('Could not find any covers for <b>%s</b>')%self.book.title txt = _('Could not find any covers for <b>%s</b>')%self.book.title
else: else:
            txt = _('Found <b>%(num)d</b> covers of %(title)s. '
                'Pick the one you like best.')%dict(num=num-1,
                    title=self.title)
self.msg.setText(txt) self.msg.setText(txt)
self.finished.emit() self.finished.emit()
@ -832,10 +848,14 @@ class FullFetch(QDialog): # {{{
self.next_button.clicked.connect(self.next_clicked) self.next_button.clicked.connect(self.next_clicked)
self.ok_button = self.bb.button(self.bb.Ok) self.ok_button = self.bb.button(self.bb.Ok)
self.ok_button.clicked.connect(self.ok_clicked) self.ok_button.clicked.connect(self.ok_clicked)
self.prev_button = self.bb.addButton(_('Back'), self.bb.ActionRole)
self.prev_button.setIcon(QIcon(I('back.png')))
self.prev_button.clicked.connect(self.back_clicked)
self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole) self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
self.log_button.clicked.connect(self.view_log) self.log_button.clicked.connect(self.view_log)
self.log_button.setIcon(QIcon(I('debug.png'))) self.log_button.setIcon(QIcon(I('debug.png')))
self.ok_button.setVisible(False) self.ok_button.setVisible(False)
self.prev_button.setVisible(False)
self.identify_widget = IdentifyWidget(self.log, self) self.identify_widget = IdentifyWidget(self.log, self)
self.identify_widget.rejected.connect(self.reject) self.identify_widget.rejected.connect(self.reject)
@ -857,12 +877,21 @@ class FullFetch(QDialog): # {{{
def book_selected(self, book): def book_selected(self, book):
self.next_button.setVisible(False) self.next_button.setVisible(False)
self.ok_button.setVisible(True) self.ok_button.setVisible(True)
self.prev_button.setVisible(True)
self.book = book self.book = book
self.stack.setCurrentIndex(1) self.stack.setCurrentIndex(1)
self.log('\n\n') self.log('\n\n')
self.covers_widget.start(book, self.current_cover, self.covers_widget.start(book, self.current_cover,
self.title, self.authors) self.title, self.authors)
    def back_clicked(self):
        self.next_button.setVisible(True)
        self.ok_button.setVisible(False)
        self.prev_button.setVisible(False)
        self.stack.setCurrentIndex(0)
        self.covers_widget.cancel()
        self.covers_widget.reset_covers()
def accept(self): def accept(self):
# Prevent the usual dialog accept mechanisms from working # Prevent the usual dialog accept mechanisms from working
pass pass

View File

@ -58,7 +58,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.device_to_formats_map = {} self.device_to_formats_map = {}
for device in device_plugins(): for device in device_plugins():
n = device_name_for_plugboards(device) n = device_name_for_plugboards(device)
self.device_to_formats_map[n] = set(device.FORMATS) self.device_to_formats_map[n] = set(device.settings().format_map)
if getattr(device, 'CAN_DO_DEVICE_DB_PLUGBOARD', False): if getattr(device, 'CAN_DO_DEVICE_DB_PLUGBOARD', False):
self.device_to_formats_map[n].add('device_db') self.device_to_formats_map[n].add('device_db')
if n not in self.devices: if n not in self.devices:

View File

@ -12,6 +12,7 @@ from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \
from calibre.gui2.preferences.saving_ui import Ui_Form from calibre.gui2.preferences.saving_ui import Ui_Form
from calibre.utils.config import ConfigProxy from calibre.utils.config import ConfigProxy
from calibre.library.save_to_disk import config from calibre.library.save_to_disk import config
from calibre.gui2 import gprefs
class ConfigWidget(ConfigWidgetBase, Ui_Form): class ConfigWidget(ConfigWidgetBase, Ui_Form):
@ -24,6 +25,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
for x in ('asciiize', 'update_metadata', 'save_cover', 'write_opf', for x in ('asciiize', 'update_metadata', 'save_cover', 'write_opf',
'replace_whitespace', 'to_lowercase', 'formats', 'timefmt'): 'replace_whitespace', 'to_lowercase', 'formats', 'timefmt'):
r(x, self.proxy) r(x, self.proxy)
r('show_files_after_save', gprefs)
self.save_template.changed_signal.connect(self.changed_signal.emit) self.save_template.changed_signal.connect(self.changed_signal.emit)
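r() is the usual alias for self.register() in these config widgets; binding 'show_files_after_save' to gprefs is what ties the new checkbox in saving.ui to the behaviour changed in the save-to-disk action earlier in this commit. The round trip, in short (assembled from the hunks above):

    from calibre.gui2 import gprefs, open_local_file

    gprefs.defaults['show_files_after_save'] = True   # default, set in gui2/__init__.py
    if gprefs['show_files_after_save']:               # consulted in save_to_disk.py
        open_local_file(path)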

View File

@ -95,6 +95,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_show_files_after_save">
<property name="text">
<string>&amp;Show files in file browser after saving to disk</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<customwidgets> <customwidgets>

View File

@ -231,6 +231,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def genesis(self, gui): def genesis(self, gui):
self.models = {} self.models = {}
        self.what.addItem(_('Click to choose toolbar or menu to customize'),
                'blank')
for key, text in self.LOCATIONS: for key, text in self.LOCATIONS:
self.what.addItem(text, key) self.what.addItem(text, key)
all_model = AllModel(key, gui) all_model = AllModel(key, gui)
@ -247,8 +249,14 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
    def what_changed(self, idx):
        key = unicode(self.what.itemData(idx).toString())
# --- removed ---
        self.all_actions.setModel(self.models[key][0])
        self.current_actions.setModel(self.models[key][1])
# --- added ---
        if key == 'blank':
            self.actions_widget.setVisible(False)
            self.spacer_widget.setVisible(True)
        else:
            self.actions_widget.setVisible(True)
            self.spacer_widget.setVisible(False)
            self.all_actions.setModel(self.models[key][0])
            self.current_actions.setModel(self.models[key][1])
def add_action(self, *args): def add_action(self, *args):
x = self.all_actions.selectionModel().selectedIndexes() x = self.all_actions.selectionModel().selectedIndexes()

View File

@ -13,16 +13,19 @@
<property name="windowTitle"> <property name="windowTitle">
<string>Form</string> <string>Form</string>
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QVBoxLayout" name="verticalLayout_2">
<item row="0" column="0" colspan="5"> <item>
<widget class="QComboBox" name="what"> <widget class="QLabel" name="label">
<property name="font"> <property name="text">
<font> <string>&lt;p&gt;The toolbar in calibre is different depending on whether a device is connected or not. Choose &lt;b&gt;which toolbar&lt;/b&gt; you would like to customize:</string>
<pointsize>20</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property> </property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="what">
<property name="toolTip"> <property name="toolTip">
<string>Choose the toolbar to customize</string> <string>Choose the toolbar to customize</string>
</property> </property>
@ -34,132 +37,59 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0" colspan="2"> <item>
<widget class="QGroupBox" name="groupBox"> <widget class="QWidget" name="actions_widget" native="true">
<property name="title"> <layout class="QHBoxLayout" name="horizontalLayout_2">
<string>A&amp;vailable actions</string> <property name="margin">
</property> <number>0</number>
<layout class="QVBoxLayout" name="verticalLayout"> </property>
<item> <item>
<widget class="QListView" name="all_actions"> <widget class="QGroupBox" name="groupBox">
<property name="selectionMode"> <property name="title">
<enum>QAbstractItemView::MultiSelection</enum> <string>A&amp;vailable actions</string>
</property>
<property name="iconSize">
<size>
<width>32</width>
<height>32</height>
</size>
</property>
<property name="spacing">
<number>10</number>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item row="2" column="2">
<layout class="QVBoxLayout" name="verticalLayout_3">
<item>
<widget class="QToolButton" name="add_action_button">
<property name="toolTip">
<string>Add selected actions to toolbar</string>
</property>
<property name="text">
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/forward.png</normaloff>:/images/forward.png</iconset>
</property>
<property name="iconSize">
<size>
<width>24</width>
<height>24</height>
</size>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Fixed</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QToolButton" name="remove_action_button">
<property name="toolTip">
<string>Remove selected actions from toolbar</string>
</property>
<property name="text">
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/back.png</normaloff>:/images/back.png</iconset>
</property>
<property name="iconSize">
<size>
<width>24</width>
<height>24</height>
</size>
</property>
</widget>
</item>
</layout>
</item>
<item row="2" column="3" colspan="2">
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>&amp;Current actions</string>
</property>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QListView" name="current_actions">
<property name="selectionMode">
<enum>QAbstractItemView::MultiSelection</enum>
</property>
<property name="iconSize">
<size>
<width>32</width>
<height>32</height>
</size>
</property>
<property name="spacing">
<number>10</number>
</property>
<property name="wordWrap">
<bool>true</bool>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QListView" name="all_actions">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>100</verstretch>
</sizepolicy>
</property>
<property name="selectionMode">
<enum>QAbstractItemView::MultiSelection</enum>
</property>
<property name="iconSize">
<size>
<width>32</width>
<height>32</height>
</size>
</property>
<property name="spacing">
<number>10</number>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget> </widget>
</item> </item>
<item> <item>
<layout class="QVBoxLayout" name="verticalLayout_4"> <layout class="QVBoxLayout" name="verticalLayout_3">
<item> <item>
<widget class="QToolButton" name="action_up_button"> <widget class="QToolButton" name="add_action_button">
<property name="toolTip"> <property name="toolTip">
<string>Move selected action up</string> <string>Add selected actions to toolbar</string>
</property> </property>
<property name="text"> <property name="text">
<string>...</string> <string>...</string>
</property> </property>
<property name="icon"> <property name="icon">
<iconset resource="../../../../resources/images.qrc"> <iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/arrow-up.png</normaloff>:/images/arrow-up.png</iconset> <normaloff>:/images/forward.png</normaloff>:/images/forward.png</iconset>
</property> </property>
<property name="iconSize"> <property name="iconSize">
<size> <size>
@ -170,10 +100,13 @@
</widget> </widget>
</item> </item>
<item> <item>
<spacer name="verticalSpacer_2"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
</property> </property>
<property name="sizeType">
<enum>QSizePolicy::Fixed</enum>
</property>
<property name="sizeHint" stdset="0"> <property name="sizeHint" stdset="0">
<size> <size>
<width>20</width> <width>20</width>
@ -183,16 +116,16 @@
</spacer> </spacer>
</item> </item>
<item> <item>
<widget class="QToolButton" name="action_down_button"> <widget class="QToolButton" name="remove_action_button">
<property name="toolTip"> <property name="toolTip">
<string>Move selected action down</string> <string>Remove selected actions from toolbar</string>
</property> </property>
<property name="text"> <property name="text">
<string>...</string> <string>...</string>
</property> </property>
<property name="icon"> <property name="icon">
<iconset resource="../../../../resources/images.qrc"> <iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/arrow-down.png</normaloff>:/images/arrow-down.png</iconset> <normaloff>:/images/back.png</normaloff>:/images/back.png</iconset>
</property> </property>
<property name="iconSize"> <property name="iconSize">
<size> <size>
@ -200,24 +133,124 @@
<height>24</height> <height>24</height>
</size> </size>
</property> </property>
<property name="shortcut">
<string>Ctrl+S</string>
</property>
</widget> </widget>
</item> </item>
</layout> </layout>
</item> </item>
<item>
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>&amp;Current actions</string>
</property>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QListView" name="current_actions">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>100</verstretch>
</sizepolicy>
</property>
<property name="selectionMode">
<enum>QAbstractItemView::MultiSelection</enum>
</property>
<property name="iconSize">
<size>
<width>32</width>
<height>32</height>
</size>
</property>
<property name="spacing">
<number>10</number>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<layout class="QVBoxLayout" name="verticalLayout_4">
<item>
<widget class="QToolButton" name="action_up_button">
<property name="toolTip">
<string>Move selected action up</string>
</property>
<property name="text">
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/arrow-up.png</normaloff>:/images/arrow-up.png</iconset>
</property>
<property name="iconSize">
<size>
<width>24</width>
<height>24</height>
</size>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QToolButton" name="action_down_button">
<property name="toolTip">
<string>Move selected action down</string>
</property>
<property name="text">
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/arrow-down.png</normaloff>:/images/arrow-down.png</iconset>
</property>
<property name="iconSize">
<size>
<width>24</width>
<height>24</height>
</size>
</property>
<property name="shortcut">
<string>Ctrl+S</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>
<item row="1" column="0" colspan="5"> <item>
<widget class="QLabel" name="label"> <widget class="QWidget" name="spacer_widget" native="true">
<property name="text"> <layout class="QVBoxLayout" name="verticalLayout_5">
<string>&lt;p&gt;The toolbar in calibre is different depending on whether a device is connected or not. To customize the toolbar when a device is connected as well as customizing right click menus, &lt;b&gt;click the dropdown above&lt;/b&gt; and select which toolbar/menu you want to customize.</string> <item>
</property> <spacer name="verticalSpacer_3">
<property name="wordWrap"> <property name="orientation">
<bool>true</bool> <enum>Qt::Vertical</enum>
</property> </property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>224</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget> </widget>
</item> </item>
</layout> </layout>

View File

@ -112,8 +112,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
self.opts = opts self.opts = opts
self.embedded = embedded self.embedded = embedded
self.state_callback = None self.state_callback = None
# --- removed ---
        self.max_cover_width, self.max_cover_height = \
                map(int, self.opts.max_cover.split('x'))
# --- added ---
        try:
            self.max_cover_width, self.max_cover_height = \
                    map(int, self.opts.max_cover.split('x'))
        except:
            self.max_cover_width = 1200
            self.max_cover_height = 1600
path = P('content_server') path = P('content_server')
self.build_time = fromtimestamp(os.stat(path).st_mtime) self.build_time = fromtimestamp(os.stat(path).st_mtime)
self.default_cover = open(P('content_server/default_cover.jpg'), 'rb').read() self.default_cover = open(P('content_server/default_cover.jpg'), 'rb').read()
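The new try/except keeps the content server from failing at startup when the max_cover option is missing or malformed, falling back to 1200x1600. The option is a simple WIDTHxHEIGHT string:

    opts.max_cover = '1200x1600'
    w, h = map(int, opts.max_cover.split('x'))   # -> (1200, 1600)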

View File

@ -47,7 +47,8 @@ def normalize_format_name(fmt):
return fmt return fmt
# --- removed ---
def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
        return_data=False, compression_quality=90, minify_to=None):
# --- added ---
def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
        return_data=False, compression_quality=90, minify_to=None,
        grayscale=False):
    '''
    Saves image in data to path, in the format specified by the path
    extension. Removes any transparency. If there is no transparency and no
@ -60,7 +61,8 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
    compression (lossless).
    :param bgcolor: The color for transparent pixels. Must be specified in hex.
    :param resize_to: A tuple (width, height) or None for no resizing
# --- removed ---
    :param minify_to: A tuple (width, height) to specify target size. The image
# --- added ---
    :param minify_to: A tuple (width, height) to specify maximum target size.
    :param grayscale: If True, the image is grayscaled
# --- unchanged ---
    will be resized to fit into this target size. If None the value from the
    tweak is used.
@ -71,6 +73,10 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
fmt = os.path.splitext(path)[1] fmt = os.path.splitext(path)[1]
fmt = normalize_format_name(fmt[1:]) fmt = normalize_format_name(fmt[1:])
if grayscale:
img.type = "GrayscaleType"
changed = True
if resize_to is not None: if resize_to is not None:
img.size = (resize_to[0], resize_to[1]) img.size = (resize_to[0], resize_to[1])
changed = True changed = True
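A short usage sketch of the new grayscale flag (file names are illustrative): with return_data=True the converted image bytes are returned instead of being written to the (dummy) path.

    from calibre.utils.magick.draw import save_cover_data_to

    with open('cover.jpg', 'rb') as f:
        data = f.read()
    thumb = save_cover_data_to(data, 'dummy.jpg', grayscale=True,
            resize_to=(355, 530), return_data=True)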