Merge from trunk

Charles Haley 2012-03-22 09:13:28 +01:00
commit e8259478f3
45 changed files with 255 additions and 321 deletions

View File

@@ -6,6 +6,7 @@ class Android_com_pl(BasicNewsRecipe):
     description = 'Android.com.pl - biggest polish Android site'
     category = 'Android, mobile'
     language = 'pl'
+    use_embedded_content=True
     cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
     oldest_article = 8
     max_articles_per_feed = 100
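
In calibre recipes, use_embedded_content=True tells the fetch engine to build each article from the HTML embedded in the feed entry itself, skipping the per-article page download. A minimal sketch of a recipe relying on it (the class name and feed URL are illustrative placeholders, not part of this commit):

    from calibre.web.feeds.news import BasicNewsRecipe

    class EmbeddedFeedRecipe(BasicNewsRecipe):
        # The article body comes straight from the RSS/Atom entry,
        # so no second HTTP request is made per article.
        title = 'Hypothetical embedded-content feed'
        use_embedded_content = True
        feeds = [('News', 'http://example.com/feed.rss')]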

View File

@@ -1,6 +1,6 @@
 __license__ = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 b92.net
 '''
@@ -20,13 +20,13 @@ class B92(BasicNewsRecipe):
     encoding = 'cp1250'
     language = 'sr'
     publication_type = 'newsportal'
-    masthead_url = 'http://www.b92.net/images/fp/logo.gif'
+    masthead_url = 'http://b92s.net/v4/img/new-logo.png'
     extra_css = """
-        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
         @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
         body{font-family: Arial,Helvetica,sans1,sans-serif}
-        .articledescription{font-family: serif1, serif}
         .article-info2,.article-info1{text-transform: uppercase; font-size: small}
+        img{display: block}
+        .sms{font-weight: bold}
     """
@@ -37,11 +37,17 @@ class B92(BasicNewsRecipe):
     , 'linearize_tables' : True
     }
-    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+    preprocess_regexps = [
+        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
+        (re.compile(r'<html.*?<body>', re.DOTALL|re.IGNORECASE), lambda match: '<html><head><title>something</title></head><body>')
+    ]
     keep_only_tags = [dict(attrs={'class':['article-info1','article-text']})]
-    remove_attributes = ['width','height','align','hspace','vspace','border']
+    remove_attributes = ['width','height','align','hspace','vspace','border','lang','xmlns:fb']
-    remove_tags = [dict(name=['embed','link','base','meta'])]
+    remove_tags = [
+        dict(name=['embed','link','base','meta','iframe'])
+        ,dict(attrs={'id':'social'})
+    ]
     feeds = [
         (u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' )
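
preprocess_regexps entries are (compiled pattern, replacement callable) pairs that calibre applies to the raw article HTML before parsing; the new second entry collapses everything from the opening <html> tag up to <body> into a minimal well-formed head. A standalone sketch of that substitution (the sample markup is made up):

    import re

    raw = '<html lang="sr"><head><script>junk()</script></head><body><p>text</p></body></html>'
    pattern = re.compile(r'<html.*?<body>', re.DOTALL | re.IGNORECASE)
    # Swap the whole noisy preamble for a minimal head, as the recipe does.
    cleaned = pattern.sub('<html><head><title>something</title></head><body>', raw)
    print(cleaned)
    # <html><head><title>something</title></head><body><p>text</p></body></html>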

View File

@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class CGM(BasicNewsRecipe):
     title = u'CGM'
@@ -17,9 +18,9 @@ class CGM(BasicNewsRecipe):
     remove_tags_before=dict(id='mainContent')
     remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
     remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
         dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
         dict(id=['movieShare', 'container'])]
     feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
         (u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
@@ -33,10 +34,12 @@ class CGM(BasicNewsRecipe):
             img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
             gallery.contents[1].name='img'
             gallery.contents[1]['src']=img
+            pos = len(gallery.contents)
+            gallery.insert(pos, BeautifulSoup('<br />'))
         for item in soup.findAll(style=True):
             del item['style']
         ad=soup.findAll('a')
         for r in ad:
             if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
                 r.extract()
         return soup

View File

@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class Elektroda(BasicNewsRecipe):
     title = u'Elektroda'
@@ -13,3 +14,18 @@ class Elektroda(BasicNewsRecipe):
     remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
     remove_tags=[dict(name='a', attrs={'href':'#top'})]
     feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
+
+    def preprocess_html(self, soup):
+        tag=soup.find('span', attrs={'class':'postbody'})
+        if tag:
+            pos = len(tag.contents)
+            tag.insert(pos, BeautifulSoup('<br />'))
+        return soup
+
+    def parse_feeds (self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                article.title=article.title[article.title.find("::")+3:]
+        return feeds
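
The parse_feeds override trims the forum-name prefix that elektroda.pl puts before '::' in each article title. Note the slicing relies on str.find, which returns -1 when the separator is absent, so a title without '::' would lose its first two characters; a small standalone illustration:

    def trim_prefix(title):
        # Mirrors the recipe: drop everything up to and including ':: '
        return title[title.find('::') + 3:]

    print(trim_prefix('Forum :: Jak naprawic zasilacz'))  # 'Jak naprawic zasilacz'
    print(trim_prefix('No separator here'))               # ' separator here' (caveat)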

View File

@@ -13,7 +13,7 @@ class Filmweb_pl(BasicNewsRecipe):
     remove_empty_feeds=True
     extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
     remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
-    keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
+    keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
     feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
         (u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
         (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),

View File

@@ -9,12 +9,12 @@ class Gram_pl(BasicNewsRecipe):
     oldest_article = 8
     max_articles_per_feed = 100
     no_stylesheets= True
-    extra_css = 'h2 {font-style: italic; font-size:20px;}'
+    extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
     cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
     remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
     keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
-    feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
-        (u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
+    feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
+        (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]

     def parse_feeds (self):
         feeds = BasicNewsRecipe.parse_feeds(self)
@@ -23,3 +23,33 @@ class Gram_pl(BasicNewsRecipe):
             if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
                 feed.articles.remove(article)
         return feeds
+
+    def append_page(self, soup, appendtag):
+        nexturl = appendtag.find('a', attrs={'class':'cpn'})
+        while nexturl:
+            soup2 = self.index_to_soup('http://www.gram.pl'+ nexturl['href'])
+            r=appendtag.find(id='pgbox')
+            if r:
+                r.extract()
+            pagetext = soup2.find(attrs={'class':'main'})
+            r=pagetext.find('h1')
+            if r:
+                r.extract()
+            r=pagetext.find('h2')
+            if r:
+                r.extract()
+            for r in pagetext.findAll('script'):
+                r.extract()
+            pos = len(appendtag.contents)
+            appendtag.insert(pos, pagetext)
+            nexturl = appendtag.find('a', attrs={'class':'cpn'})
+        r=appendtag.find(id='pgbox')
+        if r:
+            r.extract()
+
+    def preprocess_html(self, soup):
+        self.append_page(soup, soup.body)
+        tag=soup.findAll(name='div', attrs={'class':'picbox'})
+        for t in tag:
+            t['style']='float: left;'
+        return soup
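
append_page is the usual calibre pattern for stitching paginated articles: follow the next-page link (class 'cpn'), strip pagination boxes, duplicate headings and scripts from each fetched page, and append what remains to the first page's tree. A self-contained sketch of the core loop using in-memory pages instead of self.index_to_soup (requires the old BeautifulSoup 3 package that calibre bundled at the time; the page markup is invented):

    from BeautifulSoup import BeautifulSoup

    pages = {
        '/art?page=1': '<div class="main"><p>part one</p><a class="cpn" href="/art?page=2">next</a></div>',
        '/art?page=2': '<div class="main"><p>part two</p></div>',
    }

    def fetch(url):
        # Stand-in for self.index_to_soup() in a real recipe.
        return BeautifulSoup(pages[url])

    soup = fetch('/art?page=1')
    appendtag = soup.find('div')
    nexturl = appendtag.find('a', attrs={'class': 'cpn'})
    while nexturl:
        pagetext = fetch(nexturl['href']).find(attrs={'class': 'main'})
        appendtag.insert(len(appendtag.contents), pagetext)  # append at end
        nexturl.extract()  # drop the consumed navigation link
        nexturl = appendtag.find('a', attrs={'class': 'cpn'})
    print(soup)  # both parts merged into one document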

Binary file not shown (icon changed: 413 B before, 1.5 KiB after).

View File

@@ -7,12 +7,12 @@ class naczytniki(BasicNewsRecipe):
     cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
     language = 'pl'
     description ='everything about e-readers'
-    category='readers'
+    category='e-readers'
     no_stylesheets=True
+    use_embedded_content=False
     oldest_article = 7
     max_articles_per_feed = 100
     preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
-    remove_tags_after= dict(name='div', attrs={'class':'sociable'})
     keep_only_tags=[dict(name='div', attrs={'class':'post'})]
     remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
     feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]

View File

@@ -17,21 +17,8 @@ class Overclock_pl(BasicNewsRecipe):
     remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
     feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]

-    def append_page(self, soup, appendtag):
-        tag=soup.find(id='navigation')
-        if tag:
-            nexturl=tag.findAll('option')
-            tag.extract()
-            for nextpage in nexturl[2:]:
-                soup2 = self.index_to_soup(nextpage['value'])
-                pagetext = soup2.find(id='content')
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, pagetext)
-        rem=appendtag.find(attrs={'alt':'Pierwsza'})
-        if rem:
-            rem.parent.extract()
-
-    def preprocess_html(self, soup):
-        self.append_page(soup, soup.body)
-        return soup
+    def print_version(self, url):
+        if 'articles/show' in url:
+            return url.replace('show', 'showall')
+        else:
+            return url
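
The recipe drops the page-stitching code in favour of print_version, which rewrites each article URL to the site's single-page view before download, one request instead of one per pagination step. With a made-up URL of the matching shape:

    def print_version(url):
        # Articles get the 'showall' variant; other URLs pass through.
        if 'articles/show' in url:
            return url.replace('show', 'showall')
        else:
            return url

    print(print_version('http://www.overclock.pl/articles/show/id/1234'))
    # http://www.overclock.pl/articles/showall/id/1234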

View File

@@ -10,5 +10,7 @@ class palmtop_pl(BasicNewsRecipe):
     oldest_article = 7
     max_articles_per_feed = 100
     no_stylesheets = True
+    use_embedded_content=True
+    #remove_tags_before=dict(name='h2')
+    #remove_tags_after=dict(attrs={'class':'entry clearfix'})
     feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]

View File

@@ -1,31 +1,25 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class PC_Arena(BasicNewsRecipe):
     title = u'PCArena'
-    oldest_article = 18300
+    oldest_article = 7
     max_articles_per_feed = 100
     __author__ = 'fenuks'
     description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
     category = 'IT'
     language = 'pl'
-    masthead_url='http://pcarena.pl/public/design/frontend/images/logo.gif'
-    cover_url= 'http://pcarena.pl/public/design/frontend/images/logo.gif'
+    masthead_url='http://pcarena.pl/pcarena/img/logo.png'
+    cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
     no_stylesheets = True
-    keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
-    remove_tags=[dict(attrs={'class':'pages'})]
-    feeds = [(u'Newsy', u'http://pcarena.pl/misc/rss/news'), (u'Artyku\u0142y', u'http://pcarena.pl/misc/rss/articles')]
-
-    def print_version(self, url):
-        return url.replace('show', 'print')
-
-    def append_page(self, soup, appendtag):
-        tag=soup.find(name='div', attrs={'class':'pagNum'})
-        if tag:
-            nexturl=tag.findAll('a')
-            tag.extract()
-            for nextpage in nexturl[1:]:
-                nextpage= 'http://pcarena.pl' + nextpage['href']
-                soup2 = self.index_to_soup(nextpage)
-                pagetext = soup2.find(attrs={'class':'artBody'})
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, pagetext)
-
-    def preprocess_html(self, soup):
-        self.append_page(soup, soup.body)
-        return soup
+    remove_empty_feeds=True
+    #keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
+    #remove_tags=[dict(attrs={'class':'pages'})]
+    feeds = [(u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'), (u'Testy', u'http://pcarena.pl/testy/feeds.rss'), (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'), (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'), (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')]
+
+    def image_url_processor(self, baseurl, url):
+        if 'http' not in url:
+            return 'http://pcarena.pl' + url
+        else:
+            return url

View File

@@ -10,32 +10,11 @@ class PC_Centre(BasicNewsRecipe):
     masthead_url= 'http://pccentre.pl/views/images/logo.gif'
     cover_url= 'http://pccentre.pl/views/images/logo.gif'
     no_stylesheets = True
-    keep_only_tags= [dict(id='content')]
-    remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
-    feeds = [(u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]
-
-    def append_page(self, soup, appendtag):
-        tag=soup.find(name='div', attrs={'class':'pages'})
-        if tag:
-            nexturl=tag.findAll('a')
-            tag.extract()
-            for nextpage in nexturl[:-1]:
-                nextpage= 'http://pccentre.pl' + nextpage['href']
-                soup2 = self.index_to_soup(nextpage)
-                pagetext = soup2.find(id='content')
-                rem=pagetext.findAll(attrs={'class':['subtitle', 'content_info', 'list_of_content', 'pages', 'social2', 'pcc_acc', 'pcc_acc_na']})
-                for r in rem:
-                    r.extract()
-                rem=pagetext.findAll(id='comments')
-                for r in rem:
-                    r.extract()
-                rem=pagetext.findAll('h1')
-                for r in rem:
-                    r.extract()
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, pagetext)
-
-    def preprocess_html(self, soup):
-        self.append_page(soup, soup.body)
-        return soup
+    remove_empty_feeds = True
+    #keep_only_tags= [dict(id='content')]
+    #remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
+    remove_tags=[dict(attrs={'class':'logo_print'})]
+    feeds = [(u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]
+
+    def print_version(self, url):
+        return url.replace('show', 'print')

View File

@@ -8,10 +8,11 @@ class Tablety_pl(BasicNewsRecipe):
     cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
     category = 'IT'
     language = 'pl'
+    use_embedded_content=True
     oldest_article = 8
     max_articles_per_feed = 100
     preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
-    remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
-    remove_tags_after=dict(name="div", attrs={'class':'snap_nopreview sharing robots-nocontent'})
-    remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'})]
+    #remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
+    #remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'})
+    #remove_tags=[dict(name='footer', attrs={'class':'entry-footer clearfix'}), dict(name='div', attrs={'class':'entry-comment-counter'})]
     feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]

View File

@@ -1,5 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re

 class AdvancedUserRecipe1312886443(BasicNewsRecipe):
     title = u'WNP'
@@ -8,10 +8,11 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
     description = u'Wirtualny Nowy Przemysł'
     category = 'economy'
     language = 'pl'
+    preprocess_regexps = [(re.compile(ur'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
     oldest_article = 8
     max_articles_per_feed = 100
     no_stylesheets= True
-    keep_only_tags = dict(name='div', attrs={'id':'contentText'})
+    remove_tags=[dict(attrs={'class':'printF'})]
     feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
         (u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
         (u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
@@ -19,3 +20,7 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
         (u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
         (u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
         (u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
+
+    def print_version(self, url):
+        return 'http://wnp.pl/drukuj/' +url[url.find(',')+1:]
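
The new print_version derives the printer-friendly URL from whatever follows the first comma in the article URL. Assuming wnp.pl article URLs take the usual slug,id.html shape (the sample below is invented):

    def print_version(url):
        # Keep only the part after the first comma (the article id)
        # and append it to the print endpoint.
        return 'http://wnp.pl/drukuj/' + url[url.find(',') + 1:]

    print(print_version('http://www.wnp.pl/wiadomosci/jakis-artykul,165000_1_0_0.html'))
    # http://wnp.pl/drukuj/165000_1_0_0.html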

View File

@@ -1538,6 +1538,7 @@ class StoreWaterstonesUKStore(StoreBase):
     headquarters = 'UK'
     formats = ['EPUB', 'PDF']
+    affiliate = True

 class StoreWeightlessBooksStore(StoreBase):
     name = 'Weightless Books'
@@ -1557,15 +1558,6 @@ class StoreWHSmithUKStore(StoreBase):
     headquarters = 'UK'
     formats = ['EPUB', 'PDF']

-class StoreWizardsTowerBooksStore(StoreBase):
-    name = 'Wizards Tower Books'
-    description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
-    actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'
-    drm_free_only = True
-    headquarters = 'UK'
-    formats = ['EPUB', 'MOBI']
-
 class StoreWoblinkStore(StoreBase):
     name = 'Woblink'
     author = u'Tomasz Długosz'
@@ -1573,7 +1565,7 @@ class StoreWoblinkStore(StoreBase):
     actual_plugin = 'calibre.gui2.store.stores.woblink_plugin:WoblinkStore'
     headquarters = 'PL'
-    formats = ['EPUB', 'PDF', 'WOBLINK']
+    formats = ['EPUB', 'MOBI', 'PDF', 'WOBLINK']

 class XinXiiStore(StoreBase):
     name = 'XinXii'
@@ -1636,7 +1628,6 @@ plugins += [
     StoreWaterstonesUKStore,
     StoreWeightlessBooksStore,
     StoreWHSmithUKStore,
-    StoreWizardsTowerBooksStore,
     StoreWoblinkStore,
     XinXiiStore,
     StoreZixoStore

View File

@@ -13,7 +13,8 @@ from contextlib import closing
 from PyQt4.Qt import QToolButton

 from calibre.gui2.actions import InterfaceAction
-from calibre.gui2 import error_dialog, Dispatcher, warning_dialog, gprefs
+from calibre.gui2 import (error_dialog, Dispatcher, warning_dialog, gprefs,
+        info_dialog)
 from calibre.gui2.dialogs.progress import ProgressDialog
 from calibre.utils.config import prefs, tweaks
 from calibre.utils.date import now
@@ -30,6 +31,7 @@ class Worker(Thread): # {{{
         self.progress = progress
         self.done = done
         self.delete_after = delete_after
+        self.auto_merged_ids = {}

     def run(self):
         try:
@@ -79,6 +81,8 @@ class Worker(Thread): # {{{
             if prefs['add_formats_to_existing']:
                 identical_book_list = newdb.find_identical_books(mi)
                 if identical_book_list: # books with same author and nearly same title exist in newdb
+                    self.auto_merged_ids[x] = _('%s by %s')%(mi.title,
+                            mi.format_field('authors')[1])
                     automerged = True
                     seen_fmts = set()
                     for identical_book in identical_book_list:
@@ -196,6 +200,15 @@ class CopyToLibraryAction(InterfaceAction):
         self.gui.status_bar.show_message(
                 _('Copied %(num)d books to %(loc)s') %
                 dict(num=len(ids), loc=loc), 2000)
+        if self.worker.auto_merged_ids:
+            books = '\n'.join(self.worker.auto_merged_ids.itervalues())
+            info_dialog(self.gui, _('Auto merged'),
+                    _('Some books were automatically merged into existing '
+                      'records in the target library. Click Show '
+                      'details to see which ones. This behavior is '
+                      'controlled by the Auto merge option in '
+                      'Preferences->Adding books.'), det_msg=books,
+                    show=True)
         if delete_after and self.worker.processed:
             v = self.gui.library_view
             ci = v.currentIndex()

View File

@@ -5,4 +5,3 @@ or asked not to be included in the store integration.
 * Indigo (http://www.chapters.indigo.ca/).
 * Libraria Rizzoli (http://libreriarizzoli.corriere.it/).
 * EPubBuy DE: reason: too much traffic for too little sales
-* Empik (http://empik.com.pl).

View File

@@ -41,7 +41,9 @@ class AmazonDEKindleStore(StorePlugin):
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # Apparently amazon Europe is responding in UTF-8 now
+            doc = html.fromstring(f.read())

             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
             format_xpath = './/span[@class="format"]/text()'
@@ -65,8 +67,8 @@ class AmazonDEKindleStore(StorePlugin):
                 cover_url = ''.join(data.xpath(cover_xpath))

-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                 author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
                 if author.startswith('von '):

View File

@@ -37,7 +37,9 @@ class AmazonESKindleStore(StorePlugin):
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # Apparently amazon Europe is responding in UTF-8 now
+            doc = html.fromstring(f.read())

             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
             format_xpath = './/span[@class="format"]/text()'
@@ -61,8 +63,8 @@ class AmazonESKindleStore(StorePlugin):
                 cover_url = ''.join(data.xpath(cover_xpath))

-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                 author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                 if author.startswith('de '):
                     author = author[3:]

View File

@@ -39,7 +39,7 @@ class AmazonFRKindleStore(StorePlugin):
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
             # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon.fr is responding in UTF-8 now
+            # Apparently amazon Europe is responding in UTF-8 now
             doc = html.fromstring(f.read())

             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
@@ -64,8 +64,8 @@ class AmazonFRKindleStore(StorePlugin):
                 cover_url = ''.join(data.xpath(cover_xpath))

-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                 author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                 if author.startswith('de '):
                     author = author[3:]

View File

@@ -37,7 +37,9 @@ class AmazonITKindleStore(StorePlugin):
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # Apparently amazon Europe is responding in UTF-8 now
+            doc = html.fromstring(f.read())

             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
             format_xpath = './/span[@class="format"]/text()'
@@ -61,8 +63,8 @@ class AmazonITKindleStore(StorePlugin):
                 cover_url = ''.join(data.xpath(cover_xpath))

-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                 author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                 if author.startswith('di '):
                     author = author[3:]

View File

@@ -38,7 +38,8 @@ class AmazonUKKindleStore(StorePlugin):
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            # Apparently amazon Europe is responding in UTF-8 now
+            doc = html.fromstring(f.read())

             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
             format_xpath = './/span[@class="format"]/text()'
@@ -62,8 +63,8 @@ class AmazonUKKindleStore(StorePlugin):
                 cover_url = ''.join(data.xpath(cover_xpath))

-                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
+                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                 author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
                 if author.startswith('by '):

View File

@@ -62,7 +62,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
                 title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
                 author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
-                price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[@class="subtle"]/text()'))
+                price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[contains(@class, "bn-price")]/text()'))

                 counter -= 1

View File

@@ -7,7 +7,7 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 import random
-import urllib2
+import urllib
 from contextlib import closing

 from lxml import html
@@ -22,7 +22,7 @@ from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog

 class DieselEbooksStore(BasicStoreConfig, StorePlugin):

     def open(self, parent=None, detail_item=None, external=False):
         url = 'http://www.diesel-ebooks.com/'
@@ -33,7 +33,7 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
         detail_url = None
         if detail_item:
-            detail_url = url + detail_item + aff_id
+            detail_url = detail_item + aff_id
         url = url + aff_id

         if external or self.config.get('open_external', False):
@@ -45,54 +45,46 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
             d.exec_()

     def search(self, query, max_results=10, timeout=60):
-        url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib2.quote(query)
+        url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib.quote_plus(query)

         br = browser()

         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
             doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="item clearfix"]'):
+            for data in doc.xpath('//div[contains(@class, "item")]'):
+                data = html.fromstring(html.tostring(data))
                 if counter <= 0:
                     break

                 id = ''.join(data.xpath('div[@class="cover"]/a/@href'))
                 if not id or '/item/' not in id:
                     continue
+                a, b, id = id.partition('/item/')

                 cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))

-                title = ''.join(data.xpath('.//div[@class="content"]//h2/text()'))
-                author = ''.join(data.xpath('//div[@class="content"]//div[@class="author"]/a/text()'))
+                title = ''.join(data.xpath('.//div[@class="content"]//h2/a/text()'))
+                author = ''.join(data.xpath('.//div[@class="content"]/span//a/text()'))
                 price = ''
-                price_elem = data.xpath('//td[@class="price"]/text()')
+                price_elem = data.xpath('.//div[@class="price_fat"]//h1/text()')
                 if price_elem:
                     price = price_elem[0]

-                formats = ', '.join(data.xpath('.//td[@class="format"]/text()'))
+                formats = ', '.join(data.xpath('.//div[@class="book-info"]//text()')).strip()
+                a, b, formats = formats.partition('Format:')
+                drm = SearchResult.DRM_LOCKED
+                if 'drm free' not in formats.lower():
+                    drm = SearchResult.DRM_UNLOCKED

                 counter -= 1

                 s = SearchResult()
                 s.cover_url = cover_url
                 s.title = title.strip()
                 s.author = author.strip()
                 s.price = price.strip()
-                s.detail_item = '/item/' + id.strip()
+                s.detail_item = id.strip()
                 s.formats = formats
+                s.drm = drm

                 yield s
-
-    def get_details(self, search_result, timeout):
-        url = 'http://www.diesel-ebooks.com/item/'
-
-        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-
-            if idata.xpath('boolean(//table[@class="format-info"]//tr[contains(th, "DRM") and contains(td, "No")])'):
-                search_result.drm = SearchResult.DRM_UNLOCKED
-            else:
-                search_result.drm = SearchResult.DRM_LOCKED
-        return True
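
The two new partition() calls carry the parsing: one strips the site prefix from the detail link, the other keeps only the text after 'Format:' in the flattened book-info block, and a substring test on that remainder now classifies DRM inline instead of in the removed get_details. A standalone sketch with invented scraped strings:

    link = 'http://www.diesel-ebooks.com/item/9781234567890'
    a, b, item_id = link.partition('/item/')
    print(item_id)  # 9781234567890

    info = 'Publisher: Example Press Format: EPUB, DRM Free'
    a, b, formats = info.partition('Format:')
    print(formats.strip())                # EPUB, DRM Free
    print('drm free' in formats.lower())  # True -> DRM_UNLOCKED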

View File

@@ -60,10 +60,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
                     continue

                 cover_url = ''.join(data.xpath('.//a[@class="Jacket"]/img/@src'))
-                if cover_url:
-                    cover_url = 'http://www.foyles.co.uk' + cover_url
-                #print(cover_url)

                 title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
                 author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
                 price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))

View File

@@ -68,7 +68,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
                 cover_url = ''.join(data.xpath('.//div[@class="SearchImageContainer"]//img[1]/@src'))

                 title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
-                author = ''.join(data.xpath('.//div[@class="SCItemSummary"]/span/a[1]/text()'))
+                author = ', '.join(data.xpath('.//div[@class="SCItemSummary"]//span//a/text()'))
                 drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')

                 counter -= 1

View File

@@ -57,7 +57,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
                 cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
                 title = ''.join(data.xpath('./div/div/h2/a/text()'))
                 author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
-                price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceStandard"]/text()'))
+                price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceRed2"]/text()'))
                 drm = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "DRM")])')
                 pdf = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "PDF")])')
                 epub = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "EPUB")])')

View File

@@ -1,118 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
url = 'http://www.wizardstowerbooks.com/'
def open(self, parent=None, detail_item=None, external=False):
if detail_item:
detail_item = self.url + detail_item
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_item)))
else:
d = WebStoreDialog(self.gui, self.url, parent, detail_item)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://www.wizardstowerbooks.com/search.html?for=' + urllib.quote(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
if 'search.html' in f.geturl():
for data in doc.xpath('//table[@class="gridp"]//td'):
if counter <= 0:
break
id = ''.join(data.xpath('.//span[@class="prti"]/a/@href'))
id = id.strip()
if not id:
continue
cover_url = ''.join(data.xpath('.//div[@class="prim"]/a/img/@src'))
cover_url = url_slash_cleaner(self.url + cover_url.strip())
price = ''.join(data.xpath('.//font[@class="selling_price"]//text()'))
price = price.strip()
if not price:
continue
title = ''.join(data.xpath('.//span[@class="prti"]/a/b/text()'))
author = ''.join(data.xpath('.//p[@class="last"]/text()'))
a, b, author = author.partition(' by ')
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
yield s
# Exact match brought us to the books detail page.
else:
s = SearchResult()
cover_url = ''.join(doc.xpath('//div[@id="image"]/a/img[@title="Zoom"]/@src')).strip()
s.cover_url = url_slash_cleaner(self.url + cover_url.strip())
s.title = ''.join(doc.xpath('//form[@name="details"]/h1/text()')).strip()
authors = doc.xpath('//p[contains(., "Author:")]//text()')
author_index = None
for i, a in enumerate(authors):
if 'author' in a.lower():
author_index = i + 1
break
if author_index is not None and len(authors) > author_index:
a = authors[author_index]
a = a.replace(u'\xa0', '')
s.author = a.strip()
s.price = ''.join(doc.xpath('//span[@id="price_selling"]//text()')).strip()
s.detail_item = f.geturl().replace(self.url, '').strip()
s.formats = ', '.join(doc.xpath('//select[@id="N1_"]//option//text()'))
s.drm = SearchResult.DRM_UNLOCKED
yield s
def get_details(self, search_result, timeout):
if search_result.formats:
return False
br = browser()
with closing(br.open(url_slash_cleaner(self.url + search_result.detail_item), timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
formats = ', '.join(idata.xpath('//select[@id="N1_"]//option//text()'))
search_result.formats = formats.upper()
return True

View File

@@ -3,7 +3,7 @@
 from __future__ import (unicode_literals, division, absolute_import, print_function)

 __license__ = 'GPL 3'
-__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
+__copyright__ = '2011-2012, Tomasz Długosz <tomek3d@gmail.com>'
 __docformat__ = 'restructuredtext en'

 import re
@@ -41,6 +41,11 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):

     def search(self, query, max_results=10, timeout=60):
         url = 'http://woblink.com/publication?query=' + urllib.quote_plus(query.encode('utf-8'))
+        if max_results > 10:
+            if max_results > 20:
+                url += '&limit=' + str(30)
+            else:
+                url += '&limit=' + str(20)

         br = browser()
@@ -58,15 +63,16 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
             cover_url = ''.join(data.xpath('.//td[@class="w10 va-t"]/a[1]/img/@src'))
             title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
             author = ', '.join(data.xpath('.//p[@class="author"]/a/text()'))
-            price = ''.join(data.xpath('.//div[@class="prices"]/p[1]/span/text()'))
+            price = ''.join(data.xpath('.//div[@class="prices"]/span[1]/span/text()'))
+            price = re.sub('PLN', '', price)
             price = re.sub('\.', ',', price)
-            formats = ', '.join(data.xpath('.//p[3]/img/@src'))
-            formats = formats[8:-4].upper()
-            if formats == 'EPUB':
-                formats = 'WOBLINK'
+            formats = [ form[8:-4].split('_')[0] for form in data.xpath('.//p[3]/img/@src')]
+            if 'epub' in formats:
+                formats.remove('epub')
+                formats.append('WOBLINK')
             if 'E Ink' in data.xpath('.//div[@class="prices"]/img/@title'):
-                formats += ', EPUB'
+                formats.insert(0, 'EPUB')
+            if 'pdf' in formats:
+                formats[formats.index('pdf')] = 'PDF'

             counter -= 1
@@ -74,9 +80,9 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
             s.cover_url = 'http://woblink.com' + cover_url
             s.title = title.strip()
             s.author = author.strip()
-            s.price = price
+            s.price = price + ''
             s.detail_item = id.strip()
-            s.drm = SearchResult.DRM_LOCKED
-            s.formats = formats
+            s.drm = SearchResult.DRM_UNKNOWN if 'MOBI' in formats else SearchResult.DRM_LOCKED
+            s.formats = ', '.join(formats)

             yield s
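
The formats list is now derived from the format-icon image paths: form[8:-4].split('_')[0] slices off an eight-character directory prefix and a four-character extension, which presumes paths like '/images/epub_big.png'. A sketch under that assumption (the filenames are illustrative, not scraped from the site):

    icons = ['/images/epub_big.png', '/images/pdf_small.png']
    formats = [form[8:-4].split('_')[0] for form in icons]  # ['epub', 'pdf']
    if 'epub' in formats:
        formats.remove('epub')
        formats.append('WOBLINK')  # Woblink sells its DRM EPUB as WOBLINK
    if 'pdf' in formats:
        formats[formats.index('pdf')] = 'PDF'
    print(', '.join(formats))  # PDF, WOBLINK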

View File

@@ -8,7 +8,7 @@ import os, math, re, glob, sys, zipfile
 from base64 import b64encode
 from functools import partial

-from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer,
+from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt,
                       QPainter, QPalette, QBrush, QFontDatabase, QDialog,
                       QColor, QPoint, QImage, QRegion, QVariant, QIcon,
                       QFont, pyqtSignature, QAction, QByteArray, QMenu,
@@ -184,12 +184,10 @@ class Document(QWebPage): # {{{
         self.misc_config()
         self.after_load()

-    def __init__(self, shortcuts, parent=None, resize_callback=lambda: None,
-            debug_javascript=False):
+    def __init__(self, shortcuts, parent=None, debug_javascript=False):
         QWebPage.__init__(self, parent)
         self.setObjectName("py_bridge")
         self.debug_javascript = debug_javascript
-        self.resize_callback = resize_callback
         self.current_language = None
         self.loaded_javascript = False
         self.js_loader = JavaScriptLoader(
@@ -259,12 +257,6 @@ class Document(QWebPage): # {{{
         if self.loaded_javascript:
             return
         self.loaded_javascript = True
-        self.javascript(
-            '''
-            window.onresize = function(event) {
-                window.py_bridge.window_resized();
-            }
-            ''')
         self.loaded_lang = self.js_loader(self.mainFrame().evaluateJavaScript,
                 self.current_language, self.hyphenate_default_lang)
@@ -310,10 +302,6 @@ class Document(QWebPage): # {{{
     def debug(self, msg):
         prints(msg)

-    @pyqtSignature('')
-    def window_resized(self):
-        self.resize_callback()
-
     def reference_mode(self, enable):
         self.javascript(('enter' if enable else 'leave')+'_reference_mode()')
@@ -444,7 +432,7 @@ class Document(QWebPage): # {{{
     def scroll_fraction(self):
         def fget(self):
             try:
-                return float(self.ypos)/(self.height-self.window_height)
+                return abs(float(self.ypos)/(self.height-self.window_height))
             except ZeroDivisionError:
                 return 0.
         def fset(self, val):
@@ -516,7 +504,6 @@ class DocumentView(QWebView): # {{{
         self.initial_pos = 0.0
         self.to_bottom = False
         self.document = Document(self.shortcuts, parent=self,
-                resize_callback=self.viewport_resized,
                 debug_javascript=debug_javascript)
         self.setPage(self.document)
         self.manager = None
@@ -1035,13 +1022,9 @@ class DocumentView(QWebView): # {{{
         return handled

     def resizeEvent(self, event):
-        ret = QWebView.resizeEvent(self, event)
-        QTimer.singleShot(10, self.initialize_scrollbar)
-        return ret
-
-    def viewport_resized(self):
         if self.manager is not None:
-            self.manager.viewport_resized(self.scroll_fraction)
+            self.manager.viewport_resize_started(event)
+        return QWebView.resizeEvent(self, event)

     def event(self, ev):
         if ev.type() == ev.Gesture:

View File

@@ -224,6 +224,10 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         self.toc.setVisible(False)
         self.action_quit = QAction(self)
         self.addAction(self.action_quit)
+        self.view_resized_timer = QTimer(self)
+        self.view_resized_timer.timeout.connect(self.viewport_resize_finished)
+        self.view_resized_timer.setSingleShot(True)
+        self.resize_in_progress = False
         qs = [Qt.CTRL+Qt.Key_Q]
         if isosx:
             qs += [Qt.CTRL+Qt.Key_W]
@@ -311,6 +315,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
                 border-radius: 20px;
             }
         ''')
+        self.window_mode_changed = None
         self.toggle_toolbar_action = QAction(_('Show/hide controls'), self)
         self.toggle_toolbar_action.triggered.connect(self.toggle_toolbars)
         self.addAction(self.toggle_toolbar_action)
@@ -441,6 +446,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
             self.showFullScreen()

     def showFullScreen(self):
+        self.view.document.page_position.save()
+        self.window_mode_changed = 'fullscreen'
         self.tool_bar.setVisible(False)
         self.tool_bar2.setVisible(False)
         self._original_frame_margins = (
@@ -450,7 +457,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         self.centralwidget.layout().setContentsMargins(0, 0, 0, 0)

         super(EbookViewer, self).showFullScreen()
-        QTimer.singleShot(10, self.show_full_screen_label)

     def show_full_screen_label(self):
         f = self.full_screen_label
@@ -469,6 +475,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         self.view.document.switch_to_fullscreen_mode()

     def showNormal(self):
+        self.view.document.page_position.save()
+        self.window_mode_changed = 'normal'
         self.esc_full_screen_action.setEnabled(False)
         self.tool_bar.setVisible(True)
         self.tool_bar2.setVisible(True)
@@ -478,7 +486,16 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         self.centralwidget.layout().setContentsMargins(om[0])
         self.frame.layout().setContentsMargins(om[1])
         super(EbookViewer, self).showNormal()
-        self.view.document.switch_to_window_mode()
+
+    def handle_window_mode_toggle(self):
+        if self.window_mode_changed:
+            fs = self.window_mode_changed == 'fullscreen'
+            self.window_mode_changed = None
+            if fs:
+                self.show_full_screen_label()
+            else:
+                self.view.document.switch_to_window_mode()
+            self.view.document.page_position.restore()

     def goto(self, ref):
         if ref:
@@ -507,6 +524,10 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
     def toc_clicked(self, index):
         item = self.toc_model.itemFromIndex(index)
         if item.abspath is not None:
+            if not os.path.exists(item.abspath):
+                return error_dialog(self, _('No such location'),
+                        _('The location pointed to by this item'
+                          ' does not exist.'), show=True)
             url = QUrl.fromLocalFile(item.abspath)
             if item.fragment:
                 url.setFragment(item.fragment)
@@ -674,16 +695,28 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
             self.open_progress_indicator(_('Laying out %s')%self.current_title)
         self.view.load_path(path, pos=pos)

-    def viewport_resized(self, frac):
-        new_page = self.pos.value()
-        if self.current_page is not None:
-            try:
-                frac = float(new_page-self.current_page.start_page)/(self.current_page.pages-1)
-            except ZeroDivisionError:
-                frac = 0
-            self.view.scroll_to(frac, notify=False)
+    def viewport_resize_started(self, event):
+        if not self.resize_in_progress:
+            # First resize, so save the current page position
+            self.resize_in_progress = True
+            if not self.window_mode_changed:
+                # The special handling for window mode changed will already
+                # have saved page position, so only save it if this is not a
+                # mode change
+                self.view.document.page_position.save()
+
+        if self.resize_in_progress:
+            self.view_resized_timer.start(75)
+
+    def viewport_resize_finished(self):
+        # There hasn't been a resize event for some time
+        # restore the current page position.
+        self.resize_in_progress = False
+        if self.window_mode_changed:
+            # This resize is part of a window mode change, special case it
+            self.handle_window_mode_toggle()
         else:
-            self.set_page_number(frac)
+            self.view.document.page_position.restore()

     def close_progress_indicator(self):
         self.pi.stop()
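
Together the two methods implement a debounce: every resize event restarts a 75 ms single-shot timer, so the expensive position restore runs once, after the last event in a burst. The same pattern in a minimal PyQt4 sketch (widget and slot names are hypothetical; a QApplication is needed to actually show it):

    from PyQt4.Qt import QTimer, QWidget

    class DebouncedWidget(QWidget):
        def __init__(self):
            QWidget.__init__(self)
            self.timer = QTimer(self)
            self.timer.setSingleShot(True)
            self.timer.timeout.connect(self.resize_finished)

        def resizeEvent(self, event):
            # Restarting on every event means the timeout only fires
            # 75 ms after the *last* resize in a burst.
            self.timer.start(75)
            return QWidget.resizeEvent(self, event)

        def resize_finished(self):
            print('resize settled at %dx%d' % (self.width(), self.height()))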

View File

@@ -57,12 +57,20 @@ class PagePosition(object):
             return ans

     def __enter__(self):
-        self._cpos = self.current_pos
+        self.save()

     def __exit__(self, *args):
+        self.restore()
+
+    def save(self):
+        self._cpos = self.current_pos
+
+    def restore(self):
+        if self._cpos is None: return
         if isinstance(self._cpos, (int, float)):
             self.document.scroll_fraction = self._cpos
         else:
             self.scroll_to_cfi(self._cpos)
         self._cpos = None
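
Splitting save() and restore() out of the context-manager protocol lets the same object hold a position across an arbitrary gap (here, the fullscreen toggle) as well as inside a with-block. A shape-only sketch of the dual use, with a trivial stand-in for the real Document:

    class PagePosition(object):
        def __init__(self, document):
            self.document = document
            self._cpos = None

        def save(self):
            self._cpos = self.document.scroll_fraction

        def restore(self):
            if self._cpos is None: return
            self.document.scroll_fraction = self._cpos
            self._cpos = None

        def __enter__(self):
            self.save()

        def __exit__(self, *args):
            self.restore()

    class FakeDocument(object):
        scroll_fraction = 0.25

    doc = FakeDocument()
    pos = PagePosition(doc)
    with pos:                      # saved on entry
        doc.scroll_fraction = 0.9  # perturbed
    print(doc.scroll_fraction)     # 0.25, restored on exit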

View File

@@ -953,7 +953,7 @@ class ResultCache(SearchQueryParser): # {{{
                     self.series_col, self.series_sort_col)
             self._data[id].append(db.book_on_device_string(id))
             self._data[id].append(self.marked_ids_dict.get(id, None))
-            self._data[id].append(None)
+            self._data[id].append(None) # Series sort column
         self._map[0:0] = ids
         self._map_filtered[0:0] = ids
@@ -983,8 +983,8 @@ class ResultCache(SearchQueryParser): # {{{
             for item in self._data:
                 if item is not None:
                     item.append(db.book_on_device_string(item[0]))
-                    item.append(None)
-                    item.append(None)
+                    # Temp mark and series_sort columns
+                    item.extend((None, None))
             marked_col = self.FIELD_MAP['marked']
             for id_,val in self.marked_ids_dict.iteritems():