This commit is contained in:
GRiker 2013-06-19 10:07:23 -06:00
commit e533d7d218
31 changed files with 632 additions and 205 deletions

65
recipes/cdrinfo_pl.recipe Normal file
View File

@ -0,0 +1,65 @@
__license__ = 'GPL v3'
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment
class cdrinfo(BasicNewsRecipe):
    """News recipe for CDRinfo.pl, a Polish site about data archiving,
    optical drives, HDD/SSD hardware and related software.

    Multi-page articles carry an ``artnawigacja`` pager; ``append_page``
    follows it and merges all pages into a single document.
    """
    title = u'CDRinfo.pl'
    __author__ = 'fenuks'
    description = u'Serwis poświęcony archiwizacji danych. Testy i recenzje nagrywarek. Programy do nagrywania płyt. Dyski twarde, dyski SSD i serwery sieciowe NAS. Rankingi dyskow twardych, najszybsze dyski twarde, newsy, artykuły, testy, recenzje, porady, oprogramowanie. Zestawienie nagrywarek, najnowsze biosy do nagrywarek, programy dla dysków twardych.'
    category = 'it, hardware'
    #publication_type = ''
    language = 'pl'
    #encoding = ''
    #extra_css = ''
    cover_url = 'http://www.cdrinfo.pl/gfx/graph3/top.jpg'
    #masthead_url = ''
    use_embedded_content = False
    oldest_article = 777
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    remove_javascript = True
    remove_attributes = ['style']
    # Strip the boilerplate netiquette notice appended to article bodies.
    preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com</a>\.</p>', re.DOTALL), lambda match: '')]
    ignore_duplicate_articles = {'title', 'url'}
    # The hidden <input name="ref"> holds the article's base path; it is kept
    # so that append_page can build the URLs of the remaining pages.
    keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id='text')]
    remove_tags = [dict(attrs={'class':['navigation', 'sociable']}), dict(name='hr'), dict(id='respond')]
    remove_tags_after = dict(id='artnawigacja')
    feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'),
             (u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'),
             (u'Pliki', 'http://www.cdrinfo.pl/rss/rss_pliki.xml')
             ]

    def preprocess_html(self, soup):
        # Only articles with the 'artnawigacja' pager span multiple pages.
        if soup.find(id='artnawigacja'):
            self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Fetch every remaining page of the article and append its text
        (with HTML comments and star-rating widgets removed) to *appendtag*.
        Finally removes the pager element itself.
        """
        baseurl = 'http://cdrinfo.pl' + soup.find(name='input', attrs={'name':'ref'})['value'] + '/'
        if baseurl[-2] == '/':  # avoid a double slash when ref already ends with '/'
            baseurl = baseurl[:-1]
        tag = soup.find(id='artnawigacja')
        div = tag.find('div', attrs={'align':'right'})
        while div:
            soup2 = None
            # Retry the fetch up to 5 times; transient network errors happen.
            for _ in range(5):
                try:
                    soup2 = self.index_to_soup(baseurl + div.a['href'])
                    break
                except Exception:
                    continue
            if soup2 is None:
                # All attempts failed: give up instead of reusing a stale
                # soup2 (the original code either raised NameError here or
                # looped forever on the previous page).
                break
            tag2 = soup2.find(id='artnawigacja')
            div = tag2.find('div', attrs={'align':'right'})
            pagetext = soup2.find(attrs={'class':'art'})
            for comment in pagetext.findAll(text=lambda text: isinstance(text, Comment)):
                comment.extract()
            for r in soup2.findAll(attrs={'class':'star-rating'}):
                r.extract()
            for r in soup2.findAll(attrs={'class':'star-rating2'}):
                r.extract()
            appendtag.insert(len(appendtag.contents), pagetext)
        tag.extract()

View File

@ -9,13 +9,15 @@ class EkologiaPl(BasicNewsRecipe):
language = 'pl' language = 'pl'
cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png' cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}' extra_css = '.title {font-size: 200%;} .imagePowiazane {float:left; margin-right:5px; width: 200px;}'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True
use_embedded_content = False use_embedded_content = False
remove_attrs = ['style'] remove_attrs = ['style']
keep_only_tags = [dict(attrs={'class':'contentParent'})]
remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']})] remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']})]
feeds = [(u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'), (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'), (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0')] feeds = [(u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'), (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'), (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0')]

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment
import re
class gw_bydgoszcz(BasicNewsRecipe):
    """Recipe for the Bydgoszcz local edition of Gazeta Wyborcza (Gazeta.pl).

    Articles arrive via a feedsportal.com redirect feed; ``print_version``
    decodes those URLs, and ``append_page`` stitches multi-page articles
    together using the 'Str' pager element.
    """
    title = u'Gazeta Wyborcza Bydgoszcz'
    __author__ = 'fenuks'
    language = 'pl'
    description = 'Wiadomości z Bydgoszczy na portalu Gazeta.pl.'
    category = 'newspaper'
    publication_type = 'newspaper'
    masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif'
    INDEX = 'http://bydgoszcz.gazeta.pl'
    cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    # rules for gazeta.pl
    preprocess_regexps = [(re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
    keep_only_tags = [dict(id='gazeta_article')]
    remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(attrs={'class':['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
    remove_tags_after = dict(id='gazeta_article_body')
    feeds = [(u'Wiadomości', u'http://rss.feedsportal.com/c/32739/f/530239/index.rss')]

    def print_version(self, url):
        """Decode a feedsportal.com redirect URL into the direct article URL.

        Non-feedsportal URLs are returned unchanged.
        """
        if 'feedsportal.com' not in url:
            return url
        s = url.rpartition('gazeta0Bpl')
        u = s[2]
        if not s[0]:
            # URL was hosted under wyborcza.pl instead of gazeta.pl
            u = url.rpartition('wyborcza0Bpl')[2]
        u = u.replace('/l/', '/')
        u = u.replace('/ia1.htm', '')
        u = u.replace('0Dbo0F1', '')
        u = u.replace('/story01.htm', '')
        # feedsportal escape codes -> original URL characters
        u = u.replace('0C', '/')
        u = u.replace('A', '')
        u = u.replace('0E', '-')
        u = u.replace('0H', ',')
        u = u.replace('0I', '_')
        u = u.replace('0B', '.')
        return self.INDEX + u

    def preprocess_html(self, soup):
        tag = soup.find(id='Str')
        if soup.find(attrs={'class': 'piano_btn_1'}):
            return None  # paywalled article: drop it
        elif tag and tag.findAll('a'):
            # Pager present: merge the remaining pages into this one.
            self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Follow the 'Str' pager and append each page's article text
        (with HTML comments removed) to *appendtag*, then drop the pager.
        Returns 1 early when the page's base URL cannot be determined.
        """
        tag = soup.find('div', attrs={'id': 'Str'})
        try:
            baseurl = soup.find(name='meta', attrs={'property': 'og:url'})['content']
        except (TypeError, KeyError):
            # No og:url meta tag -> cannot build the page URLs
            return 1
        link = tag.findAll('a')[-1]
        while link:
            soup2 = self.index_to_soup(baseurl + link['href'])
            link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
            # Stop when the last pager anchor is not a "następne" (next) link.
            # link.string may be None (anchor wraps a tag, not text); the
            # original unguarded `in link.string` raised TypeError then.
            if not (link.string and u'następne' in link.string):
                link = ''
            pagetext = soup2.find(id='artykul')
            for comment in pagetext.findAll(text=lambda text: isinstance(text, Comment)):
                comment.extract()
            appendtag.insert(len(appendtag.contents), pagetext)
        tag.extract()

    def image_url_processor(self, baseurl, url):
        # Some image URLs come back with leading whitespace; strip it.
        return url.strip() if url.startswith(' ') else url

View File

@ -16,40 +16,47 @@ class Gildia(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
preprocess_regexps = [(re.compile(ur'</?sup>'), lambda match: '') ] preprocess_regexps = [(re.compile(ur'</?sup>'), lambda match: '') ]
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
remove_tags = [dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})] remove_tags = [dict(name='div', attrs={'class':['backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})]
keep_only_tags = dict(name='div', attrs={'class':'widetext'}) keep_only_tags = [dict(name='div', attrs={'class':'widetext'})]
feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), (u'Literatura', u'http://www.literatura.gildia.pl/rss'), (u'Film', u'http://www.film.gildia.pl/rss'), (u'Horror', u'http://www.horror.gildia.pl/rss'), (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), (u'Techno', u'http://www.techno.gildia.pl/rss'), (u'Historia', u'http://www.historia.gildia.pl/rss'), (u'Magia', u'http://www.magia.gildia.pl/rss'), (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), (u'RPG', u'http://www.rpg.gildia.pl/rss'), (u'LARP', u'http://www.larp.gildia.pl/rss'), (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), (u'Nauka', u'http://www.nauka.gildia.pl/rss')] feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'),
(u'Literatura', u'http://www.literatura.gildia.pl/rss'),
(u'Film', u'http://www.film.gildia.pl/rss'),
(u'Horror', u'http://www.horror.gildia.pl/rss'),
(u'Konwenty', u'http://www.konwenty.gildia.pl/rss'),
(u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'),
(u'Manga i anime', u'http://www.manga.gildia.pl/rss'),
(u'Star Wars', u'http://www.starwars.gildia.pl/rss'),
(u'Techno', u'http://www.techno.gildia.pl/rss'),
(u'Historia', u'http://www.historia.gildia.pl/rss'),
(u'Magia', u'http://www.magia.gildia.pl/rss'),
(u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'),
(u'RPG', u'http://www.rpg.gildia.pl/rss'),
(u'LARP', u'http://www.larp.gildia.pl/rss'),
(u'Muzyka', u'http://www.muzyka.gildia.pl/rss'),
(u'Nauka', u'http://www.nauka.gildia.pl/rss'),
]
def skip_ad_pages(self, soup): def skip_ad_pages(self, soup):
content = soup.find('div', attrs={'class':'news'}) content = soup.find('div', attrs={'class':'news'})
if 'recenzj' in soup.title.string.lower(): words = ('recenzj', 'zapowied','fragmen', 'relacj', 'wywiad', 'nominacj')
for word in words:
if word in soup.title.string.lower():
for link in content.findAll(name='a'): for link in content.findAll(name='a'):
if 'recenzj' in link['href'] or 'muzyka/plyty' in link['href']: if word in link['href'] or (link.string and word in link.string):
return self.index_to_soup(link['href'], raw=True) return self.index_to_soup(link['href'], raw=True)
if 'fragmen' in soup.title.string.lower(): for tag in content.findAll(name='a', href=re.compile('/publicystyka/')):
for link in content.findAll(name='a'): if 'Wi&#281;cej...' == tag.string:
if 'fragment' in link['href']: return self.index_to_soup(tag['href'], raw=True)
return self.index_to_soup(link['href'], raw=True)
if 'relacj' in soup.title.string.lower():
for link in content.findAll(name='a'):
if 'relacj' in link['href']:
return self.index_to_soup(link['href'], raw=True)
if 'wywiad' in soup.title.string.lower():
for link in content.findAll(name='a'):
if 'wywiad' in link['href']:
return self.index_to_soup(link['href'], raw=True)
def preprocess_html(self, soup): def preprocess_html(self, soup):
for a in soup('a'): for a in soup('a'):
if a.has_key('href') and not a['href'].startswith('http'): if a.has_key('href') and not a['href'].startswith('http'):
if '/gry/' in a['href']: if '/gry/' in a['href']:
a['href']='http://www.gry.gildia.pl' + a['href'] a['href'] = 'http://www.gry.gildia.pl' + a['href']
elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower(): elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
a['href']='http://www.literatura.gildia.pl' + a['href'] a['href'] = 'http://www.literatura.gildia.pl' + a['href']
elif u'komiks' in soup.title.string.lower(): elif u'komiks' in soup.title.string.lower():
a['href']='http://www.literatura.gildia.pl' + a['href'] a['href'] = 'http://www.literatura.gildia.pl' + a['href']
else: else:
a['href']='http://www.gildia.pl' + a['href'] a['href'] = 'http://www.gildia.pl' + a['href']
return soup return soup

Binary file not shown.

After

Width:  |  Height:  |  Size: 909 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 294 B

View File

@ -3,33 +3,29 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = 'teepel' __copyright__ = 'teepel'
'''
media2.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class media2_pl(BasicNewsRecipe): class media2_pl(BasicNewsRecipe):
title = u'Media2' title = u'Media2'
__author__ = 'teepel <teepel44@gmail.com>' __author__ = 'teepel <teepel44@gmail.com>'
language = 'pl' language = 'pl'
description =u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.'
masthead_url='http://media2.pl/res/logo/www.png' masthead_url = 'http://media2.pl/res/logo/www.png'
remove_empty_feeds= True cover_url = 'http://media2.pl/res/logo/www.png'
oldest_article = 1 remove_empty_feeds = True
oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript=True remove_javascript = True
no_stylesheets=True no_stylesheets = True
simultaneous_downloads = 5 remove_attributes = ['style']
ignore_duplicate_articles = {'title', 'url'}
extra_css = '''.news-lead{font-weight: bold; }''' extra_css = '''.news-lead{font-weight: bold; }'''
keep_only_tags =[] keep_only_tags = [dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})]
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})) remove_tags = [dict(name = 'span', attrs = {'class' : 'news-comments'}), dict(name = 'div', attrs = {'class' : 'item-sidebar'}), dict(name = 'div', attrs = {'class' : 'news-tags'})]
remove_tags =[] feeds = [(u'Media2', u'http://feeds.feedburner.com/media2'), (u'Internet', u'http://feeds.feedburner.com/media2/internet'),
remove_tags.append(dict(name = 'span', attrs = {'class' : 'news-comments'})) (u'Media', 'http://feeds.feedburner.com/media2/media'), (u'Telekomunikacja', 'http://feeds.feedburner.com/media2/telekomunikacja'),
remove_tags.append(dict(name = 'div', attrs = {'class' : 'item-sidebar'})) (u'Reklama/PR', 'http://feeds.feedburner.com/media2/reklama-pr'), (u'Technologie', 'http://feeds.feedburner.com/media2/technologie'),
remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-tags'})) (u'Badania', 'http://feeds.feedburner.com/media2/badania')
]
feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')]

View File

@ -1,7 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
class NaukawPolsce(BasicNewsRecipe): class NaukawPolsce(BasicNewsRecipe):
title = u'Nauka w Polsce' title = u'PAP Nauka w Polsce'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.' description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.'
category = 'science' category = 'science'

View File

@ -3,7 +3,7 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Poltergeist(BasicNewsRecipe): class Poltergeist(BasicNewsRecipe):
title = u'Poltergeist' title = u'Polter.pl'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.' description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.'
category = 'fantasy, books, rpg, games' category = 'fantasy, books, rpg, games'

View File

@ -1,41 +1,35 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ppeRecipe(BasicNewsRecipe): class ppeRecipe(BasicNewsRecipe):
__author__ = u'Artur Stachecki <artur.stachecki@gmail.com>' __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl' language = 'pl'
title = u'ppe.pl' title = u'ppe.pl'
category = u'News' category = u'News'
description = u'Portal o konsolach i grach wideo.' description = u'Portal o konsolach i grach wideo.'
cover_url='' extra_css = '.categories > li {list-style: none; display: inline;} .galmini > li {list-style: none; float: left;} .calibre_navbar {clear: both;}'
remove_empty_feeds= True remove_empty_feeds = True
no_stylesheets=True
oldest_article = 1
max_articles_per_feed = 100000
recursions = 0
no_stylesheets = True no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
remove_javascript = True remove_javascript = True
simultaneous_downloads = 2 remove_empty_feeds = True
remove_attributes = ['style']
keep_only_tags =[] keep_only_tags = [dict(attrs={'class':'box'})]
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-heading'})) remove_tags = [dict(attrs={'class':['voltage-1', 'voltage-2', 'encyklopedia', 'nag', 'related', 'comment_form', 'komentarze-box']})]
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'tresc-poziom'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria1'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria2'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria3'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-photo'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'fbl'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'info'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'links'}))
remove_tags.append(dict(name = 'div', attrs = {'style' : 'padding: 4px'}))
feeds = [ feeds = [
('Newsy', 'feed://ppe.pl/rss/rss.xml'), ('Newsy', 'http://ppe.pl/rss.html'),
('Recenzje', 'http://ppe.pl/rss-recenzje.html'),
('Publicystyka', 'http://ppe.pl/rss-publicystyka.html'),
] ]
def get_cover_url(self):
soup = self.index_to_soup('http://www.ppe.pl/psx_extreme.html')
part = soup.find(attrs={'class':'archiwum-foto'})['style']
part = re.search("'(.+)'", part).group(1).replace('_min', '')
return 'http://www.ppe.pl' + part

View File

@ -1,3 +1,4 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment from calibre.ebooks.BeautifulSoup import Comment
@ -11,6 +12,7 @@ class PurePC(BasicNewsRecipe):
language = 'pl' language = 'pl'
masthead_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg' masthead_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg'
cover_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg' cover_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg'
extra_css = '.wykres_logo {float: left; margin-right: 5px;}'
no_stylesheets = True no_stylesheets = True
keep_only_tags= [dict(id='content')] keep_only_tags= [dict(id='content')]
remove_tags_after= dict(attrs={'class':'fivestar-widget'}) remove_tags_after= dict(attrs={'class':'fivestar-widget'})
@ -19,11 +21,14 @@ class PurePC(BasicNewsRecipe):
def append_page(self, soup, appendtag): def append_page(self, soup, appendtag):
nexturl= appendtag.find(attrs={'class':'pager-next'}) lasturl = appendtag.find(attrs={'class':'pager-last'})
if nexturl: if lasturl:
while nexturl: regex = re.search('(.+?2C)(\d+)', lasturl.a['href'])
soup2 = self.index_to_soup('http://www.purepc.pl'+ nexturl.a['href']) baseurl = regex.group(1).replace('?page=0%2C', '?page=1%2C')
nexturl=soup2.find(attrs={'class':'pager-next'}) baseurl = 'http://www.purepc.pl' + baseurl
nr = int(regex.group(2))
for page_nr in range(1, nr+1):
soup2 = self.index_to_soup(baseurl+str(page_nr))
pagetext = soup2.find(attrs={'class':'article'}) pagetext = soup2.find(attrs={'class':'article'})
pos = len(appendtag.contents) pos = len(appendtag.contents)
appendtag.insert(pos, pagetext) appendtag.insert(pos, pagetext)

View File

@ -1880,7 +1880,7 @@ class KOBOTOUCH(KOBO):
# Remove any entries for the Activity table - removes tile from new home page # Remove any entries for the Activity table - removes tile from new home page
if self.has_activity_table(): if self.has_activity_table():
debug_print('KoboTouch:delete_via_sql: detete from Activity') debug_print('KoboTouch:delete_via_sql: delete from Activity')
cursor.execute('delete from Activity where Id =?', t) cursor.execute('delete from Activity where Id =?', t)
connection.commit() connection.commit()
@ -2391,6 +2391,7 @@ class KOBOTOUCH(KOBO):
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute(delete_query) cursor.execute(delete_query)
cursor.execute(update_query) cursor.execute(update_query)
if self.has_activity_table():
cursor.execute(delete_activity_query) cursor.execute(delete_activity_query)
connection.commit() connection.commit()
cursor.close() cursor.close()

View File

@ -14,9 +14,17 @@ class DOCXInput(InputFormatPlugin):
description = 'Convert DOCX files (.docx) to HTML' description = 'Convert DOCX files (.docx) to HTML'
file_types = set(['docx']) file_types = set(['docx'])
options = {
OptionRecommendation(name='docx_no_cover', recommended_value=False,
help=_('Normally, if a large image is present at the start of the document that looks like a cover, '
'it will be removed from the document and used as the cover for created ebook. This option '
'turns off that behavior.')),
}
recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)]) recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
def convert(self, stream, options, file_ext, log, accelerators): def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.docx.to_html import Convert from calibre.ebooks.docx.to_html import Convert
return Convert(stream, log=log)() return Convert(stream, detect_cover=not options.docx_no_cover, log=log)()

View File

@ -132,10 +132,10 @@ class RunStyle(object):
all_properties = { all_properties = {
'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint',
'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', 'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', 'webHidden',
'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color', 'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family' 'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family',
} }
toggle_properties = { toggle_properties = {
@ -150,7 +150,7 @@ class RunStyle(object):
else: else:
for p in ( for p in (
'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow', 'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
'smallCaps', 'strike', 'vanish', 'smallCaps', 'strike', 'vanish', 'webHidden',
): ):
setattr(self, p, binary_property(rPr, p)) setattr(self, p, binary_property(rPr, p))
@ -210,7 +210,7 @@ class RunStyle(object):
c['text-shadow'] = '2px 2px' c['text-shadow'] = '2px 2px'
if self.smallCaps is True: if self.smallCaps is True:
c['font-variant'] = 'small-caps' c['font-variant'] = 'small-caps'
if self.vanish is True: if self.vanish is True or self.webHidden is True:
c['display'] = 'none' c['display'] = 'none'
self.get_border_css(c) self.get_border_css(c)

View File

@ -6,6 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
def mergeable(previous, current): def mergeable(previous, current):
if previous.tail or current.tail: if previous.tail or current.tail:
@ -83,8 +84,19 @@ def lift(span):
else: else:
add_text(last_child, 'tail', span.tail) add_text(last_child, 'tail', span.tail)
def before_count(root, tag, limit=10):
body = root.xpath('//body[1]')
if not body:
return limit
ans = 0
for elem in body[0].iterdescendants():
if elem is tag:
return ans
ans += 1
if ans > limit:
return limit
def cleanup_markup(root, styles): def cleanup_markup(log, root, styles, dest_dir, detect_cover):
# Merge consecutive spans that have the same styling # Merge consecutive spans that have the same styling
current_run = [] current_run = []
for span in root.xpath('//span'): for span in root.xpath('//span'):
@ -134,3 +146,22 @@ def cleanup_markup(root, styles):
for span in root.xpath('//span[not(@class) and not(@id)]'): for span in root.xpath('//span[not(@class) and not(@id)]'):
lift(span) lift(span)
if detect_cover:
# Check if the first image in the document is possibly a cover
img = root.xpath('//img[@src][1]')
if img:
img = img[0]
path = os.path.join(dest_dir, img.get('src'))
if os.path.exists(path) and before_count(root, img, limit=10) < 5:
from calibre.utils.magick.draw import identify
try:
width, height, fmt = identify(path)
except:
width, height, fmt = 0, 0, None
is_cover = 0.8 <= height/width <= 1.8 and height*width >= 160000
if is_cover:
log.debug('Detected an image that looks like a cover')
img.getparent().remove(img)
return path

View File

@ -112,15 +112,16 @@ class Images(object):
base += '.' + ext base += '.' + ext
exists = frozenset(self.used.itervalues()) exists = frozenset(self.used.itervalues())
c = 1 c = 1
while base in exists: name = base
while name in exists:
n, e = base.rpartition('.')[0::2] n, e = base.rpartition('.')[0::2]
base = '%s-%d.%s' % (n, c, e) name = '%s-%d.%s' % (n, c, e)
c += 1 c += 1
self.used[rid] = base self.used[rid] = name
with open(os.path.join(self.dest_dir, base), 'wb') as f: with open(os.path.join(self.dest_dir, name), 'wb') as f:
f.write(raw) f.write(raw)
self.all_images.add('images/' + base) self.all_images.add('images/' + name)
return base return name
def pic_to_img(self, pic, alt=None): def pic_to_img(self, pic, alt=None):
name = None name = None

View File

@ -260,6 +260,7 @@ class Styles(object):
for attr in ans.all_properties: for attr in ans.all_properties:
if not (is_numbering and attr == 'text_indent'): # skip text-indent for lists if not (is_numbering and attr == 'text_indent'): # skip text-indent for lists
setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr)) setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
ans.linked_style = direct_formatting.linked_style
return ans return ans
def resolve_run(self, r): def resolve_run(self, r):
@ -389,6 +390,19 @@ class Styles(object):
else: else:
ps.numbering = (ps.numbering[0], lvl) ps.numbering = (ps.numbering[0], lvl)
def apply_contextual_spacing(self, paras):
last_para = None
for p in paras:
if last_para is not None:
ls = self.resolve_paragraph(last_para)
ps = self.resolve_paragraph(p)
if ls.linked_style is not None and ls.linked_style == ps.linked_style:
if ls.contextualSpacing is True:
ls.margin_bottom = 0
if ps.contextualSpacing is True:
ps.margin_top = 0
last_para = p
def register(self, css, prefix): def register(self, css, prefix):
h = hash(frozenset(css.iteritems())) h = hash(frozenset(css.iteritems()))
ans, _ = self.classes.get(h, (None, None)) ans, _ = self.classes.get(h, (None, None))

View File

@ -25,9 +25,8 @@ from calibre.ebooks.docx.tables import Tables
from calibre.ebooks.docx.footnotes import Footnotes from calibre.ebooks.docx.footnotes import Footnotes
from calibre.ebooks.docx.cleanup import cleanup_markup from calibre.ebooks.docx.cleanup import cleanup_markup
from calibre.ebooks.docx.theme import Theme from calibre.ebooks.docx.theme import Theme
from calibre.ebooks.docx.toc import create_toc
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
class Text: class Text:
@ -41,11 +40,12 @@ class Text:
class Convert(object): class Convert(object):
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None): def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None):
self.docx = DOCX(path_or_stream, log=log) self.docx = DOCX(path_or_stream, log=log)
self.ms_pat = re.compile(r'\s{2,}') self.ms_pat = re.compile(r'\s{2,}')
self.ws_pat = re.compile(r'[\n\r\t]') self.ws_pat = re.compile(r'[\n\r\t]')
self.log = self.docx.log self.log = self.docx.log
self.detect_cover = detect_cover
self.notes_text = notes_text or _('Notes') self.notes_text = notes_text or _('Notes')
self.dest_dir = dest_dir or os.getcwdu() self.dest_dir = dest_dir or os.getcwdu()
self.mi = self.docx.metadata self.mi = self.docx.metadata
@ -86,6 +86,7 @@ class Convert(object):
self.framed_map = {} self.framed_map = {}
self.anchor_map = {} self.anchor_map = {}
self.link_map = defaultdict(list) self.link_map = defaultdict(list)
paras = []
self.log.debug('Converting Word markup to HTML') self.log.debug('Converting Word markup to HTML')
self.read_page_properties(doc) self.read_page_properties(doc)
@ -94,6 +95,8 @@ class Convert(object):
if wp.tag.endswith('}p'): if wp.tag.endswith('}p'):
p = self.convert_p(wp) p = self.convert_p(wp)
self.body.append(p) self.body.append(p)
paras.append(wp)
self.styles.apply_contextual_spacing(paras)
notes_header = None notes_header = None
if self.footnotes.has_notes: if self.footnotes.has_notes:
@ -107,12 +110,16 @@ class Convert(object):
dl.append(DT('[', A('' + text, href='#back_%s' % anchor, title=text), id=anchor)) dl.append(DT('[', A('' + text, href='#back_%s' % anchor, title=text), id=anchor))
dl[-1][0].tail = ']' dl[-1][0].tail = ']'
dl.append(DD()) dl.append(DD())
paras = []
for wp in note: for wp in note:
if wp.tag.endswith('}tbl'): if wp.tag.endswith('}tbl'):
self.tables.register(wp, self.styles) self.tables.register(wp, self.styles)
self.page_map[wp] = self.current_page self.page_map[wp] = self.current_page
else:
p = self.convert_p(wp) p = self.convert_p(wp)
dl[-1].append(p) dl[-1].append(p)
paras.append(wp)
self.styles.apply_contextual_spacing(paras)
self.resolve_links(relationships_by_id) self.resolve_links(relationships_by_id)
@ -163,9 +170,9 @@ class Convert(object):
break break
self.log.debug('Cleaning up redundant markup generated by Word') self.log.debug('Cleaning up redundant markup generated by Word')
cleanup_markup(self.html, self.styles) self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover)
return self.write() return self.write(doc)
def read_page_properties(self, doc): def read_page_properties(self, doc):
current = [] current = []
@ -260,48 +267,8 @@ class Convert(object):
self.styles.resolve_numbering(numbering) self.styles.resolve_numbering(numbering)
def create_toc(self): def write(self, doc):
' Create a TOC from headings in the document ' toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map)
root = self.body
headings = ('h1', 'h2', 'h3')
tocroot = TOC()
xpaths = [XPath('//%s' % x) for x in headings]
level_prev = {i+1:None for i in xrange(len(xpaths))}
level_prev[0] = tocroot
level_item_map = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
self.idcount = 0
def ensure_id(elem):
ans = elem.get('id', None)
if not ans:
self.idcount += 1
ans = 'toc_id_%d' % self.idcount
elem.set('id', ans)
return ans
for item in descendants(root, *headings):
lvl = plvl = item_level_map.get(item, None)
if lvl is None:
continue
parent = None
while parent is None:
plvl -= 1
parent = level_prev[plvl]
lvl = plvl + 1
elem_id = ensure_id(item)
text = elem_to_toc_text(item)
toc = parent.add_item('index.html', elem_id, text)
level_prev[lvl] = toc
for i in xrange(lvl+1, len(xpaths)+1):
level_prev[i] = None
if len(tuple(tocroot.flat())) > 1:
return tocroot
def write(self):
toc = self.create_toc()
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>') raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f: with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
f.write(raw) f.write(raw)
@ -314,6 +281,8 @@ class Convert(object):
opf.toc = toc opf.toc = toc
opf.create_manifest_from_files_in([self.dest_dir]) opf.create_manifest_from_files_in([self.dest_dir])
opf.create_spine(['index.html']) opf.create_spine(['index.html'])
if self.cover_image is not None:
opf.guide.set_cover(self.cover_image)
with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx: with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
opf.render(of, ncx, 'toc.ncx') opf.render(of, ncx, 'toc.ncx')
return os.path.join(self.dest_dir, 'metadata.opf') return os.path.join(self.dest_dir, 'metadata.opf')
@ -401,11 +370,13 @@ class Convert(object):
return wrapper return wrapper
def resolve_links(self, relationships_by_id): def resolve_links(self, relationships_by_id):
self.resolved_link_map = {}
for hyperlink, spans in self.link_map.iteritems(): for hyperlink, spans in self.link_map.iteritems():
span = spans[0] span = spans[0]
if len(spans) > 1: if len(spans) > 1:
span = self.wrap_elems(spans, SPAN()) span = self.wrap_elems(spans, SPAN())
span.tag = 'a' span.tag = 'a'
self.resolved_link_map[hyperlink] = span
tgt = get(hyperlink, 'w:tgtFrame') tgt = get(hyperlink, 'w:tgtFrame')
if tgt: if tgt:
span.set('target', tgt) span.set('target', tgt)
@ -474,8 +445,6 @@ class Convert(object):
l.set('class', 'noteref') l.set('class', 'noteref')
text.add_elem(l) text.add_elem(l)
ans.append(text.elem) ans.append(text.elem)
elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate':
text.buf.append('\xa0')
if text.buf: if text.buf:
setattr(text.elem, text.attr, ''.join(text.buf)) setattr(text.elem, text.attr, ''.join(text.buf))

View File

@ -0,0 +1,140 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import namedtuple
from lxml.etree import tostring
from calibre.ebooks.docx.names import XPath, descendants, get, ancestor
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
class Count(object):
__slots__ = ('val',)
def __init__(self):
self.val = 0
def from_headings(body):
' Create a TOC from headings in the document '
headings = ('h1', 'h2', 'h3')
tocroot = TOC()
xpaths = [XPath('//%s' % x) for x in headings]
level_prev = {i+1:None for i in xrange(len(xpaths))}
level_prev[0] = tocroot
level_item_map = {i+1:frozenset(xp(body)) for i, xp in enumerate(xpaths)}
item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems}
idcount = Count()
def ensure_id(elem):
ans = elem.get('id', None)
if not ans:
idcount.val += 1
ans = 'toc_id_%d' % idcount.val
elem.set('id', ans)
return ans
for item in descendants(body, *headings):
lvl = plvl = item_level_map.get(item, None)
if lvl is None:
continue
parent = None
while parent is None:
plvl -= 1
parent = level_prev[plvl]
lvl = plvl + 1
elem_id = ensure_id(item)
text = elem_to_toc_text(item)
toc = parent.add_item('index.html', elem_id, text)
level_prev[lvl] = toc
for i in xrange(lvl+1, len(xpaths)+1):
level_prev[i] = None
if len(tuple(tocroot.flat())) > 1:
return tocroot
def structure_toc(entries):
indent_vals = sorted({x.indent for x in entries})
last_found = [None for i in indent_vals]
newtoc = TOC()
if len(indent_vals) > 6:
for x in entries:
newtoc.add_item('index.html', x.anchor, x.text)
return newtoc
def find_parent(level):
candidates = last_found[:level]
for x in reversed(candidates):
if x is not None:
return x
return newtoc
for item in entries:
level = indent_vals.index(item.indent)
parent = find_parent(level)
last_found[level] = parent.add_item('index.html', item.anchor,
item.text)
for i in xrange(level+1, len(last_found)):
last_found[i] = None
return newtoc
def link_to_txt(a, styles, object_map):
if len(a) > 1:
for child in a:
run = object_map.get(child, None)
if run is not None:
rs = styles.resolve(run)
if rs.css.get('display', None) == 'none':
a.remove(child)
return tostring(a, method='text', with_tail=False, encoding=unicode).strip()
def from_toc(docx, link_map, styles, object_map):
toc_level = None
level = 0
TI = namedtuple('TI', 'text anchor indent')
toc = []
for tag in XPath('//*[(@w:fldCharType and name()="w:fldChar") or name()="w:hyperlink" or name()="w:instrText"]')(docx):
n = tag.tag.rpartition('}')[-1]
if n == 'fldChar':
t = get(tag, 'w:fldCharType')
if t == 'begin':
level += 1
elif t == 'end':
level -= 1
if toc_level is not None and level < toc_level:
break
elif n == 'instrText':
if level > 0 and tag.text and tag.text.strip().startswith('TOC '):
toc_level = level
elif n == 'hyperlink':
if toc_level is not None and level >= toc_level and tag in link_map:
a = link_map[tag]
href = a.get('href', None)
txt = link_to_txt(a, styles, object_map)
p = ancestor(tag, 'w:p')
if txt and href and p is not None:
ps = styles.resolve_paragraph(p)
try:
ml = int(ps.margin_left[:-2])
except (TypeError, ValueError, AttributeError):
ml = 0
if ps.text_align in {'center', 'right'}:
ml = 0
toc.append(TI(txt, href[1:], ml))
if toc:
return structure_toc(toc)
def create_toc(docx, body, link_map, styles, object_map):
return from_toc(docx, link_map, styles, object_map) or from_headings(body)

View File

@ -8,27 +8,37 @@ __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.ebooks.docx.container import DOCX from calibre.ebooks.docx.container import DOCX
from calibre.ebooks.docx.names import XPath, get
from calibre.utils.zipfile import ZipFile
from calibre.utils.magick.draw import identify_data from calibre.utils.magick.draw import identify_data
images = XPath('//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]')
def get_cover(docx):
doc = docx.document
rid_map = docx.document_relationships[0]
for image in images(doc):
rid = get(image, 'r:embed') or get(image, 'r:id')
if rid in rid_map:
try:
raw = docx.read(rid_map[rid])
width, height, fmt = identify_data(raw)
except Exception:
continue
if 0.8 <= height/width <= 1.8 and height*width >= 160000:
return (fmt, raw)
def get_metadata(stream): def get_metadata(stream):
c = DOCX(stream, extract=False) c = DOCX(stream, extract=False)
mi = c.metadata mi = c.metadata
try:
cdata = get_cover(c)
except Exception:
cdata = None
import traceback
traceback.print_exc()
c.close() c.close()
stream.seek(0) stream.seek(0)
cdata = None
with ZipFile(stream, 'r') as zf:
for zi in zf.infolist():
ext = zi.filename.rpartition('.')[-1].lower()
if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
raw = zf.read(zi)
try:
width, height, fmt = identify_data(raw)
except:
continue
if 0.8 <= height/width <= 1.8 and height*width >= 160000:
cdata = (fmt, raw)
if cdata is not None: if cdata is not None:
mi.cover_data = cdata mi.cover_data = cdata

View File

@ -8,7 +8,6 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import os import os
from urlparse import urldefrag from urlparse import urldefrag
import base64
from lxml import etree from lxml import etree
from PyQt4.QtCore import Qt from PyQt4.QtCore import Qt
from PyQt4.QtCore import QByteArray from PyQt4.QtCore import QByteArray
@ -23,6 +22,8 @@ from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
from calibre.ebooks.oeb.base import xml2str, xpath from calibre.ebooks.oeb.base import xml2str, xpath
from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.imghdr import what
IMAGE_TAGS = set([XHTML('img'), XHTML('object')]) IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align']) KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
@ -46,6 +47,7 @@ class SVGRasterizer(object):
def __call__(self, oeb, context): def __call__(self, oeb, context):
oeb.logger.info('Rasterizing SVG images...') oeb.logger.info('Rasterizing SVG images...')
self.temp_files = []
self.stylizer_cache = {} self.stylizer_cache = {}
self.oeb = oeb self.oeb = oeb
self.opts = context self.opts = context
@ -54,6 +56,11 @@ class SVGRasterizer(object):
self.dataize_manifest() self.dataize_manifest()
self.rasterize_spine() self.rasterize_spine()
self.rasterize_cover() self.rasterize_cover()
for pt in self.temp_files:
try:
os.remove(pt)
except:
pass
def rasterize_svg(self, elem, width=0, height=0, format='PNG'): def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
view_box = elem.get('viewBox', elem.get('viewbox', None)) view_box = elem.get('viewBox', elem.get('viewbox', None))
@ -112,9 +119,12 @@ class SVGRasterizer(object):
if abshref not in hrefs: if abshref not in hrefs:
continue continue
linkee = hrefs[abshref] linkee = hrefs[abshref]
data = base64.encodestring(str(linkee)) data = str(linkee)
data = "data:%s;base64,%s" % (linkee.media_type, data) ext = what(None, data) or 'jpg'
elem.attrib[XLINK('href')] = data with PersistentTemporaryFile(suffix='.'+ext) as pt:
pt.write(data)
self.temp_files.append(pt.name)
elem.attrib[XLINK('href')] = pt.name
return svg return svg
def stylizer(self, item): def stylizer(self, item):

View File

@ -85,8 +85,15 @@ def getimagesize(url):
""" """
try:
from PIL import ImageFile
except ImportError:
try: try:
import ImageFile import ImageFile
except ImportError:
return None
try:
import urllib2 import urllib2
except ImportError: except ImportError:
return None return None

View File

@ -92,7 +92,7 @@ defs['tags_browser_partition_method'] = 'first letter'
defs['tags_browser_collapse_at'] = 100 defs['tags_browser_collapse_at'] = 100
defs['tag_browser_dont_collapse'] = [] defs['tag_browser_dont_collapse'] = []
defs['edit_metadata_single_layout'] = 'default' defs['edit_metadata_single_layout'] = 'default'
defs['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}' defs['default_author_link'] = 'https://en.wikipedia.org/w/index.php?search={author}'
defs['preserve_date_on_ctl'] = True defs['preserve_date_on_ctl'] = True
defs['manual_add_auto_convert'] = False defs['manual_add_auto_convert'] = False
defs['cb_fullscreen'] = False defs['cb_fullscreen'] = False

View File

@ -0,0 +1,23 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from calibre.gui2.convert.docx_input_ui import Ui_Form
from calibre.gui2.convert import Widget
class PluginWidget(Widget, Ui_Form):
TITLE = _('DOCX Input')
HELP = _('Options specific to')+' DOCX '+_('input')
COMMIT_NAME = 'docx_input'
ICON = I('mimetypes/docx.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
['docx_no_cover', ])
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Form</class>
<widget class="QWidget" name="Form">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>518</width>
<height>353</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_3">
<item>
<widget class="QCheckBox" name="opt_docx_no_cover">
<property name="text">
<string>Do not try to autodetect a &amp;cover from images in the document</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>213</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>

View File

@ -89,7 +89,7 @@ def get_installed_plugin_status(display_plugin):
display_plugin.installed_version = None display_plugin.installed_version = None
display_plugin.plugin = None display_plugin.plugin = None
for plugin in initialized_plugins(): for plugin in initialized_plugins():
if plugin.name == display_plugin.name: if plugin.name == display_plugin.name and plugin.plugin_path is not None:
display_plugin.plugin = plugin display_plugin.plugin = plugin
display_plugin.installed_version = plugin.version display_plugin.installed_version = plugin.version
break break
@ -254,7 +254,7 @@ Platforms: Windows, OSX, Linux; History: Yes;</span></i></li>
return self.installed_version is not None return self.installed_version is not None
def is_upgrade_available(self): def is_upgrade_available(self):
return self.is_installed() and (self.installed_version < self.available_version \ return self.is_installed() and (self.installed_version < self.available_version
or self.is_deprecated) or self.is_deprecated)
def is_valid_platform(self): def is_valid_platform(self):
@ -317,7 +317,7 @@ class DisplayPluginModel(QAbstractTableModel):
def data(self, index, role): def data(self, index, role):
if not index.isValid(): if not index.isValid():
return NONE; return NONE
row, col = index.row(), index.column() row, col = index.row(), index.column()
if row < 0 or row >= self.rowCount(): if row < 0 or row >= self.rowCount():
return NONE return NONE
@ -429,11 +429,11 @@ class DisplayPluginModel(QAbstractTableModel):
return QVariant(_('This plugin has been deprecated and should be uninstalled')+'\n\n'+ return QVariant(_('This plugin has been deprecated and should be uninstalled')+'\n\n'+
_('Right-click to see more options')) _('Right-click to see more options'))
if not display_plugin.is_valid_platform(): if not display_plugin.is_valid_platform():
return QVariant(_('This plugin can only be installed on: %s') % \ return QVariant(_('This plugin can only be installed on: %s') %
', '.join(display_plugin.platforms)+'\n\n'+ ', '.join(display_plugin.platforms)+'\n\n'+
_('Right-click to see more options')) _('Right-click to see more options'))
if numeric_version < display_plugin.calibre_required_version: if numeric_version < display_plugin.calibre_required_version:
return QVariant(_('You must upgrade to at least Calibre %s before installing this plugin') % \ return QVariant(_('You must upgrade to at least Calibre %s before installing this plugin') %
self._get_display_version(display_plugin.calibre_required_version)+'\n\n'+ self._get_display_version(display_plugin.calibre_required_version)+'\n\n'+
_('Right-click to see more options')) _('Right-click to see more options'))
if display_plugin.installed_version < display_plugin.available_version: if display_plugin.installed_version < display_plugin.available_version:
@ -687,7 +687,7 @@ class PluginUpdaterDialog(SizePersistedDialog):
def _install_clicked(self): def _install_clicked(self):
display_plugin = self._selected_display_plugin() display_plugin = self._selected_display_plugin()
if not question_dialog(self, _('Install %s')%display_plugin.name, '<p>' + \ if not question_dialog(self, _('Install %s')%display_plugin.name, '<p>' +
_('Installing plugins is a <b>security risk</b>. ' _('Installing plugins is a <b>security risk</b>. '
'Plugins can contain a virus/malware. ' 'Plugins can contain a virus/malware. '
'Only install it if you got it from a trusted source.' 'Only install it if you got it from a trusted source.'
@ -886,3 +886,4 @@ class PluginUpdaterDialog(SizePersistedDialog):
pt.write(raw) pt.write(raw)
pt.close() pt.close()
return pt.name return pt.name

View File

@ -19,7 +19,7 @@ from calibre.gui2.dialogs.message_box import ViewLog
Question = namedtuple('Question', 'payload callback cancel_callback ' Question = namedtuple('Question', 'payload callback cancel_callback '
'title msg html_log log_viewer_title log_is_file det_msg ' 'title msg html_log log_viewer_title log_is_file det_msg '
'show_copy_button checkbox_msg checkbox_checked action_callback ' 'show_copy_button checkbox_msg checkbox_checked action_callback '
'action_label action_icon') 'action_label action_icon focus_action')
class ProceedQuestion(QDialog): class ProceedQuestion(QDialog):
@ -155,13 +155,14 @@ class ProceedQuestion(QDialog):
self.checkbox.setChecked(question.checkbox_checked) self.checkbox.setChecked(question.checkbox_checked)
self.do_resize() self.do_resize()
self.show() self.show()
self.bb.button(self.bb.Yes).setDefault(True) button = self.action_button if question.focus_action and question.action_callback is not None else self.bb.button(self.bb.Yes)
self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason) button.setDefault(True)
button.setFocus(Qt.OtherFocusReason)
def __call__(self, callback, payload, html_log, log_viewer_title, title, def __call__(self, callback, payload, html_log, log_viewer_title, title,
msg, det_msg='', show_copy_button=False, cancel_callback=None, msg, det_msg='', show_copy_button=False, cancel_callback=None,
log_is_file=False, checkbox_msg=None, checkbox_checked=False, log_is_file=False, checkbox_msg=None, checkbox_checked=False,
action_callback=None, action_label=None, action_icon=None): action_callback=None, action_label=None, action_icon=None, focus_action=False):
''' '''
A non modal popup that notifies the user that a background task has A non modal popup that notifies the user that a background task has
been completed. This class guarantees that only a single popup is been completed. This class guarantees that only a single popup is
@ -192,13 +193,14 @@ class ProceedQuestion(QDialog):
exactly the same way as callback. exactly the same way as callback.
:param action_label: The text on the action button :param action_label: The text on the action button
:param action_icon: The icon for the action button, must be a QIcon object or None :param action_icon: The icon for the action button, must be a QIcon object or None
:param focus_action: If True, the action button will be focused instead of the Yes button
''' '''
question = Question( question = Question(
payload, callback, cancel_callback, title, msg, html_log, payload, callback, cancel_callback, title, msg, html_log,
log_viewer_title, log_is_file, det_msg, show_copy_button, log_viewer_title, log_is_file, det_msg, show_copy_button,
checkbox_msg, checkbox_checked, action_callback, action_label, checkbox_msg, checkbox_checked, action_callback, action_label,
action_icon) action_icon, focus_action)
self.questions.append(question) self.questions.append(question)
self.show_question() self.show_question()

View File

@ -24,8 +24,10 @@ class NPWebView(QWebView):
self.gui = None self.gui = None
self.tags = '' self.tags = ''
self.setPage(NPWebPage()) self._page = NPWebPage()
self.page().networkAccessManager().setCookieJar(QNetworkCookieJar()) self.setPage(self._page)
self.cookie_jar = QNetworkCookieJar()
self.page().networkAccessManager().setCookieJar(self.cookie_jar)
http_proxy = get_proxies().get('http', None) http_proxy = get_proxies().get('http', None)
if http_proxy: if http_proxy:

View File

@ -82,7 +82,8 @@ class History(list):
return None return None
item = self[self.forward_pos] item = self[self.forward_pos]
self.back_pos = self.forward_pos - 1 self.back_pos = self.forward_pos - 1
if self.back_pos < 0: self.back_pos = None if self.back_pos < 0:
self.back_pos = None
self.insert_pos = self.back_pos or 0 self.insert_pos = self.back_pos or 0
self.forward_pos = None if self.forward_pos > len(self) - 2 else self.forward_pos + 1 self.forward_pos = None if self.forward_pos > len(self) - 2 else self.forward_pos + 1
self.set_actions() self.set_actions()
@ -268,7 +269,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.action_full_screen.shortcuts()])) self.action_full_screen.shortcuts()]))
self.action_back.triggered[bool].connect(self.back) self.action_back.triggered[bool].connect(self.back)
self.action_forward.triggered[bool].connect(self.forward) self.action_forward.triggered[bool].connect(self.forward)
self.action_bookmark.triggered[bool].connect(self.bookmark)
self.action_preferences.triggered.connect(self.do_config) self.action_preferences.triggered.connect(self.do_config)
self.pos.editingFinished.connect(self.goto_page_num) self.pos.editingFinished.connect(self.goto_page_num)
self.vertical_scrollbar.valueChanged[int].connect(lambda self.vertical_scrollbar.valueChanged[int].connect(lambda
@ -294,7 +294,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.toc.setCursor(Qt.PointingHandCursor) self.toc.setCursor(Qt.PointingHandCursor)
self.tool_bar.setContextMenuPolicy(Qt.PreventContextMenu) self.tool_bar.setContextMenuPolicy(Qt.PreventContextMenu)
self.tool_bar2.setContextMenuPolicy(Qt.PreventContextMenu) self.tool_bar2.setContextMenuPolicy(Qt.PreventContextMenu)
self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.MenuButtonPopup) self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.InstantPopup)
self.action_full_screen.setCheckable(True) self.action_full_screen.setCheckable(True)
self.full_screen_label = QLabel(''' self.full_screen_label = QLabel('''
<center> <center>
@ -394,7 +394,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.action_toggle_paged_mode.setToolTip(self.FLOW_MODE_TT if self.action_toggle_paged_mode.setToolTip(self.FLOW_MODE_TT if
self.action_toggle_paged_mode.isChecked() else self.action_toggle_paged_mode.isChecked() else
self.PAGED_MODE_TT) self.PAGED_MODE_TT)
if at_start: return if at_start:
return
self.reload() self.reload()
def settings_changed(self): def settings_changed(self):
@ -486,8 +487,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
at_start=True) at_start=True)
def lookup(self, word): def lookup(self, word):
self.dictionary_view.setHtml('<html><body><p>'+ \ self.dictionary_view.setHtml('<html><body><p>'+
_('Connecting to dict.org to lookup: <b>%s</b>&hellip;')%word + \ _('Connecting to dict.org to lookup: <b>%s</b>&hellip;')%word +
'</p></body></html>') '</p></body></html>')
self.dictionary_box.show() self.dictionary_box.show()
self._lookup = Lookup(word, parent=self) self._lookup = Lookup(word, parent=self)
@ -964,6 +965,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
def set_bookmarks(self, bookmarks): def set_bookmarks(self, bookmarks):
self.bookmarks_menu.clear() self.bookmarks_menu.clear()
self.bookmarks_menu.addAction(_("Bookmark this location"), self.bookmark)
self.bookmarks_menu.addAction(_("Manage Bookmarks"), self.manage_bookmarks) self.bookmarks_menu.addAction(_("Manage Bookmarks"), self.manage_bookmarks)
self.bookmarks_menu.addSeparator() self.bookmarks_menu.addSeparator()
current_page = None current_page = None
@ -1202,3 +1204,4 @@ def main(args=sys.argv):
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())

View File

@ -139,7 +139,7 @@ class Kobo(Device):
id = 'kobo' id = 'kobo'
class KoboVox(Kobo): class KoboVox(Kobo):
name = 'Kobo Vox' name = 'Kobo Vox and Kobo Aura HD'
output_profile = 'tablet' output_profile = 'tablet'
id = 'kobo_vox' id = 'kobo_vox'

View File

@ -26,7 +26,7 @@ class Template(object):
self.html_lang = lang self.html_lang = lang
def generate(self, *args, **kwargs): def generate(self, *args, **kwargs):
if not kwargs.has_key('style'): if 'style' not in kwargs:
kwargs['style'] = '' kwargs['style'] = ''
for key in kwargs.keys(): for key in kwargs.keys():
if isbytestring(kwargs[key]): if isbytestring(kwargs[key]):
@ -152,8 +152,8 @@ class FeedTemplate(Template):
body.append(div) body.append(div)
if getattr(feed, 'image', None): if getattr(feed, 'image', None):
div.append(DIV(IMG( div.append(DIV(IMG(
alt = feed.image_alt if feed.image_alt else '', alt=feed.image_alt if feed.image_alt else '',
src = feed.image_url src=feed.image_url
), ),
CLASS('calibre_feed_image'))) CLASS('calibre_feed_image')))
if getattr(feed, 'description', None): if getattr(feed, 'description', None):
@ -261,8 +261,8 @@ class TouchscreenIndexTemplate(Template):
for i, feed in enumerate(feeds): for i, feed in enumerate(feeds):
if feed: if feed:
tr = TR() tr = TR()
tr.append(TD( CLASS('calibre_rescale_120'), A(feed.title, href='feed_%d/index.html'%i))) tr.append(TD(CLASS('calibre_rescale_120'), A(feed.title, href='feed_%d/index.html'%i)))
tr.append(TD( '%s' % len(feed.articles), style="text-align:right")) tr.append(TD('%s' % len(feed.articles), style="text-align:right"))
toc.append(tr) toc.append(tr)
div = DIV( div = DIV(
masthead_p, masthead_p,
@ -307,7 +307,7 @@ class TouchscreenFeedTemplate(Template):
if f > 0: if f > 0:
link = A(CLASS('feed_link'), link = A(CLASS('feed_link'),
trim_title(feeds[f-1].title), trim_title(feeds[f-1].title),
href = '../feed_%d/index.html' % int(f-1)) href='../feed_%d/index.html' % int(f-1))
navbar_tr.append(TD(CLASS('feed_prev'),link)) navbar_tr.append(TD(CLASS('feed_prev'),link))
# Up to Sections # Up to Sections
@ -319,13 +319,12 @@ class TouchscreenFeedTemplate(Template):
if f < len(feeds)-1: if f < len(feeds)-1:
link = A(CLASS('feed_link'), link = A(CLASS('feed_link'),
trim_title(feeds[f+1].title), trim_title(feeds[f+1].title),
href = '../feed_%d/index.html' % int(f+1)) href='../feed_%d/index.html' % int(f+1))
navbar_tr.append(TD(CLASS('feed_next'),link)) navbar_tr.append(TD(CLASS('feed_next'),link))
navbar_t.append(navbar_tr) navbar_t.append(navbar_tr)
top_navbar = navbar_t top_navbar = navbar_t
bottom_navbar = copy.copy(navbar_t) bottom_navbar = copy.copy(navbar_t)
#print "\n%s\n" % etree.tostring(navbar_t, pretty_print=True) # print "\n%s\n" % etree.tostring(navbar_t, pretty_print=True)
# Build the page # Build the page
head = HEAD(TITLE(feed.title)) head = HEAD(TITLE(feed.title))
@ -342,8 +341,8 @@ class TouchscreenFeedTemplate(Template):
if getattr(feed, 'image', None): if getattr(feed, 'image', None):
div.append(DIV(IMG( div.append(DIV(IMG(
alt = feed.image_alt if feed.image_alt else '', alt=feed.image_alt if feed.image_alt else '',
src = feed.image_url src=feed.image_url
), ),
CLASS('calibre_feed_image'))) CLASS('calibre_feed_image')))
if getattr(feed, 'description', None): if getattr(feed, 'description', None):
@ -388,6 +387,14 @@ class TouchscreenNavBarTemplate(Template):
navbar_t = TABLE(CLASS('touchscreen_navbar')) navbar_t = TABLE(CLASS('touchscreen_navbar'))
navbar_tr = TR() navbar_tr = TR()
if bottom and not url.startswith('file://'):
navbar.append(HR())
text = 'This article was downloaded by '
p = PT(text, STRONG(__appname__), A(url, href=url),
style='text-align:left; max-width: 100%; overflow: hidden;')
p[0].tail = ' from '
navbar.append(p)
navbar.append(BR())
# | Previous # | Previous
if art > 0: if art > 0:
link = A(CLASS('article_link'),_('Previous'),href='%s../article_%d/index.html'%(prefix, art-1)) link = A(CLASS('article_link'),_('Previous'),href='%s../article_%d/index.html'%(prefix, art-1))
@ -411,6 +418,7 @@ class TouchscreenNavBarTemplate(Template):
navbar_tr.append(TD(CLASS('article_next'),link)) navbar_tr.append(TD(CLASS('article_next'),link))
navbar_t.append(navbar_tr) navbar_t.append(navbar_tr)
navbar.append(navbar_t) navbar.append(navbar_t)
#print "\n%s\n" % etree.tostring(navbar, pretty_print=True) # print "\n%s\n" % etree.tostring(navbar, pretty_print=True)
self.root = HTML(head, BODY(navbar)) self.root = HTML(head, BODY(navbar))