diff --git a/recipes/cdrinfo_pl.recipe b/recipes/cdrinfo_pl.recipe new file mode 100644 index 0000000000..2a8b3b9a2e --- /dev/null +++ b/recipes/cdrinfo_pl.recipe @@ -0,0 +1,65 @@ +__license__ = 'GPL v3' +import re +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Comment +class cdrinfo(BasicNewsRecipe): + title = u'CDRinfo.pl' + __author__ = 'fenuks' + description = u'Serwis poświęcony archiwizacji danych. Testy i recenzje nagrywarek. Programy do nagrywania płyt. Dyski twarde, dyski SSD i serwery sieciowe NAS. Rankingi dyskow twardych, najszybsze dyski twarde, newsy, artykuły, testy, recenzje, porady, oprogramowanie. Zestawienie nagrywarek, najnowsze biosy do nagrywarek, programy dla dysków twardych.' + category = 'it, hardware' + #publication_type = '' + language = 'pl' + #encoding = '' + #extra_css = '' + cover_url = 'http://www.cdrinfo.pl/gfx/graph3/top.jpg' + #masthead_url = '' + use_embedded_content = False + oldest_article = 777 + max_articles_per_feed = 100 + no_stylesheets = True + remove_empty_feeds = True + remove_javascript = True + remove_attributes = ['style'] + preprocess_regexps = [(re.compile(u']*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com\.

', re.DOTALL), lambda match: '')] + ignore_duplicate_articles = {'title', 'url'} + + keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id='text')] + remove_tags = [dict(attrs={'class':['navigation', 'sociable']}), dict(name='hr'), dict(id='respond')] + remove_tags_after = dict(id='artnawigacja') + feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'), + (u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'), + (u'Pliki', 'http://www.cdrinfo.pl/rss/rss_pliki.xml') + ] + + def preprocess_html(self, soup): + if soup.find(id='artnawigacja'): + self.append_page(soup, soup.body) + return soup + + def append_page(self, soup, appendtag): + baseurl = 'http://cdrinfo.pl' + soup.find(name='input', attrs={'name':'ref'})['value'] + '/' + if baseurl[-2] == '/': + baseurl = baseurl[:-1] + tag = soup.find(id='artnawigacja') + div = tag.find('div', attrs={'align':'right'}) + while div: + counter = 0 + while counter < 5: + try: + soup2 = self.index_to_soup(baseurl+div.a['href']) + break + except: + counter += 1 + tag2 = soup2.find(id='artnawigacja') + div = tag2.find('div', attrs={'align':'right'}) + pagetext = soup2.find(attrs={'class':'art'}) + comments = pagetext.findAll(text=lambda text:isinstance(text, Comment)) + for comment in comments: + comment.extract() + for r in soup2.findAll(attrs={'class':'star-rating'}): + r.extract() + for r in soup2.findAll(attrs={'class':'star-rating2'}): + r.extract() + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + tag.extract() \ No newline at end of file diff --git a/recipes/ekologia_pl.recipe b/recipes/ekologia_pl.recipe index e925ebad6f..c053e6d5bc 100644 --- a/recipes/ekologia_pl.recipe +++ b/recipes/ekologia_pl.recipe @@ -9,13 +9,15 @@ class EkologiaPl(BasicNewsRecipe): language = 'pl' cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png' ignore_duplicate_articles = {'title', 'url'} - extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}' + extra_css = '.title {font-size: 200%;} .imagePowiazane {float:left; margin-right:5px; width: 200px;}' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True remove_empty_feeds = True + remove_javascript = True use_embedded_content = False remove_attrs = ['style'] + keep_only_tags = [dict(attrs={'class':'contentParent'})] remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']})] feeds = [(u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'), (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'), (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0')] diff --git a/recipes/gazeta_pl_bydgoszcz.recipe b/recipes/gazeta_pl_bydgoszcz.recipe new file mode 100644 index 0000000000..c0e9b265a8 --- /dev/null +++ b/recipes/gazeta_pl_bydgoszcz.recipe @@ -0,0 +1,87 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Comment +import re +class gw_bydgoszcz(BasicNewsRecipe): + title = u'Gazeta Wyborcza Bydgoszcz' + __author__ = 'fenuks' + language = 'pl' + description = 'Wiadomości z Bydgoszczy na portalu Gazeta.pl.' + category = 'newspaper' + publication_type = 'newspaper' + masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif' + INDEX = 'http://bydgoszcz.gazeta.pl' + cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif' + remove_empty_feeds = True + oldest_article = 3 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + + #rules for gazeta.pl + preprocess_regexps = [(re.compile(u'Czytaj więcej.*', re.DOTALL), lambda m: '')] + keep_only_tags = [dict(id='gazeta_article')] + remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(attrs={'class':['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})] + remove_tags_after = dict(id='gazeta_article_body') + + feeds = [(u'Wiadomości', u'http://rss.feedsportal.com/c/32739/f/530239/index.rss')] + + def print_version(self, url): + if 'feedsportal.com' in url: + s = url.rpartition('gazeta0Bpl') + u = s[2] + if not s[0]: + u = url.rpartition('wyborcza0Bpl')[2] + u = u.replace('/l/', '/') + u = u.replace('/ia1.htm', '') + u = u.replace('0Dbo0F1', '') + u = u.replace('/story01.htm', '') + u = u.replace('0C', '/') + u = u.replace('A', '') + u = u.replace('0E', '-') + u = u.replace('0H', ',') + u = u.replace('0I', '_') + u = u.replace('0B', '.') + u = self.INDEX + u + return u + else: + return url + + def preprocess_html(self, soup): + tag = soup.find(id='Str') + if soup.find(attrs={'class': 'piano_btn_1'}): + return None + elif tag and tag.findAll('a'): + self.append_page(soup, soup.body) + return soup + + def append_page(self, soup, appendtag): + tag = soup.find('div', attrs={'id': 'Str'}) + try: + baseurl = soup.find(name='meta', attrs={'property':'og:url'})['content'] + except: + return 1 + link = tag.findAll('a')[-1] + while link: + soup2 = self.index_to_soup(baseurl + link['href']) + link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1] + if not u'następne' in link.string: + link = '' + pagetext = soup2.find(id='artykul') + comments = pagetext.findAll(text=lambda text:isinstance(text, Comment)) + for comment in comments: + comment.extract() + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + tag.extract() + + def image_url_processor(self, baseurl, url): + if url.startswith(' '): + return url.strip() + else: + return url diff --git a/recipes/gildia_pl.recipe b/recipes/gildia_pl.recipe index 37c129aaa1..513bbe44d6 100644 --- a/recipes/gildia_pl.recipe +++ b/recipes/gildia_pl.recipe @@ -16,40 +16,47 @@ class Gildia(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} preprocess_regexps = [(re.compile(ur''), lambda match: '') ] ignore_duplicate_articles = {'title', 'url'} - remove_tags = [dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})] - keep_only_tags = dict(name='div', attrs={'class':'widetext'}) - feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), (u'Literatura', u'http://www.literatura.gildia.pl/rss'), (u'Film', u'http://www.film.gildia.pl/rss'), (u'Horror', u'http://www.horror.gildia.pl/rss'), (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), (u'Techno', u'http://www.techno.gildia.pl/rss'), (u'Historia', u'http://www.historia.gildia.pl/rss'), (u'Magia', u'http://www.magia.gildia.pl/rss'), (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), (u'RPG', u'http://www.rpg.gildia.pl/rss'), (u'LARP', u'http://www.larp.gildia.pl/rss'), (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), (u'Nauka', u'http://www.nauka.gildia.pl/rss')] - + remove_tags = [dict(name='div', attrs={'class':['backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})] + keep_only_tags = [dict(name='div', attrs={'class':'widetext'})] + feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), + (u'Literatura', u'http://www.literatura.gildia.pl/rss'), + (u'Film', u'http://www.film.gildia.pl/rss'), + (u'Horror', u'http://www.horror.gildia.pl/rss'), + (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), + (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), + (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), + (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), + (u'Techno', u'http://www.techno.gildia.pl/rss'), + (u'Historia', u'http://www.historia.gildia.pl/rss'), + (u'Magia', u'http://www.magia.gildia.pl/rss'), + (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), + (u'RPG', u'http://www.rpg.gildia.pl/rss'), + (u'LARP', u'http://www.larp.gildia.pl/rss'), + (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), + (u'Nauka', u'http://www.nauka.gildia.pl/rss'), + ] def skip_ad_pages(self, soup): content = soup.find('div', attrs={'class':'news'}) - if 'recenzj' in soup.title.string.lower(): - for link in content.findAll(name='a'): - if 'recenzj' in link['href'] or 'muzyka/plyty' in link['href']: - return self.index_to_soup(link['href'], raw=True) - if 'fragmen' in soup.title.string.lower(): - for link in content.findAll(name='a'): - if 'fragment' in link['href']: - return self.index_to_soup(link['href'], raw=True) - if 'relacj' in soup.title.string.lower(): - for link in content.findAll(name='a'): - if 'relacj' in link['href']: - return self.index_to_soup(link['href'], raw=True) - if 'wywiad' in soup.title.string.lower(): - for link in content.findAll(name='a'): - if 'wywiad' in link['href']: - return self.index_to_soup(link['href'], raw=True) - + words = ('recenzj', 'zapowied','fragmen', 'relacj', 'wywiad', 'nominacj') + for word in words: + if word in soup.title.string.lower(): + for link in content.findAll(name='a'): + if word in link['href'] or (link.string and word in link.string): + return self.index_to_soup(link['href'], raw=True) + for tag in content.findAll(name='a', href=re.compile('/publicystyka/')): + if 'Więcej...' == tag.string: + return self.index_to_soup(tag['href'], raw=True) def preprocess_html(self, soup): for a in soup('a'): if a.has_key('href') and not a['href'].startswith('http'): if '/gry/' in a['href']: - a['href']='http://www.gry.gildia.pl' + a['href'] + a['href'] = 'http://www.gry.gildia.pl' + a['href'] elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower(): - a['href']='http://www.literatura.gildia.pl' + a['href'] + a['href'] = 'http://www.literatura.gildia.pl' + a['href'] elif u'komiks' in soup.title.string.lower(): - a['href']='http://www.literatura.gildia.pl' + a['href'] + a['href'] = 'http://www.literatura.gildia.pl' + a['href'] else: - a['href']='http://www.gildia.pl' + a['href'] - return soup + a['href'] = 'http://www.gildia.pl' + a['href'] + return soup \ No newline at end of file diff --git a/recipes/icons/cdrinfo_pl.png b/recipes/icons/cdrinfo_pl.png new file mode 100644 index 0000000000..73dbc33692 Binary files /dev/null and b/recipes/icons/cdrinfo_pl.png differ diff --git a/recipes/icons/gazeta_pl_bydgoszcz.png b/recipes/icons/gazeta_pl_bydgoszcz.png new file mode 100644 index 0000000000..49d76d2ddc Binary files /dev/null and b/recipes/icons/gazeta_pl_bydgoszcz.png differ diff --git a/recipes/media2.recipe b/recipes/media2.recipe index 135740a62e..d685a90803 100644 --- a/recipes/media2.recipe +++ b/recipes/media2.recipe @@ -3,33 +3,29 @@ __license__ = 'GPL v3' __copyright__ = 'teepel' -''' -media2.pl -''' - from calibre.web.feeds.news import BasicNewsRecipe class media2_pl(BasicNewsRecipe): title = u'Media2' __author__ = 'teepel ' language = 'pl' - description =u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' - masthead_url='http://media2.pl/res/logo/www.png' - remove_empty_feeds= True - oldest_article = 1 + description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' + masthead_url = 'http://media2.pl/res/logo/www.png' + cover_url = 'http://media2.pl/res/logo/www.png' + remove_empty_feeds = True + oldest_article = 7 max_articles_per_feed = 100 - remove_javascript=True - no_stylesheets=True - simultaneous_downloads = 5 - + remove_javascript = True + no_stylesheets = True + remove_attributes = ['style'] + ignore_duplicate_articles = {'title', 'url'} extra_css = '''.news-lead{font-weight: bold; }''' - keep_only_tags =[] - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})) + keep_only_tags = [dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})] + remove_tags = [dict(name = 'span', attrs = {'class' : 'news-comments'}), dict(name = 'div', attrs = {'class' : 'item-sidebar'}), dict(name = 'div', attrs = {'class' : 'news-tags'})] - remove_tags =[] - remove_tags.append(dict(name = 'span', attrs = {'class' : 'news-comments'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'item-sidebar'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-tags'})) - - feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')] + feeds = [(u'Media2', u'http://feeds.feedburner.com/media2'), (u'Internet', u'http://feeds.feedburner.com/media2/internet'), + (u'Media', 'http://feeds.feedburner.com/media2/media'), (u'Telekomunikacja', 'http://feeds.feedburner.com/media2/telekomunikacja'), + (u'Reklama/PR', 'http://feeds.feedburner.com/media2/reklama-pr'), (u'Technologie', 'http://feeds.feedburner.com/media2/technologie'), + (u'Badania', 'http://feeds.feedburner.com/media2/badania') + ] \ No newline at end of file diff --git a/recipes/nauka_w_polsce.recipe b/recipes/nauka_w_polsce.recipe index 715780d162..2a44aa7e84 100644 --- a/recipes/nauka_w_polsce.recipe +++ b/recipes/nauka_w_polsce.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe import re class NaukawPolsce(BasicNewsRecipe): - title = u'Nauka w Polsce' + title = u'PAP Nauka w Polsce' __author__ = 'fenuks' description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.' category = 'science' diff --git a/recipes/polter_pl.recipe b/recipes/polter_pl.recipe index 1f9cef3be3..aea21dca9c 100644 --- a/recipes/polter_pl.recipe +++ b/recipes/polter_pl.recipe @@ -3,7 +3,7 @@ import re from calibre.web.feeds.news import BasicNewsRecipe class Poltergeist(BasicNewsRecipe): - title = u'Poltergeist' + title = u'Polter.pl' __author__ = 'fenuks' description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.' category = 'fantasy, books, rpg, games' diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe index 2edc611ad7..597c9ef2d3 100644 --- a/recipes/ppe_pl.recipe +++ b/recipes/ppe_pl.recipe @@ -1,41 +1,35 @@ #!/usr/bin/env python __license__ = 'GPL v3' - +import re from calibre.web.feeds.news import BasicNewsRecipe class ppeRecipe(BasicNewsRecipe): __author__ = u'Artur Stachecki ' language = 'pl' - title = u'ppe.pl' category = u'News' description = u'Portal o konsolach i grach wideo.' - cover_url='' - remove_empty_feeds= True - no_stylesheets=True - oldest_article = 1 - max_articles_per_feed = 100000 - recursions = 0 + extra_css = '.categories > li {list-style: none; display: inline;} .galmini > li {list-style: none; float: left;} .calibre_navbar {clear: both;}' + remove_empty_feeds = True no_stylesheets = True + oldest_article = 7 + max_articles_per_feed = 100 remove_javascript = True - simultaneous_downloads = 2 + remove_empty_feeds = True + remove_attributes = ['style'] + + keep_only_tags = [dict(attrs={'class':'box'})] + remove_tags = [dict(attrs={'class':['voltage-1', 'voltage-2', 'encyklopedia', 'nag', 'related', 'comment_form', 'komentarze-box']})] - keep_only_tags =[] - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-heading'})) - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'tresc-poziom'})) + feeds = [ + ('Newsy', 'http://ppe.pl/rss.html'), + ('Recenzje', 'http://ppe.pl/rss-recenzje.html'), + ('Publicystyka', 'http://ppe.pl/rss-publicystyka.html'), + ] - remove_tags =[] - remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria1'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria2'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria3'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-photo'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'fbl'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'info'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'links'})) - - remove_tags.append(dict(name = 'div', attrs = {'style' : 'padding: 4px'})) - - feeds = [ - ('Newsy', 'feed://ppe.pl/rss/rss.xml'), - ] + def get_cover_url(self): + soup = self.index_to_soup('http://www.ppe.pl/psx_extreme.html') + part = soup.find(attrs={'class':'archiwum-foto'})['style'] + part = re.search("'(.+)'", part).group(1).replace('_min', '') + return 'http://www.ppe.pl' + part diff --git a/recipes/pure_pc.recipe b/recipes/pure_pc.recipe index 13d9307a09..167136c90f 100644 --- a/recipes/pure_pc.recipe +++ b/recipes/pure_pc.recipe @@ -1,3 +1,4 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Comment @@ -11,6 +12,7 @@ class PurePC(BasicNewsRecipe): language = 'pl' masthead_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg' cover_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg' + extra_css = '.wykres_logo {float: left; margin-right: 5px;}' no_stylesheets = True keep_only_tags= [dict(id='content')] remove_tags_after= dict(attrs={'class':'fivestar-widget'}) @@ -19,11 +21,14 @@ class PurePC(BasicNewsRecipe): def append_page(self, soup, appendtag): - nexturl= appendtag.find(attrs={'class':'pager-next'}) - if nexturl: - while nexturl: - soup2 = self.index_to_soup('http://www.purepc.pl'+ nexturl.a['href']) - nexturl=soup2.find(attrs={'class':'pager-next'}) + lasturl = appendtag.find(attrs={'class':'pager-last'}) + if lasturl: + regex = re.search('(.+?2C)(\d+)', lasturl.a['href']) + baseurl = regex.group(1).replace('?page=0%2C', '?page=1%2C') + baseurl = 'http://www.purepc.pl' + baseurl + nr = int(regex.group(2)) + for page_nr in range(1, nr+1): + soup2 = self.index_to_soup(baseurl+str(page_nr)) pagetext = soup2.find(attrs={'class':'article'}) pos = len(appendtag.contents) appendtag.insert(pos, pagetext) @@ -35,4 +40,4 @@ class PurePC(BasicNewsRecipe): def preprocess_html(self, soup): self.append_page(soup, soup.body) - return soup + return soup \ No newline at end of file diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index cddf6a561f..cb325efb07 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -1880,7 +1880,7 @@ class KOBOTOUCH(KOBO): # Remove any entries for the Activity table - removes tile from new home page if self.has_activity_table(): - debug_print('KoboTouch:delete_via_sql: detete from Activity') + debug_print('KoboTouch:delete_via_sql: delete from Activity') cursor.execute('delete from Activity where Id =?', t) connection.commit() @@ -2391,7 +2391,8 @@ class KOBOTOUCH(KOBO): cursor = connection.cursor() cursor.execute(delete_query) cursor.execute(update_query) - cursor.execute(delete_activity_query) + if self.has_activity_table(): + cursor.execute(delete_activity_query) connection.commit() cursor.close() diff --git a/src/calibre/ebooks/conversion/plugins/docx_input.py b/src/calibre/ebooks/conversion/plugins/docx_input.py index 7492d46c68..190a771379 100644 --- a/src/calibre/ebooks/conversion/plugins/docx_input.py +++ b/src/calibre/ebooks/conversion/plugins/docx_input.py @@ -14,9 +14,17 @@ class DOCXInput(InputFormatPlugin): description = 'Convert DOCX files (.docx) to HTML' file_types = set(['docx']) + options = { + OptionRecommendation(name='docx_no_cover', recommended_value=False, + help=_('Normally, if a large image is present at the start of the document that looks like a cover, ' + 'it will be removed from the document and used as the cover for created ebook. This option ' + 'turns off that behavior.')), + + } + recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)]) def convert(self, stream, options, file_ext, log, accelerators): from calibre.ebooks.docx.to_html import Convert - return Convert(stream, log=log)() + return Convert(stream, detect_cover=not options.docx_no_cover, log=log)() diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py index 02b8299c94..c9a2fee4c9 100644 --- a/src/calibre/ebooks/docx/char_styles.py +++ b/src/calibre/ebooks/docx/char_styles.py @@ -132,10 +132,10 @@ class RunStyle(object): all_properties = { 'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', - 'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', + 'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', 'webHidden', 'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color', - 'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family' + 'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family', } toggle_properties = { @@ -150,7 +150,7 @@ class RunStyle(object): else: for p in ( 'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow', - 'smallCaps', 'strike', 'vanish', + 'smallCaps', 'strike', 'vanish', 'webHidden', ): setattr(self, p, binary_property(rPr, p)) @@ -210,7 +210,7 @@ class RunStyle(object): c['text-shadow'] = '2px 2px' if self.smallCaps is True: c['font-variant'] = 'small-caps' - if self.vanish is True: + if self.vanish is True or self.webHidden is True: c['display'] = 'none' self.get_border_css(c) diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 2b1e095025..a55f8449d8 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -6,6 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' +import os def mergeable(previous, current): if previous.tail or current.tail: @@ -83,8 +84,19 @@ def lift(span): else: add_text(last_child, 'tail', span.tail) +def before_count(root, tag, limit=10): + body = root.xpath('//body[1]') + if not body: + return limit + ans = 0 + for elem in body[0].iterdescendants(): + if elem is tag: + return ans + ans += 1 + if ans > limit: + return limit -def cleanup_markup(root, styles): +def cleanup_markup(log, root, styles, dest_dir, detect_cover): # Merge consecutive spans that have the same styling current_run = [] for span in root.xpath('//span'): @@ -134,3 +146,22 @@ def cleanup_markup(root, styles): for span in root.xpath('//span[not(@class) and not(@id)]'): lift(span) + if detect_cover: + # Check if the first image in the document is possibly a cover + img = root.xpath('//img[@src][1]') + if img: + img = img[0] + path = os.path.join(dest_dir, img.get('src')) + if os.path.exists(path) and before_count(root, img, limit=10) < 5: + from calibre.utils.magick.draw import identify + try: + width, height, fmt = identify(path) + except: + width, height, fmt = 0, 0, None + is_cover = 0.8 <= height/width <= 1.8 and height*width >= 160000 + if is_cover: + log.debug('Detected an image that looks like a cover') + img.getparent().remove(img) + return path + + diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index 76f43e7e0c..e24b550797 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -112,15 +112,16 @@ class Images(object): base += '.' + ext exists = frozenset(self.used.itervalues()) c = 1 - while base in exists: + name = base + while name in exists: n, e = base.rpartition('.')[0::2] - base = '%s-%d.%s' % (n, c, e) + name = '%s-%d.%s' % (n, c, e) c += 1 - self.used[rid] = base - with open(os.path.join(self.dest_dir, base), 'wb') as f: + self.used[rid] = name + with open(os.path.join(self.dest_dir, name), 'wb') as f: f.write(raw) - self.all_images.add('images/' + base) - return base + self.all_images.add('images/' + name) + return name def pic_to_img(self, pic, alt=None): name = None diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 8e4d811803..21f45616fa 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -260,6 +260,7 @@ class Styles(object): for attr in ans.all_properties: if not (is_numbering and attr == 'text_indent'): # skip text-indent for lists setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr)) + ans.linked_style = direct_formatting.linked_style return ans def resolve_run(self, r): @@ -389,6 +390,19 @@ class Styles(object): else: ps.numbering = (ps.numbering[0], lvl) + def apply_contextual_spacing(self, paras): + last_para = None + for p in paras: + if last_para is not None: + ls = self.resolve_paragraph(last_para) + ps = self.resolve_paragraph(p) + if ls.linked_style is not None and ls.linked_style == ps.linked_style: + if ls.contextualSpacing is True: + ls.margin_bottom = 0 + if ps.contextualSpacing is True: + ps.margin_top = 0 + last_para = p + def register(self, css, prefix): h = hash(frozenset(css.iteritems())) ans, _ = self.classes.get(h, (None, None)) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index ad26f91d46..963d1fc6c8 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -25,9 +25,8 @@ from calibre.ebooks.docx.tables import Tables from calibre.ebooks.docx.footnotes import Footnotes from calibre.ebooks.docx.cleanup import cleanup_markup from calibre.ebooks.docx.theme import Theme +from calibre.ebooks.docx.toc import create_toc from calibre.ebooks.metadata.opf2 import OPFCreator -from calibre.ebooks.metadata.toc import TOC -from calibre.ebooks.oeb.polish.toc import elem_to_toc_text from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 class Text: @@ -41,11 +40,12 @@ class Text: class Convert(object): - def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None): + def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None): self.docx = DOCX(path_or_stream, log=log) self.ms_pat = re.compile(r'\s{2,}') self.ws_pat = re.compile(r'[\n\r\t]') self.log = self.docx.log + self.detect_cover = detect_cover self.notes_text = notes_text or _('Notes') self.dest_dir = dest_dir or os.getcwdu() self.mi = self.docx.metadata @@ -86,6 +86,7 @@ class Convert(object): self.framed_map = {} self.anchor_map = {} self.link_map = defaultdict(list) + paras = [] self.log.debug('Converting Word markup to HTML') self.read_page_properties(doc) @@ -94,6 +95,8 @@ class Convert(object): if wp.tag.endswith('}p'): p = self.convert_p(wp) self.body.append(p) + paras.append(wp) + self.styles.apply_contextual_spacing(paras) notes_header = None if self.footnotes.has_notes: @@ -107,12 +110,16 @@ class Convert(object): dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor)) dl[-1][0].tail = ']' dl.append(DD()) + paras = [] for wp in note: if wp.tag.endswith('}tbl'): self.tables.register(wp, self.styles) self.page_map[wp] = self.current_page - p = self.convert_p(wp) - dl[-1].append(p) + else: + p = self.convert_p(wp) + dl[-1].append(p) + paras.append(wp) + self.styles.apply_contextual_spacing(paras) self.resolve_links(relationships_by_id) @@ -163,9 +170,9 @@ class Convert(object): break self.log.debug('Cleaning up redundant markup generated by Word') - cleanup_markup(self.html, self.styles) + self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover) - return self.write() + return self.write(doc) def read_page_properties(self, doc): current = [] @@ -260,48 +267,8 @@ class Convert(object): self.styles.resolve_numbering(numbering) - def create_toc(self): - ' Create a TOC from headings in the document ' - root = self.body - headings = ('h1', 'h2', 'h3') - tocroot = TOC() - xpaths = [XPath('//%s' % x) for x in headings] - level_prev = {i+1:None for i in xrange(len(xpaths))} - level_prev[0] = tocroot - level_item_map = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)} - item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems} - - self.idcount = 0 - - def ensure_id(elem): - ans = elem.get('id', None) - if not ans: - self.idcount += 1 - ans = 'toc_id_%d' % self.idcount - elem.set('id', ans) - return ans - - for item in descendants(root, *headings): - lvl = plvl = item_level_map.get(item, None) - if lvl is None: - continue - parent = None - while parent is None: - plvl -= 1 - parent = level_prev[plvl] - lvl = plvl + 1 - elem_id = ensure_id(item) - text = elem_to_toc_text(item) - toc = parent.add_item('index.html', elem_id, text) - level_prev[lvl] = toc - for i in xrange(lvl+1, len(xpaths)+1): - level_prev[i] = None - - if len(tuple(tocroot.flat())) > 1: - return tocroot - - def write(self): - toc = self.create_toc() + def write(self, doc): + toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map) raw = html.tostring(self.html, encoding='utf-8', doctype='') with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f: f.write(raw) @@ -314,6 +281,8 @@ class Convert(object): opf.toc = toc opf.create_manifest_from_files_in([self.dest_dir]) opf.create_spine(['index.html']) + if self.cover_image is not None: + opf.guide.set_cover(self.cover_image) with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx: opf.render(of, ncx, 'toc.ncx') return os.path.join(self.dest_dir, 'metadata.opf') @@ -401,11 +370,13 @@ class Convert(object): return wrapper def resolve_links(self, relationships_by_id): + self.resolved_link_map = {} for hyperlink, spans in self.link_map.iteritems(): span = spans[0] if len(spans) > 1: span = self.wrap_elems(spans, SPAN()) span.tag = 'a' + self.resolved_link_map[hyperlink] = span tgt = get(hyperlink, 'w:tgtFrame') if tgt: span.set('target', tgt) @@ -474,8 +445,6 @@ class Convert(object): l.set('class', 'noteref') text.add_elem(l) ans.append(text.elem) - elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate': - text.buf.append('\xa0') if text.buf: setattr(text.elem, text.attr, ''.join(text.buf)) diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py new file mode 100644 index 0000000000..5936d34355 --- /dev/null +++ b/src/calibre/ebooks/docx/toc.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +from collections import namedtuple + +from lxml.etree import tostring + +from calibre.ebooks.docx.names import XPath, descendants, get, ancestor +from calibre.ebooks.metadata.toc import TOC +from calibre.ebooks.oeb.polish.toc import elem_to_toc_text + +class Count(object): + + __slots__ = ('val',) + + def __init__(self): + self.val = 0 + +def from_headings(body): + ' Create a TOC from headings in the document ' + headings = ('h1', 'h2', 'h3') + tocroot = TOC() + xpaths = [XPath('//%s' % x) for x in headings] + level_prev = {i+1:None for i in xrange(len(xpaths))} + level_prev[0] = tocroot + level_item_map = {i+1:frozenset(xp(body)) for i, xp in enumerate(xpaths)} + item_level_map = {e:i for i, elems in level_item_map.iteritems() for e in elems} + + idcount = Count() + + def ensure_id(elem): + ans = elem.get('id', None) + if not ans: + idcount.val += 1 + ans = 'toc_id_%d' % idcount.val + elem.set('id', ans) + return ans + + for item in descendants(body, *headings): + lvl = plvl = item_level_map.get(item, None) + if lvl is None: + continue + parent = None + while parent is None: + plvl -= 1 + parent = level_prev[plvl] + lvl = plvl + 1 + elem_id = ensure_id(item) + text = elem_to_toc_text(item) + toc = parent.add_item('index.html', elem_id, text) + level_prev[lvl] = toc + for i in xrange(lvl+1, len(xpaths)+1): + level_prev[i] = None + + if len(tuple(tocroot.flat())) > 1: + return tocroot + +def structure_toc(entries): + indent_vals = sorted({x.indent for x in entries}) + last_found = [None for i in indent_vals] + newtoc = TOC() + + if len(indent_vals) > 6: + for x in entries: + newtoc.add_item('index.html', x.anchor, x.text) + return newtoc + + def find_parent(level): + candidates = last_found[:level] + for x in reversed(candidates): + if x is not None: + return x + return newtoc + + for item in entries: + level = indent_vals.index(item.indent) + parent = find_parent(level) + last_found[level] = parent.add_item('index.html', item.anchor, + item.text) + for i in xrange(level+1, len(last_found)): + last_found[i] = None + + return newtoc + +def link_to_txt(a, styles, object_map): + if len(a) > 1: + for child in a: + run = object_map.get(child, None) + if run is not None: + rs = styles.resolve(run) + if rs.css.get('display', None) == 'none': + a.remove(child) + + return tostring(a, method='text', with_tail=False, encoding=unicode).strip() + +def from_toc(docx, link_map, styles, object_map): + toc_level = None + level = 0 + TI = namedtuple('TI', 'text anchor indent') + toc = [] + for tag in XPath('//*[(@w:fldCharType and name()="w:fldChar") or name()="w:hyperlink" or name()="w:instrText"]')(docx): + n = tag.tag.rpartition('}')[-1] + if n == 'fldChar': + t = get(tag, 'w:fldCharType') + if t == 'begin': + level += 1 + elif t == 'end': + level -= 1 + if toc_level is not None and level < toc_level: + break + elif n == 'instrText': + if level > 0 and tag.text and tag.text.strip().startswith('TOC '): + toc_level = level + elif n == 'hyperlink': + if toc_level is not None and level >= toc_level and tag in link_map: + a = link_map[tag] + href = a.get('href', None) + txt = link_to_txt(a, styles, object_map) + p = ancestor(tag, 'w:p') + if txt and href and p is not None: + ps = styles.resolve_paragraph(p) + try: + ml = int(ps.margin_left[:-2]) + except (TypeError, ValueError, AttributeError): + ml = 0 + if ps.text_align in {'center', 'right'}: + ml = 0 + toc.append(TI(txt, href[1:], ml)) + if toc: + return structure_toc(toc) + +def create_toc(docx, body, link_map, styles, object_map): + return from_toc(docx, link_map, styles, object_map) or from_headings(body) + + diff --git a/src/calibre/ebooks/metadata/docx.py b/src/calibre/ebooks/metadata/docx.py index ea34d27d3a..2c8b91bc70 100644 --- a/src/calibre/ebooks/metadata/docx.py +++ b/src/calibre/ebooks/metadata/docx.py @@ -8,29 +8,39 @@ __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' from calibre.ebooks.docx.container import DOCX +from calibre.ebooks.docx.names import XPath, get -from calibre.utils.zipfile import ZipFile from calibre.utils.magick.draw import identify_data +images = XPath('//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]') + +def get_cover(docx): + doc = docx.document + rid_map = docx.document_relationships[0] + for image in images(doc): + rid = get(image, 'r:embed') or get(image, 'r:id') + if rid in rid_map: + try: + raw = docx.read(rid_map[rid]) + width, height, fmt = identify_data(raw) + except Exception: + continue + if 0.8 <= height/width <= 1.8 and height*width >= 160000: + return (fmt, raw) + def get_metadata(stream): c = DOCX(stream, extract=False) mi = c.metadata + try: + cdata = get_cover(c) + except Exception: + cdata = None + import traceback + traceback.print_exc() c.close() stream.seek(0) - cdata = None - with ZipFile(stream, 'r') as zf: - for zi in zf.infolist(): - ext = zi.filename.rpartition('.')[-1].lower() - if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}: - raw = zf.read(zi) - try: - width, height, fmt = identify_data(raw) - except: - continue - if 0.8 <= height/width <= 1.8 and height*width >= 160000: - cdata = (fmt, raw) - if cdata is not None: - mi.cover_data = cdata + if cdata is not None: + mi.cover_data = cdata return mi diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index d5eb7c5008..acd815524e 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -8,7 +8,6 @@ __copyright__ = '2008, Marshall T. Vandegrift ' import os from urlparse import urldefrag -import base64 from lxml import etree from PyQt4.QtCore import Qt from PyQt4.QtCore import QByteArray @@ -23,6 +22,8 @@ from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME from calibre.ebooks.oeb.base import xml2str, xpath from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer +from calibre.ptempfile import PersistentTemporaryFile +from calibre.utils.imghdr import what IMAGE_TAGS = set([XHTML('img'), XHTML('object')]) KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align']) @@ -46,6 +47,7 @@ class SVGRasterizer(object): def __call__(self, oeb, context): oeb.logger.info('Rasterizing SVG images...') + self.temp_files = [] self.stylizer_cache = {} self.oeb = oeb self.opts = context @@ -54,6 +56,11 @@ class SVGRasterizer(object): self.dataize_manifest() self.rasterize_spine() self.rasterize_cover() + for pt in self.temp_files: + try: + os.remove(pt) + except: + pass def rasterize_svg(self, elem, width=0, height=0, format='PNG'): view_box = elem.get('viewBox', elem.get('viewbox', None)) @@ -112,9 +119,12 @@ class SVGRasterizer(object): if abshref not in hrefs: continue linkee = hrefs[abshref] - data = base64.encodestring(str(linkee)) - data = "data:%s;base64,%s" % (linkee.media_type, data) - elem.attrib[XLINK('href')] = data + data = str(linkee) + ext = what(None, data) or 'jpg' + with PersistentTemporaryFile(suffix='.'+ext) as pt: + pt.write(data) + self.temp_files.append(pt.name) + elem.attrib[XLINK('href')] = pt.name return svg def stylizer(self, item): diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py index 45e73cfe8f..7380b10d1f 100755 --- a/src/calibre/ebooks/textile/functions.py +++ b/src/calibre/ebooks/textile/functions.py @@ -86,7 +86,14 @@ def getimagesize(url): """ try: - import ImageFile + from PIL import ImageFile + except ImportError: + try: + import ImageFile + except ImportError: + return None + + try: import urllib2 except ImportError: return None @@ -220,7 +227,7 @@ class Textile(object): (re.compile(r'{(S\^|\^S)}'), r'Ŝ'), # S-circumflex (re.compile(r'{(s\^|\^s)}'), r'ŝ'), # s-circumflex - + (re.compile(r'{(S\ˇ|\ˇS)}'), r'Š'), # S-caron (re.compile(r'{(s\ˇ|\ˇs)}'), r'š'), # s-caron (re.compile(r'{(T\ˇ|\ˇT)}'), r'Ť'), # T-caron @@ -229,7 +236,7 @@ class Textile(object): (re.compile(r'{(u\°|\°u)}'), r'ů'), # u-ring (re.compile(r'{(Z\ˇ|\ˇZ)}'), r'Ž'), # Z-caron (re.compile(r'{(z\ˇ|\ˇz)}'), r'ž'), # z-caron - + (re.compile(r'{\*}'), r'•'), # bullet (re.compile(r'{Fr}'), r'₣'), # Franc (re.compile(r'{(L=|=L)}'), r'₤'), # Lira @@ -245,7 +252,7 @@ class Textile(object): (re.compile(r"{(’|'/|/')}"), r'’'), # closing-single-quote - apostrophe (re.compile(r"{(‘|\\'|'\\)}"), r'‘'), # opening-single-quote (re.compile(r'{(”|"/|/")}'), r'”'), # closing-double-quote - (re.compile(r'{(“|\\"|"\\)}'), r'“'), # opening-double-quote + (re.compile(r'{(“|\\"|"\\)}'), r'“'), # opening-double-quote ] glyph_defaults = [ (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 5fcde65ff5..a552ad8594 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -92,7 +92,7 @@ defs['tags_browser_partition_method'] = 'first letter' defs['tags_browser_collapse_at'] = 100 defs['tag_browser_dont_collapse'] = [] defs['edit_metadata_single_layout'] = 'default' -defs['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}' +defs['default_author_link'] = 'https://en.wikipedia.org/w/index.php?search={author}' defs['preserve_date_on_ctl'] = True defs['manual_add_auto_convert'] = False defs['cb_fullscreen'] = False diff --git a/src/calibre/gui2/convert/docx_input.py b/src/calibre/gui2/convert/docx_input.py new file mode 100644 index 0000000000..46234c6a36 --- /dev/null +++ b/src/calibre/gui2/convert/docx_input.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +from calibre.gui2.convert.docx_input_ui import Ui_Form +from calibre.gui2.convert import Widget + +class PluginWidget(Widget, Ui_Form): + + TITLE = _('DOCX Input') + HELP = _('Options specific to')+' DOCX '+_('input') + COMMIT_NAME = 'docx_input' + ICON = I('mimetypes/docx.png') + + def __init__(self, parent, get_option, get_help, db=None, book_id=None): + Widget.__init__(self, parent, + ['docx_no_cover', ]) + self.initialize_options(get_option, get_help, db, book_id) + diff --git a/src/calibre/gui2/convert/docx_input.ui b/src/calibre/gui2/convert/docx_input.ui new file mode 100644 index 0000000000..41948118dc --- /dev/null +++ b/src/calibre/gui2/convert/docx_input.ui @@ -0,0 +1,41 @@ + + + Form + + + + 0 + 0 + 518 + 353 + + + + Form + + + + + + Do not try to autodetect a &cover from images in the document + + + + + + + Qt::Vertical + + + + 20 + 213 + + + + + + + + + diff --git a/src/calibre/gui2/dialogs/plugin_updater.py b/src/calibre/gui2/dialogs/plugin_updater.py index 3820169876..c5d79218f9 100644 --- a/src/calibre/gui2/dialogs/plugin_updater.py +++ b/src/calibre/gui2/dialogs/plugin_updater.py @@ -89,7 +89,7 @@ def get_installed_plugin_status(display_plugin): display_plugin.installed_version = None display_plugin.plugin = None for plugin in initialized_plugins(): - if plugin.name == display_plugin.name: + if plugin.name == display_plugin.name and plugin.plugin_path is not None: display_plugin.plugin = plugin display_plugin.installed_version = plugin.version break @@ -254,7 +254,7 @@ Platforms: Windows, OSX, Linux; History: Yes; return self.installed_version is not None def is_upgrade_available(self): - return self.is_installed() and (self.installed_version < self.available_version \ + return self.is_installed() and (self.installed_version < self.available_version or self.is_deprecated) def is_valid_platform(self): @@ -317,7 +317,7 @@ class DisplayPluginModel(QAbstractTableModel): def data(self, index, role): if not index.isValid(): - return NONE; + return NONE row, col = index.row(), index.column() if row < 0 or row >= self.rowCount(): return NONE @@ -357,7 +357,7 @@ class DisplayPluginModel(QAbstractTableModel): else: return self._get_status_tooltip(display_plugin) elif role == Qt.ForegroundRole: - if col != 1: # Never change colour of the donation column + if col != 1: # Never change colour of the donation column if display_plugin.is_deprecated: return QVariant(QBrush(Qt.blue)) if display_plugin.is_disabled(): @@ -417,7 +417,7 @@ class DisplayPluginModel(QAbstractTableModel): icon_name = 'plugin_upgrade_invalid.png' else: icon_name = 'plugin_upgrade_ok.png' - else: # A plugin available not currently installed + else: # A plugin available not currently installed if display_plugin.is_valid_to_install(): icon_name = 'plugin_new_valid.png' else: @@ -429,11 +429,11 @@ class DisplayPluginModel(QAbstractTableModel): return QVariant(_('This plugin has been deprecated and should be uninstalled')+'\n\n'+ _('Right-click to see more options')) if not display_plugin.is_valid_platform(): - return QVariant(_('This plugin can only be installed on: %s') % \ + return QVariant(_('This plugin can only be installed on: %s') % ', '.join(display_plugin.platforms)+'\n\n'+ _('Right-click to see more options')) if numeric_version < display_plugin.calibre_required_version: - return QVariant(_('You must upgrade to at least Calibre %s before installing this plugin') % \ + return QVariant(_('You must upgrade to at least Calibre %s before installing this plugin') % self._get_display_version(display_plugin.calibre_required_version)+'\n\n'+ _('Right-click to see more options')) if display_plugin.installed_version < display_plugin.available_version: @@ -687,7 +687,7 @@ class PluginUpdaterDialog(SizePersistedDialog): def _install_clicked(self): display_plugin = self._selected_display_plugin() - if not question_dialog(self, _('Install %s')%display_plugin.name, '

' + \ + if not question_dialog(self, _('Install %s')%display_plugin.name, '

' + _('Installing plugins is a security risk. ' 'Plugins can contain a virus/malware. ' 'Only install it if you got it from a trusted source.' @@ -886,3 +886,4 @@ class PluginUpdaterDialog(SizePersistedDialog): pt.write(raw) pt.close() return pt.name + diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py index 67efe48b53..0dd1a2189d 100644 --- a/src/calibre/gui2/proceed.py +++ b/src/calibre/gui2/proceed.py @@ -19,7 +19,7 @@ from calibre.gui2.dialogs.message_box import ViewLog Question = namedtuple('Question', 'payload callback cancel_callback ' 'title msg html_log log_viewer_title log_is_file det_msg ' 'show_copy_button checkbox_msg checkbox_checked action_callback ' - 'action_label action_icon') + 'action_label action_icon focus_action') class ProceedQuestion(QDialog): @@ -155,13 +155,14 @@ class ProceedQuestion(QDialog): self.checkbox.setChecked(question.checkbox_checked) self.do_resize() self.show() - self.bb.button(self.bb.Yes).setDefault(True) - self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason) + button = self.action_button if question.focus_action and question.action_callback is not None else self.bb.button(self.bb.Yes) + button.setDefault(True) + button.setFocus(Qt.OtherFocusReason) def __call__(self, callback, payload, html_log, log_viewer_title, title, msg, det_msg='', show_copy_button=False, cancel_callback=None, log_is_file=False, checkbox_msg=None, checkbox_checked=False, - action_callback=None, action_label=None, action_icon=None): + action_callback=None, action_label=None, action_icon=None, focus_action=False): ''' A non modal popup that notifies the user that a background task has been completed. This class guarantees that only a single popup is @@ -192,13 +193,14 @@ class ProceedQuestion(QDialog): exactly the same way as callback. :param action_label: The text on the action button :param action_icon: The icon for the action button, must be a QIcon object or None + :param focus_action: If True, the action button will be focused instead of the Yes button ''' question = Question( payload, callback, cancel_callback, title, msg, html_log, log_viewer_title, log_is_file, det_msg, show_copy_button, checkbox_msg, checkbox_checked, action_callback, action_label, - action_icon) + action_icon, focus_action) self.questions.append(question) self.show_question() diff --git a/src/calibre/gui2/store/web_control.py b/src/calibre/gui2/store/web_control.py index 48e1b7dff0..8318ae9078 100644 --- a/src/calibre/gui2/store/web_control.py +++ b/src/calibre/gui2/store/web_control.py @@ -24,8 +24,10 @@ class NPWebView(QWebView): self.gui = None self.tags = '' - self.setPage(NPWebPage()) - self.page().networkAccessManager().setCookieJar(QNetworkCookieJar()) + self._page = NPWebPage() + self.setPage(self._page) + self.cookie_jar = QNetworkCookieJar() + self.page().networkAccessManager().setCookieJar(self.cookie_jar) http_proxy = get_proxies().get('http', None) if http_proxy: diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 3b63d51c15..113e1201e2 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -82,7 +82,8 @@ class History(list): return None item = self[self.forward_pos] self.back_pos = self.forward_pos - 1 - if self.back_pos < 0: self.back_pos = None + if self.back_pos < 0: + self.back_pos = None self.insert_pos = self.back_pos or 0 self.forward_pos = None if self.forward_pos > len(self) - 2 else self.forward_pos + 1 self.set_actions() @@ -268,7 +269,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.action_full_screen.shortcuts()])) self.action_back.triggered[bool].connect(self.back) self.action_forward.triggered[bool].connect(self.forward) - self.action_bookmark.triggered[bool].connect(self.bookmark) self.action_preferences.triggered.connect(self.do_config) self.pos.editingFinished.connect(self.goto_page_num) self.vertical_scrollbar.valueChanged[int].connect(lambda @@ -294,7 +294,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.toc.setCursor(Qt.PointingHandCursor) self.tool_bar.setContextMenuPolicy(Qt.PreventContextMenu) self.tool_bar2.setContextMenuPolicy(Qt.PreventContextMenu) - self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.MenuButtonPopup) + self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.InstantPopup) self.action_full_screen.setCheckable(True) self.full_screen_label = QLabel('''

@@ -394,7 +394,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.action_toggle_paged_mode.setToolTip(self.FLOW_MODE_TT if self.action_toggle_paged_mode.isChecked() else self.PAGED_MODE_TT) - if at_start: return + if at_start: + return self.reload() def settings_changed(self): @@ -486,8 +487,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer): at_start=True) def lookup(self, word): - self.dictionary_view.setHtml('

'+ \ - _('Connecting to dict.org to lookup: %s…')%word + \ + self.dictionary_view.setHtml('

'+ + _('Connecting to dict.org to lookup: %s…')%word + '

') self.dictionary_box.show() self._lookup = Lookup(word, parent=self) @@ -964,6 +965,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): def set_bookmarks(self, bookmarks): self.bookmarks_menu.clear() + self.bookmarks_menu.addAction(_("Bookmark this location"), self.bookmark) self.bookmarks_menu.addAction(_("Manage Bookmarks"), self.manage_bookmarks) self.bookmarks_menu.addSeparator() current_page = None @@ -1202,3 +1204,4 @@ def main(args=sys.argv): if __name__ == '__main__': sys.exit(main()) + diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py index 798ac5faca..f813eed892 100644 --- a/src/calibre/gui2/wizard/__init__.py +++ b/src/calibre/gui2/wizard/__init__.py @@ -139,7 +139,7 @@ class Kobo(Device): id = 'kobo' class KoboVox(Kobo): - name = 'Kobo Vox' + name = 'Kobo Vox and Kobo Aura HD' output_profile = 'tablet' id = 'kobo_vox' diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index a22a79ef20..3ee90c43a6 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -13,7 +13,7 @@ from lxml.html.builder import HTML, HEAD, TITLE, STYLE, DIV, BODY, \ from calibre import preferred_encoding, strftime, isbytestring -def CLASS(*args, **kwargs): # class is a reserved word in Python +def CLASS(*args, **kwargs): # class is a reserved word in Python kwargs['class'] = ' '.join(args) return kwargs @@ -26,7 +26,7 @@ class Template(object): self.html_lang = lang def generate(self, *args, **kwargs): - if not kwargs.has_key('style'): + if 'style' not in kwargs: kwargs['style'] = '' for key in kwargs.keys(): if isbytestring(kwargs[key]): @@ -152,8 +152,8 @@ class FeedTemplate(Template): body.append(div) if getattr(feed, 'image', None): div.append(DIV(IMG( - alt = feed.image_alt if feed.image_alt else '', - src = feed.image_url + alt=feed.image_alt if feed.image_alt else '', + src=feed.image_url ), CLASS('calibre_feed_image'))) if getattr(feed, 'description', None): @@ -261,8 +261,8 @@ class TouchscreenIndexTemplate(Template): for i, feed in enumerate(feeds): if feed: tr = TR() - tr.append(TD( CLASS('calibre_rescale_120'), A(feed.title, href='feed_%d/index.html'%i))) - tr.append(TD( '%s' % len(feed.articles), style="text-align:right")) + tr.append(TD(CLASS('calibre_rescale_120'), A(feed.title, href='feed_%d/index.html'%i))) + tr.append(TD('%s' % len(feed.articles), style="text-align:right")) toc.append(tr) div = DIV( masthead_p, @@ -307,7 +307,7 @@ class TouchscreenFeedTemplate(Template): if f > 0: link = A(CLASS('feed_link'), trim_title(feeds[f-1].title), - href = '../feed_%d/index.html' % int(f-1)) + href='../feed_%d/index.html' % int(f-1)) navbar_tr.append(TD(CLASS('feed_prev'),link)) # Up to Sections @@ -319,13 +319,12 @@ class TouchscreenFeedTemplate(Template): if f < len(feeds)-1: link = A(CLASS('feed_link'), trim_title(feeds[f+1].title), - href = '../feed_%d/index.html' % int(f+1)) + href='../feed_%d/index.html' % int(f+1)) navbar_tr.append(TD(CLASS('feed_next'),link)) navbar_t.append(navbar_tr) top_navbar = navbar_t bottom_navbar = copy.copy(navbar_t) - #print "\n%s\n" % etree.tostring(navbar_t, pretty_print=True) - + # print "\n%s\n" % etree.tostring(navbar_t, pretty_print=True) # Build the page head = HEAD(TITLE(feed.title)) @@ -342,8 +341,8 @@ class TouchscreenFeedTemplate(Template): if getattr(feed, 'image', None): div.append(DIV(IMG( - alt = feed.image_alt if feed.image_alt else '', - src = feed.image_url + alt=feed.image_alt if feed.image_alt else '', + src=feed.image_url ), CLASS('calibre_feed_image'))) if getattr(feed, 'description', None): @@ -388,6 +387,14 @@ class TouchscreenNavBarTemplate(Template): navbar_t = TABLE(CLASS('touchscreen_navbar')) navbar_tr = TR() + if bottom and not url.startswith('file://'): + navbar.append(HR()) + text = 'This article was downloaded by ' + p = PT(text, STRONG(__appname__), A(url, href=url), + style='text-align:left; max-width: 100%; overflow: hidden;') + p[0].tail = ' from ' + navbar.append(p) + navbar.append(BR()) # | Previous if art > 0: link = A(CLASS('article_link'),_('Previous'),href='%s../article_%d/index.html'%(prefix, art-1)) @@ -411,6 +418,7 @@ class TouchscreenNavBarTemplate(Template): navbar_tr.append(TD(CLASS('article_next'),link)) navbar_t.append(navbar_tr) navbar.append(navbar_t) - #print "\n%s\n" % etree.tostring(navbar, pretty_print=True) + # print "\n%s\n" % etree.tostring(navbar, pretty_print=True) self.root = HTML(head, BODY(navbar)) +