From 73b0b0fca7fb0da9700af26ae31b8618c0063fbc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 28 May 2010 10:30:54 -0600 Subject: [PATCH 01/14] Fix #5623 (wired.recipe broken) --- resources/recipes/wired.recipe | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/resources/recipes/wired.recipe b/resources/recipes/wired.recipe index d45d987f48..9599d54de9 100644 --- a/resources/recipes/wired.recipe +++ b/resources/recipes/wired.recipe @@ -16,13 +16,15 @@ class Wired(BasicNewsRecipe): publisher = 'Conde Nast Digital' category = 'news, games, IT, gadgets' oldest_article = 32 + delay = 1 max_articles_per_feed = 100 no_stylesheets = True encoding = 'utf-8' use_embedded_content = False masthead_url = 'http://www.wired.com/images/home/wired_logo.gif' language = 'en' - extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} ' + publication_type = 'magazine' + extra_css = ' body{font-family: Arial,Verdana,sans-serif} .entryDescription li {display: inline; list-style-type: none} ' index = 'http://www.wired.com/magazine/' preprocess_regexps = [(re.compile(r'', re.DOTALL|re.IGNORECASE),lambda match: '')] @@ -38,6 +40,8 @@ class Wired(BasicNewsRecipe): remove_tags = [ dict(name=['object','embed','iframe','link']) ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']}) + ,dict(attrs={'id':'ff_bottom_nav'}) + ,dict(name='a',attrs={'href':'http://www.wired.com/app'}) ] remove_attributes = ['height','width'] @@ -72,17 +76,18 @@ class Wired(BasicNewsRecipe): farticles = [] for item in features.findAll('div',attrs={'class':'section'}): divurl = item.find('div',attrs={'class':'feature-header'}) - divdesc = item.find('div',attrs={'class':'feature-text'}) - url = 'http://www.wired.com' + divurl.a['href'] - title = self.tag_to_string(divurl.a) - description = self.tag_to_string(divdesc) - date = strftime(self.timefmt) - farticles.append({ - 'title' :title - ,'date' 
:date - ,'url' :url - ,'description':description - }) + if divurl: + divdesc = item.find('div',attrs={'class':'feature-text'}) + url = 'http://www.wired.com' + divurl.a['href'] + title = self.tag_to_string(divurl.a) + description = self.tag_to_string(divdesc) + date = strftime(self.timefmt) + farticles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) totalfeeds.append(('Featured Articles', farticles)) #department feeds departments = ['rants','start','test','play','found'] From 2516128360e5c1cb2b709c4268259e56fc9e9c4d Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 10:36:29 -0600 Subject: [PATCH 02/14] Troitskiy variant by Vadim Dyadkin --- resources/recipes/trv.recipe | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 resources/recipes/trv.recipe diff --git a/resources/recipes/trv.recipe b/resources/recipes/trv.recipe new file mode 100644 index 0000000000..dcb6b84e3b --- /dev/null +++ b/resources/recipes/trv.recipe @@ -0,0 +1,31 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Vadim Dyadkin dyadkin@lns.pnpi.spb.ru' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Trv(BasicNewsRecipe): + + + title = u'\u0422\u0440\u043e\u0438\u0446\u043a\u0438\u0439 \u0432\u0430\u0440\u0438\u0430\u043d\u0442' + language = 'ru' + __author__ = 'Vadim Dyadkin' + oldest_article = 30 + max_articles_per_feed = 100 + recursion = 4 + no_stylesheets = True + simultaneous_downloads = 1 + + keep_only_tags = [dict(name='h1'), + dict(name='div', attrs={'id' : 'content'}) + ] + + remove_tags = [dict(name='div', attrs={'class' : ['dateright', + 'postmeta', 'adsense-post', 'comments', 'nocomments', 'widgetarea', + 'breadcrumb']}), {'id' : ['sidebar', 'l_sidebar', 'r_sidebar', 'footer', + 'homepageright0']}, {'style' : 'clear:both;'}, + dict(name='ul'), + dict(name='h2') + ] + + feeds = [(u'\u0422\u0440\u043e\u0438\u0446\u043a\u0438\u0439 
\u0432\u0430\u0440\u0438\u0430\u043d\u0442', + u'http://trv-science.ru/feed/')] From 3fcb9307775ddd015e3aa0ed4d508dbea3ae1653 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 12:22:59 -0600 Subject: [PATCH 03/14] Las Vegas Review by Joel and Nokia E71X support --- resources/recipes/las_vegas_review.recipe | 24 ++++++++++++++++++ src/calibre/customize/builtins.py | 6 +++-- src/calibre/devices/kobo/__init__.py | 9 +++++++ src/calibre/devices/kobo/driver.py | 31 +++++++++++++++++++++++ src/calibre/devices/misc.py | 21 --------------- src/calibre/devices/nokia/driver.py | 22 ++++++++++++++++ 6 files changed, 90 insertions(+), 23 deletions(-) create mode 100644 resources/recipes/las_vegas_review.recipe create mode 100644 src/calibre/devices/kobo/__init__.py create mode 100644 src/calibre/devices/kobo/driver.py diff --git a/resources/recipes/las_vegas_review.recipe b/resources/recipes/las_vegas_review.recipe new file mode 100644 index 0000000000..9292c105a4 --- /dev/null +++ b/resources/recipes/las_vegas_review.recipe @@ -0,0 +1,24 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1274742400(BasicNewsRecipe): + + title = u'Las Vegas Review Journal' + __author__ = 'Joel' + language = 'en' + + oldest_article = 7 + + max_articles_per_feed = 100 + + feeds = [ + (u'News', u'http://www.lvrj.com/news.rss'), + (u'Business', u'http://www.lvrj.com/business.rss'), + (u'Living', u'http://www.lvrj.com/living.rss'), + (u'Opinion', u'http://www.lvrj.com/opinion.rss'), + (u'Neon', u'http://www.lvrj.com/neon.rss'), + (u'Image', u'http://www.lvrj.com/image.rss'), + (u'Home & Garden', u'http://www.lvrj.com/home_and_garden.rss'), + (u'Furniture & Design', u'http://www.lvrj.com/furniture_and_design.rss'), + (u'Drive', u'http://www.lvrj.com/drive.rss'), + (u'Real Estate', u'http://www.lvrj.com/real_estate.rss'), + (u'Sports', u'http://www.lvrj.com/sports.rss')] diff --git a/src/calibre/customize/builtins.py 
b/src/calibre/customize/builtins.py index 6bc94d30b0..697bba8152 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -445,7 +445,7 @@ from calibre.devices.nook.driver import NOOK from calibre.devices.prs500.driver import PRS500 from calibre.devices.prs505.driver import PRS505, PRS700 from calibre.devices.android.driver import ANDROID, S60 -from calibre.devices.nokia.driver import N770, N810 +from calibre.devices.nokia.driver import N770, N810, E71X from calibre.devices.eslick.driver import ESLICK from calibre.devices.nuut2.driver import NUUT2 from calibre.devices.iriver.driver import IRIVER_STORY @@ -454,7 +454,8 @@ from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK from calibre.devices.edge.driver import EDGE from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS from calibre.devices.sne.driver import SNE -from calibre.devices.misc import PALMPRE, KOBO, AVANT +from calibre.devices.misc import PALMPRE, AVANT +from calibre.devices.kobo.driver import KOBO from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon from calibre.library.catalog import CSV_XML, EPUB_MOBI @@ -515,6 +516,7 @@ plugins += [ ANDROID, S60, N770, + E71X, N810, COOL_ER, ESLICK, diff --git a/src/calibre/devices/kobo/__init__.py b/src/calibre/devices/kobo/__init__.py new file mode 100644 index 0000000000..0080175bfa --- /dev/null +++ b/src/calibre/devices/kobo/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + + + diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py new file mode 100644 index 0000000000..4b14b2bf8e --- /dev/null +++ b/src/calibre/devices/kobo/driver.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ 
= '2010, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +from calibre.devices.usbms.driver import USBMS + +class KOBO(USBMS): + + name = 'Kobo Reader Device Interface' + gui_name = 'Kobo Reader' + description = _('Communicate with the Kobo Reader') + author = 'Kovid Goyal' + + supported_platforms = ['windows', 'osx', 'linux'] + + # Ordered list of supported formats + FORMATS = ['epub', 'pdf'] + + VENDOR_ID = [0x2237] + PRODUCT_ID = [0x4161] + BCD = [0x0110] + + VENDOR_NAME = 'KOBO_INC' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '.KOBOEREADER' + + EBOOK_DIR_MAIN = '' + SUPPORTS_SUB_DIRS = True + diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index c7e0356f32..4310c51421 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -28,27 +28,6 @@ class PALMPRE(USBMS): EBOOK_DIR_MAIN = 'E-books' -class KOBO(USBMS): - - name = 'Kobo Reader Device Interface' - gui_name = 'Kobo Reader' - description = _('Communicate with the Kobo Reader') - author = 'Kovid Goyal' - - supported_platforms = ['windows', 'osx', 'linux'] - - # Ordered list of supported formats - FORMATS = ['epub', 'pdf'] - - VENDOR_ID = [0x2237] - PRODUCT_ID = [0x4161] - BCD = [0x0110] - - VENDOR_NAME = 'KOBO_INC' - WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '.KOBOEREADER' - - EBOOK_DIR_MAIN = '' - SUPPORTS_SUB_DIRS = True class AVANT(USBMS): name = 'Booq Avant Device Interface' diff --git a/src/calibre/devices/nokia/driver.py b/src/calibre/devices/nokia/driver.py index 59c181a4da..66a4243f2b 100644 --- a/src/calibre/devices/nokia/driver.py +++ b/src/calibre/devices/nokia/driver.py @@ -45,3 +45,25 @@ class N810(N770): WINDOWS_MAIN_MEM = 'N810' MAIN_MEMORY_VOLUME_LABEL = 'N810 Main Memory' + +class E71X(USBMS): + + name = 'Nokia E71X device interface' + gui_name = 'Nokia E71X' + description = 'Communicate with the Nokia E71X' + author = 'Kovid Goyal' + supported_platforms = ['windows', 'linux', 'osx'] + + VENDOR_ID = [0x421] + PRODUCT_ID = 
[0x1a0] + BCD = [0x100] + + + FORMATS = ['mobi', 'prc'] + + EBOOK_DIR_MAIN = 'eBooks' + SUPPORTS_SUB_DIRS = True + + VENDOR_NAME = 'NOKIA' + WINDOWS_MAIN_MEM = 'S60' + From c83e888bb9b14bdd93288e5e420ffa026bfa8f28 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 12:33:54 -0600 Subject: [PATCH 04/14] Fix NYTimes recipe to skip ads --- resources/recipes/nytimes_sub.recipe | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index a3ef2555f4..86bb3409f2 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -82,6 +82,7 @@ class NYTimes(BasicNewsRecipe): 'articleExtras', 'articleInline', 'blog_sidebar', + 'businessSearchBar', 'cCol', 'entertainmentSearchBar', 'footer', @@ -286,9 +287,14 @@ class NYTimes(BasicNewsRecipe): raw = self.browser.open('http://www.nytimes.com'+content).read() return BeautifulSoup(raw.decode('cp1252', 'replace')) ''' + # Skip ad pages before actual article + skip_tag = soup.find(True, {'name':'skip'}) + if skip_tag is not None: + soup = self.index_to_soup(skip_tag.parent['href']) return self.strip_anchors(soup) def postprocess_html(self,soup, True): + print "\npostprocess_html()\n" if self.one_picture_per_article: # Remove all images after first @@ -411,6 +417,7 @@ class NYTimes(BasicNewsRecipe): return soup def postprocess_book(self, oeb, opts, log) : + print "\npostprocess_book()\n" def extract_byline(href) : # <meta name="byline" content= From 6363aaa5b9894d5aad3720eebbeef1275ba6e7d4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 12:44:15 -0600 Subject: [PATCH 05/14] Raise an appropriate error for DTBook EPUB files --- src/calibre/ebooks/epub/input.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py index 5c4e255177..0abb8f49de 100644 --- 
a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/epub/input.py @@ -146,6 +146,10 @@ class EPUBInput(InputFormatPlugin): self.rationalize_cover(opf, log) self.optimize_opf_parsing = opf + for x in opf.itermanifest(): + if x.get('media-type', '') == 'application/x-dtbook+xml': + raise ValueError( + 'EPUB files with DTBook markup are not supported') with open('content.opf', 'wb') as nopf: nopf.write(opf.render()) From 25c4013b042332fd5e2a1452c756d13f343a2569 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 12:50:18 -0600 Subject: [PATCH 06/14] Fix NYTimes Top Stories recipe --- resources/recipes/nytimes.recipe | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe index a991f2b83c..bd429040d4 100644 --- a/resources/recipes/nytimes.recipe +++ b/resources/recipes/nytimes.recipe @@ -9,14 +9,13 @@ import re import time from calibre import entity_to_unicode from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \ -Comment, BeautifulStoneSoup +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment class NYTimes(BasicNewsRecipe): title = 'New York Times Top Stories' __author__ = 'GRiker' - language = 'en' + language = 'en' description = 'Top Stories from the New York Times' # List of sections typically included in Top Stories. 
Use a keyword from the @@ -257,6 +256,7 @@ class NYTimes(BasicNewsRecipe): # Fetch the outer table table = soup.find('table') previousTable = table + contentTable = None # Find the deepest table containing the stories while True : @@ -388,6 +388,10 @@ class NYTimes(BasicNewsRecipe): return ans def preprocess_html(self, soup): + # Skip ad pages before actual article + skip_tag = soup.find(True, {'name':'skip'}) + if skip_tag is not None: + soup = self.index_to_soup(skip_tag.parent['href']) return self.strip_anchors(soup) def postprocess_html(self,soup, True): From 8d8e40fed59796c7e1d08edbc02c729d3ff04b77 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 13:00:56 -0600 Subject: [PATCH 07/14] Fix #5619 (Problem with BBC news feeds) --- resources/recipes/bbc.recipe | 82 ++++++++++++++----------------- resources/recipes/bbc_fast.recipe | 31 ++++++------ 2 files changed, 51 insertions(+), 62 deletions(-) diff --git a/resources/recipes/bbc.recipe b/resources/recipes/bbc.recipe index 3634769d85..46be17a9e7 100644 --- a/resources/recipes/bbc.recipe +++ b/resources/recipes/bbc.recipe @@ -1,38 +1,47 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' +__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' ''' -bbc.co.uk +news.bbc.co.uk ''' -from calibre.web.feeds.news import BasicNewsRecipe +import re +from calibre.web.feeds.recipes import BasicNewsRecipe class BBC(BasicNewsRecipe): - title = u'The BBC' - __author__ = 'Kovid Goyal ans Sujata Raman' - description = 'Global news and current affairs from the British Broadcasting Corporation' - language = 'en' + title = 'The BBC' + __author__ = 'Darko Miletic' + description = 'Global news and current affairs from the British Broadcasting Corporation' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + #delay = 1 + use_embedded_content = False + encoding = 'utf8' + publisher = 'BBC' + category = 
'news, UK, world' + language = 'en_GB' + publication_type = 'newsportal' + extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')] - no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class':'footer'}), - {'id' : ['popstory','blq-footer']}, - {'class' : ['arrup','links','relatedbbcsites','arr','promobottombg','bbccom_visibility_hidden', 'sharesb', 'sib606', 'mvtb', 'storyextra', 'sidebar1', 'bbccom_text','promotopbg', 'gppromo','promotopbg','bbccom_display_none']}, - ] + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } - keep_only_tags = [dict(name='div', attrs={'class':'mainwrapper'})] - - extra_css = ''' - body{font-family:Arial,Helvetica,sans-serif; font-size:small; align:left} - h1{font-size:large;} - .sh{font-size:large; font-weight:bold} - .cap{font-size:xx-small; } - .lu{font-size:xx-small; } - .ds{font-size:xx-small; } - .mvb{font-size:xx-small;} - .by1{font-size:x-small; color:#666666} - .byd{font-size:x-small;} - ''' + keep_only_tags = [ + dict(attrs={'id' :['meta-information','story-body']}) + ,dict(attrs={'class':['mxb' ,'storybody' ]}) + ] + remove_tags = [ + dict(name=['object','link','table']) + ,dict(attrs={'class':['caption','caption full-width','story-actions','hidden','sharesb','audioInStoryC']}) + ] + remove_tags_after = dict(attrs={'class':'sharesb'}) + remove_attributes = ['width','height'] feeds = [ ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'), @@ -50,22 +59,3 @@ class BBC(BasicNewsRecipe): ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'), ] - def 
postprocess_html(self, soup, first): - - for tag in soup.findAll(name= 'img', alt=""): - tag.extract() - - for item in soup.findAll(align = "right"): - del item['align'] - - for tag in soup.findAll(name=['table', 'tr', 'td']): - tag.name = 'div' - - return soup - - - - # def print_version(self, url): - # return url.replace('http://', 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/') - - diff --git a/resources/recipes/bbc_fast.recipe b/resources/recipes/bbc_fast.recipe index 12ae9ce1eb..1af3bf8d1f 100644 --- a/resources/recipes/bbc_fast.recipe +++ b/resources/recipes/bbc_fast.recipe @@ -3,7 +3,7 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' ''' news.bbc.co.uk ''' - +import re from calibre.web.feeds.recipes import BasicNewsRecipe class BBC(BasicNewsRecipe): @@ -18,22 +18,28 @@ class BBC(BasicNewsRecipe): encoding = 'utf8' publisher = 'BBC' category = 'news, UK, world' - language = 'en' - extra_css = ' body{ font-family: sans-serif; } .headline{font-size: xx-large; font-weight: bold} .ibox{display: block; margin: 20px 50px; padding: 10px; border: 1px solid } ' - + language = 'en_GB' + publication_type = 'newsportal' + extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')] conversion_options = { 'comments' : description ,'tags' : category ,'language' : language ,'publisher' : publisher + ,'linearize_tables': True } - remove_tags_before = dict(name='div',attrs={'class':'headline'}) - remove_tags_after = dict(name='div', attrs={'class':'footer'}) - remove_tags = [ - dict(name=['object','link','script','iframe']) - ,dict(name='div', attrs={'class':'footer'}) + keep_only_tags = [ + dict(attrs={'id' :['meta-information','story-body']}) + ,dict(attrs={'class':['mxb' 
,'storybody' ]}) ] + remove_tags = [ + dict(name=['object','link','table','img']) + ,dict(attrs={'class':['caption','caption full-width','story-actions','hidden','sharesb','audioInStoryC']}) + ] + remove_tags_after = dict(attrs={'class':'sharesb'}) + remove_attributes = ['width','height'] feeds = [ ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'), @@ -51,10 +57,3 @@ class BBC(BasicNewsRecipe): ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'), ] - def print_version(self, url): - emp,sep,rstrip = url.partition('http://') - return 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/' + rstrip - - def get_article_url(self, article): - return article.get('guid', None) - From 678e3a871c1972280df0493f6bba1e5203927005 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 13:16:28 -0600 Subject: [PATCH 08/14] Fix #5596 (epub to mobi fail) --- src/calibre/ebooks/epub/input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py index 0abb8f49de..214511ae14 100644 --- a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/epub/input.py @@ -117,7 +117,7 @@ class EPUBInput(InputFormatPlugin): encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml')) opf = None for f in walk(u'.'): - if f.lower().endswith('.opf'): + if f.lower().endswith('.opf') and '__MACOSX' not in f: opf = os.path.abspath(f) break path = getattr(stream, 'name', 'stream') From af48c67fc7b09060fcfe7c7b67dd1dabd326d337 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 13:36:54 -0600 Subject: [PATCH 09/14] ... 
--- src/calibre/ebooks/oeb/iterator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index 020cf8d202..7912d26e83 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -21,7 +21,9 @@ from calibre.utils.logging import Log from calibre import guess_type, prints from calibre.ebooks.oeb.transforms.cover import CoverManager -TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace('__ar__', 'none') +TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\ + '__ar__', 'none').replace('__viewbox__', '0 0 600 800' + ).replace('__width__', '600').replace('__height__', '800') def character_count(html): ''' From 95229025ade03d9417bbd981e4c0a753e9a2a3d5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 13:57:01 -0600 Subject: [PATCH 10/14] EPUB Output: Generate a default one entry TOC if no TOC is present. This allows the EPUB to pass epubcheck and work on the Kobo --- src/calibre/ebooks/epub/output.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index 180b0c1f23..ee779aaefa 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -106,7 +106,7 @@ class EPUBOutput(OutputFormatPlugin): recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)]) - def workaround_webkit_quirks(self): + def workaround_webkit_quirks(self): # {{{ from calibre.ebooks.oeb.base import XPath for x in self.oeb.spine: root = x.data @@ -120,8 +120,9 @@ class EPUBOutput(OutputFormatPlugin): for pre in XPath('//h:pre')(body): if not pre.text and len(pre) == 0: pre.tag = 'div' + # }}} - def upshift_markup(self): + def upshift_markup(self): # {{{ 'Upgrade markup to comply with XHTML 1.1 where possible' from calibre.ebooks.oeb.base import XPath for x in self.oeb.spine: 
@@ -135,6 +136,7 @@ class EPUBOutput(OutputFormatPlugin): for u in XPath('//h:u')(root): u.tag = 'span' u.set('style', 'text-decoration:underline') + # }}} def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb @@ -161,8 +163,10 @@ class EPUBOutput(OutputFormatPlugin): self.workaround_sony_quirks() if self.oeb.toc.count() == 0: - self.log.warn('This EPUB file has no Table of Contents. It will ' - 'not validate via epubcheck') + self.log.warn('This EPUB file has no Table of Contents. ' + 'Creating a default TOC') + first = iter(self.oeb.spine).next() + self.oeb.toc.add(_('Start'), first.href) from calibre.ebooks.oeb.base import OPF identifiers = oeb.metadata['identifier'] @@ -202,7 +206,7 @@ class EPUBOutput(OutputFormatPlugin): self.log.info('EPUB extracted to', opts.extract_to) epub.close() - def encrypt_fonts(self, uris, tdir, uuid): + def encrypt_fonts(self, uris, tdir, uuid): # {{{ from binascii import unhexlify key = re.sub(r'[^a-fA-F0-9]', '', uuid) @@ -247,6 +251,7 @@ class EPUBOutput(OutputFormatPlugin): ans += (u'\n'.join(fonts)).encode('utf-8') ans += '\n</encryption>' return ans + # }}} def condense_ncx(self, ncx_path): if not self.opts.pretty_print: @@ -259,7 +264,7 @@ class EPUBOutput(OutputFormatPlugin): compressed = etree.tostring(tree.getroot(), encoding='utf-8') open(ncx_path, 'wb').write(compressed) - def workaround_ade_quirks(self): + def workaround_ade_quirks(self): # {{{ ''' Perform various markup transforms to get the output to render correctly in the quirky ADE. @@ -388,8 +393,9 @@ class EPUBOutput(OutputFormatPlugin): else: self.oeb.log.warn('No stylesheet found') + # }}} - def workaround_sony_quirks(self): + def workaround_sony_quirks(self): # {{{ ''' Perform toc link transforms to alleviate slow loading. 
''' @@ -436,3 +442,6 @@ class EPUBOutput(OutputFormatPlugin): if self.oeb.toc: simplify_toc_entry(self.oeb.toc) + + # }}} + From 34401387a219bb3bb413548252ae6b6317b7ed44 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 14:31:11 -0600 Subject: [PATCH 11/14] Cleanup Apple driver so it passes automated checking --- src/calibre/devices/apple/driver.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index 88c262a4d9..2e7fbc46ea 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -1,11 +1,11 @@ -''' - Device driver for iTunes +# -*- coding: utf-8 -*- - GRiker +__license__ = 'GPL v3' +__copyright__ = '2010, Gregory Riker' +__docformat__ = 'restructuredtext en' - 22 May 2010 -''' -import atexit, cStringIO, datetime, os, re, shutil, sys, time, zipfile + +import cStringIO, os, re, shutil, sys, time, zipfile from calibre.constants import DEBUG from calibre import fit_image @@ -13,18 +13,18 @@ from calibre.constants import isosx, iswindows from calibre.devices.interface import DevicePlugin from calibre.ebooks.metadata import MetaInformation from calibre.library.server.utils import strftime -from calibre.ptempfile import PersistentTemporaryFile, cleanup +from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import Config, config_dir from calibre.utils.date import parse_date from calibre.utils.logging import Log -from PIL import Image as PILImage, TarIO +from PIL import Image as PILImage if isosx: - import appscript, osax + import appscript -if iswindows: - import win32com.client +#if iswindows: +# import win32com.client class UserInteractionRequired(Exception): pass @@ -356,7 +356,7 @@ class ITUNES(DevicePlugin): # Init the iTunes source list names = [s.name() for s in self.iTunes.sources()] kinds = [str(s.kind()).rpartition('.')[2] for s in 
self.iTunes.sources()] - self.sources = sources = dict(zip(kinds,names)) + self.sources = dict(zip(kinds,names)) # Check to see if Library|Books out of sync with Device|Books if 'iPod' in self.sources and self.presync: @@ -711,7 +711,6 @@ class ITUNES(DevicePlugin): ''' if 'iPod' in self.sources: device = self.sources['iPod'] - device_books = [] if 'Books' in self.iTunes.sources[device].playlists.name(): return self.iTunes.sources[device].playlists['Books'].file_tracks() From 5aee0a8037a3d6e8d6c72821076652b5c646cb71 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 14:32:25 -0600 Subject: [PATCH 12/14] ... --- resources/recipes/nytimes.recipe | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe index bd429040d4..1f781dfa94 100644 --- a/resources/recipes/nytimes.recipe +++ b/resources/recipes/nytimes.recipe @@ -9,7 +9,8 @@ import re import time from calibre import entity_to_unicode from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \ + Comment, BeautifulStoneSoup class NYTimes(BasicNewsRecipe): @@ -256,7 +257,6 @@ class NYTimes(BasicNewsRecipe): # Fetch the outer table table = soup.find('table') previousTable = table - contentTable = None # Find the deepest table containing the stories while True : From 170dd6539a4b234d044daf8ccf2fdb7deddc5fc6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 14:52:05 -0600 Subject: [PATCH 13/14] Hook up open feedback and user feedback to the GUI --- src/calibre/devices/apple/driver.py | 21 +++++---------------- src/calibre/devices/errors.py | 17 ++++++++++++++--- src/calibre/devices/interface.py | 4 ++++ src/calibre/gui2/device.py | 4 ++++ src/calibre/gui2/ui.py | 11 +++++++++++ 5 files changed, 38 
insertions(+), 19 deletions(-) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index 2e7fbc46ea..7e9ca43ce2 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -17,6 +17,7 @@ from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import Config, config_dir from calibre.utils.date import parse_date from calibre.utils.logging import Log +from calibre.devices.errors import UserFeedback from PIL import Image as PILImage @@ -26,30 +27,18 @@ if isosx: #if iswindows: # import win32com.client -class UserInteractionRequired(Exception): - pass - -class UserFeedback(Exception): - INFO = 0 - WARN = 1 - ERROR = 2 - - def __init__(self, msg, details, level): - Exception.__init__(self, msg) - self.level = level - self.details = details - self.msg = msg - class ITUNES(DevicePlugin): + name = 'Apple device interface' gui_name = 'Apple device' icon = I('devices/ipad.png') description = _('Communicate with iBooks through iTunes.') - supported_platforms = ['windows','osx'] + supported_platforms = ['osx'] author = 'GRiker' driver_version = '0.1' - OPEN_FEEDBACK_MESSAGE = _('Apple device detected, launching iTunes') + OPEN_FEEDBACK_MESSAGE = _( + 'Apple device detected, launching iTunes, please wait...') FORMATS = ['epub'] diff --git a/src/calibre/devices/errors.py b/src/calibre/devices/errors.py index 6ac76176f3..8191b15db6 100644 --- a/src/calibre/devices/errors.py +++ b/src/calibre/devices/errors.py @@ -22,9 +22,20 @@ class DeviceError(ProtocolError): """ Raised when device is not found """ def __init__(self, msg=None): if msg is None: - msg = "Unable to find SONY Reader. Is it connected?" + msg = "Unable to find SONY Reader. Is it connected?" 
ProtocolError.__init__(self, msg) +class UserFeedback(DeviceError): + INFO = 0 + WARN = 1 + ERROR = 2 + + def __init__(self, msg, details, level): + Exception.__init__(self, msg) + self.level = level + self.details = details + self.msg = msg + class DeviceBusy(ProtocolError): """ Raised when device is busy """ def __init__(self, uerr=""): @@ -57,8 +68,8 @@ class ControlError(ProtocolError): self.query = query self.response = response ProtocolError.__init__(self, desc) - - def __str__(self): + + def __str__(self): if self.query and self.response: return "Got unexpected response:\n" + \ "query:\n"+str(self.query.query)+"\n"+\ diff --git a/src/calibre/devices/interface.py b/src/calibre/devices/interface.py index 80c0b3d339..35617d8097 100644 --- a/src/calibre/devices/interface.py +++ b/src/calibre/devices/interface.py @@ -47,6 +47,10 @@ class DevicePlugin(Plugin): # Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations() UserAnnotation = namedtuple('Annotation','type, value') + #: GUI displays this as a message if not None. 
Useful if opening can take a + #: long time + OPEN_FEEDBACK_MESSAGE = None + @classmethod def get_gui_name(cls): if hasattr(cls, 'gui_name'): diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 41abc6cb95..283d3bf5ec 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -81,6 +81,8 @@ class DeviceJob(BaseJob): class DeviceManager(Thread): + open_feedback = pyqtSignal(object) + def __init__(self, connected_slot, job_manager, sleep_time=2): ''' :sleep_time: Time to sleep between device probes in secs @@ -114,6 +116,8 @@ class DeviceManager(Thread): def do_connect(self, connected_devices, is_folder_device): for dev, detected_device in connected_devices: + if dev.OPEN_FEEDBACK_MESSAGE is not None: + self.open_feedback.emit(dev.OPEN_FEEDBACK_MESSAGE) dev.reset(detected_device=detected_device, report_progress=self.report_progress) try: diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 91b2353469..8669aa55eb 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -30,6 +30,7 @@ from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import prefs, dynamic from calibre.utils.ipc.server import Server from calibre.utils.search_query_parser import saved_searches +from calibre.devices.errors import UserFeedback from calibre.gui2 import warning_dialog, choose_files, error_dialog, \ question_dialog,\ pixmap_to_data, choose_dir, \ @@ -234,6 +235,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): ####################### Setup device detection ######################## self.device_manager = DeviceManager(Dispatcher(self.device_detected), self.job_manager) + self.device_manager.open_feedback.connect(self.status.showMessage, + type=Qt.QueuedConnection) self.device_manager.start() @@ -2327,6 +2330,14 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): ''' Handle exceptions in threaded device jobs. 
''' + if isinstance(getattr(job, 'exception', None), UserFeedback): + ex = job.exception + func = {UserFeedback.ERROR:error_dialog, + UserFeedback.WARNING:warning_dialog, + UserFeedback.INFO:info_dialog}[ex.level] + return func(self, _('Failed'), ex.msg, det_msg=ex.details if + ex.details else '', show=True) + try: if 'Could not read 32 bytes on the control bus.' in \ unicode(job.details): From 2c597b855e903e23b9a1a3793d90d390c42d5188 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 28 May 2010 15:07:11 -0600 Subject: [PATCH 14/14] Fix #5624 ("Automatically number books" is in the wrong tab order) --- src/calibre/gui2/dialogs/metadata_bulk.ui | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/gui2/dialogs/metadata_bulk.ui b/src/calibre/gui2/dialogs/metadata_bulk.ui index 01b5fc0adb..e1278164f0 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.ui +++ b/src/calibre/gui2/dialogs/metadata_bulk.ui @@ -303,7 +303,9 @@ Book A will have series number 1 and Book B series number 2.</string> <tabstop>tags</tabstop> <tabstop>remove_tags</tabstop> <tabstop>series</tabstop> + <tabstop>autonumber_series</tabstop> <tabstop>remove_format</tabstop> + <tabstop>swap_title_and_author</tabstop> <tabstop>button_box</tabstop> </tabstops> <resources>