diff --git a/recipes/foreignaffairs.recipe b/recipes/foreignaffairs.recipe index 69511cbd09..c7fa21b3e9 100644 --- a/recipes/foreignaffairs.recipe +++ b/recipes/foreignaffairs.recipe @@ -3,10 +3,17 @@ import re from calibre.ptempfile import PersistentTemporaryFile class ForeignAffairsRecipe(BasicNewsRecipe): + ''' there are three modifications: + 1) fetch issue cover + 2) toggle ignore premium articles + 3) extract proper section names, ie. "Comments", "Essay" + + by Chen Wei weichen302@gmx.com, 2012-02-05''' + __license__ = 'GPL v3' __author__ = 'kwetal' language = 'en' - version = 1 + version = 1.01 title = u'Foreign Affairs (Subcription or (free) Registration)' publisher = u'Council on Foreign Relations' @@ -17,6 +24,9 @@ class ForeignAffairsRecipe(BasicNewsRecipe): remove_javascript = True INDEX = 'http://www.foreignaffairs.com' + FRONTPAGE = 'http://www.foreignaffairs.com/magazine' + INCLUDE_PREMIUM = False + remove_tags = [] remove_tags.append(dict(name = 'base')) @@ -37,6 +47,12 @@ class ForeignAffairsRecipe(BasicNewsRecipe): temp_files = [] articles_are_obfuscated = True + def get_cover_url(self): + soup = self.index_to_soup(self.FRONTPAGE) + div = soup.find('div', attrs={'class':'inthemag-issuebuy-cover'}) + img_url = div.find('img')['src'] + return self.INDEX + img_url + def get_obfuscated_article(self, url): br = self.get_browser() br.open(url) @@ -50,57 +66,47 @@ class ForeignAffairsRecipe(BasicNewsRecipe): return self.temp_files[-1].name + def parse_index(self): - soup = self.index_to_soup('http://www.foreignaffairs.com/magazine') - articles = [] answer = [] - content = soup.find('div', attrs = {'class': 'center-wrapper'}) - if content: - for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}): - tag = div.find('div', attrs = {'class': 'views-field-title'}) - if tag: - a = tag.find('a') - if a: - title = self.tag_to_string(a) - url = self.INDEX + a['href'] - - author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})) - tag = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'}) - # If they ever fix their markup, this will break :-( - summary = self.tag_to_string(tag.findNextSibling('p')) - description = author + '
' + summary - - articles.append({'title': title, 'date': None, 'url': url, 'description': description}) - else: - continue - else: - continue - - answer.append(('Magazine', articles)) - - ul = content.find('ul') - if ul: + soup = self.index_to_soup(self.FRONTPAGE) + sec_start = soup.findAll('div', attrs={'class':'panel-separator'}) + for sec in sec_start: + content = sec.nextSibling + if content: + section = self.tag_to_string(content.find('h2')) articles = [] - for li in ul.findAll('li'): - tag = li.find('div', attrs = {'class': 'views-field-title'}) - if tag: - a = tag.find('a') - if a: - title = self.tag_to_string(a) - url = self.INDEX + a['href'] - description = '' - tag = li.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}) - if tag: - description = self.tag_to_string(tag) - articles.append({'title': title, 'date': None, 'url': url, 'description': description}) - else: - continue + tags = [] + for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}): + tags.append(div) + ul = content.find('ul') + for li in content.findAll('li'): + tags.append(li) + + for div in tags: + title = url = description = author = None + + if self.INCLUDE_PREMIUM: + found_premium = False else: - continue - - answer.append(('Letters to the Editor', articles)) + found_premium = div.findAll('span', attrs={'class': + 'premium-icon'}) + if not found_premium: + tag = div.find('div', attrs={'class': 'views-field-title'}) + if tag: + a = tag.find('a') + if a: + title = self.tag_to_string(a) + url = self.INDEX + a['href'] + author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})) + tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'}) + description = self.tag_to_string(tag_summary) + articles.append({'title':title, 'date':None, 'url':url, + 'description':description, 'author':author}) + if articles: + answer.append((section, articles)) return answer def preprocess_html(self, soup): diff --git a/recipes/ilmanifesto.recipe b/recipes/ilmanifesto.recipe new file mode 100644 index 0000000000..d7428cebb2 --- /dev/null +++ b/recipes/ilmanifesto.recipe @@ -0,0 +1,110 @@ +from calibre import strftime +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup + +MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/' + +class IlManifesto(BasicNewsRecipe): + title = 'Il Manifesto' + __author__ = 'Giacomo Lacava' + description = 'quotidiano comunista - ultima edizione html disponibile' + publication_type = 'newspaper' + publisher = 'il manifesto coop. editrice a r.l.' + language = 'it' + + oldest_article = 2 + max_articles_per_feed = 100 + delay = 1 + no_stylesheets = True + simultaneous_downloads = 5 + timeout = 30 + auto_cleanup = True + remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})] + remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'}) + remove_tags_after = dict(id='myPrintArea') + + manifesto_index = None + manifesto_datestr = None + + def _set_manifesto_index(self): + if self.manifesto_index == None: + startUrl = MANIFESTO_BASEURL + 'area-abbonati/in-edicola/' + startSoup = self.index_to_soup(startUrl) + lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href'] + del(startSoup) + self.manifesto_index = MANIFESTO_BASEURL + lastEdition + urlsplit = lastEdition.split('/') + self.manifesto_datestr = urlsplit[-1] + if urlsplit[-1] == '': + self.manifesto_datestr = urlsplit[-2] + + + + def get_cover_url(self): + self._set_manifesto_index() + url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr + return url + + def parse_index(self): + self._set_manifesto_index() + soup = self.index_to_soup(self.manifesto_index) + feedLinks = soup.find('div',id='accordion_inedicola').findAll('a') + result = [] + for feed in feedLinks: + articles = [] + feedName = feed.find('h2').string + feedUrl = MANIFESTO_BASEURL + feed['href'] + feedSoup = self.index_to_soup(feedUrl) + indexRoot = feedSoup.find('div',attrs={'class':'column1'}) + for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}): + artLink = div.find('a') + if artLink is None: continue # empty div + title = artLink.string + url = MANIFESTO_BASEURL + artLink['href'] + + description = '' + descNode = div.find('div',attrs={'class':'text_12'}) + if descNode is not None: + description = descNode.string + + author = '' + authNode = div.find('div',attrs={'class':'firma'}) + if authNode is not None: + author = authNode.string + + articleText = '' + article = { + 'title':title, + 'url':url, + 'date': strftime('%d %B %Y'), + 'description': description, + 'content': articleText, + 'author': author + } + articles.append(article) + result.append((feedName,articles)) + return result + + + def extract_readable_article(self, html, url): + + bs = BeautifulSoup(html) + col1 = bs.find('div',attrs={'class':'column1'}) + + content = col1.find('div',attrs={'class':'bodytext'}) + title = bs.find(id='titolo_articolo').string + author = col1.find('span',attrs={'class':'firma'}) + subtitle = '' + subNode = col1.findPrevious('div',attrs={'class':'occhiello_rosso'}) + if subNode is not None: + subtitle = subNode + summary = '' + sommNode = bs.find('div',attrs={'class':'sommario'}) + if sommNode is not None: + summary = sommNode + + template = "%(title)s

%(title)s

%(subtitle)s

%(author)s

%(summary)s
%(content)s
" + del(bs) + return template % dict(title=title,subtitle=subtitle,author=author,summary=summary,content=content) + + diff --git a/recipes/mwjournal.recipe b/recipes/mwjournal.recipe index 0eacee6703..65fb948eaa 100644 --- a/recipes/mwjournal.recipe +++ b/recipes/mwjournal.recipe @@ -1,58 +1,53 @@ +#!/usr/bin/env python ## -## Title: Microwave Journal RSS recipe +## Title: Microwave Journal ## Contact: Kiavash (use Mobile Read) ## ## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html ## Copyright: Kiavash ## ## Written: Jan 2012 -## Last Edited: Jan 2012 +## Last Edited: Feb 2012 ## +# Feb 2012: New Recipe compatible with the MWJournal 2.0 website + __license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html' __copyright__ = 'Kiavash' __author__ = 'Kaivash' ''' -Microwave Journal Monthly Magazine -You need to sign up (free) and get username/password. +microwavejournal.com ''' -import re # Import the regular expressions module. -from calibre.ptempfile import TemporaryFile # we need this for saving to a temp file +import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.utils.magick import Image class MWJournal(BasicNewsRecipe): - # Title to use for the ebook. - title = u'Microwave Journal' - __author__ = 'Kiavash' - language = 'en' - #A brief description for the ebook. - description = u'Microwave Journal web site ebook created using rss feeds.' - - # Set publisher and publication type. - publisher = 'Horizon House' + title = u'Microwave Journal' + description = u'Microwave Journal Monthly Magazine' + publisher = 'Horizon House' publication_type = 'magazine' + INDEX = 'http://www.microwavejournal.com/publications/' - oldest_article = 31 # monthly published magazine. Some months are 31 days! - max_articles_per_feed = 100 - remove_empty_feeds = True - auto_cleanup = True - - # Disable stylesheets and javascript from site. - no_stylesheets = True - remove_javascript = True - - asciiize = True # Converts all none ascii characters to their ascii equivalents - - needs_subscription = True # oh yeah... we need to login btw. - - # Timeout for fetching files from the server in seconds. The default of 120 seconds, seems somewhat excessive. + language = 'en' timeout = 30 - # Specify extra CSS - overrides ALL other CSS (IE. Added last). + Convert_Grayscale = False # Convert images to gray scale or not + keep_only_tags = [dict(name='div', attrs={'class':'record'})] + no_stylesheets = True + remove_javascript = True + remove_tags = [ + dict(name='font', attrs={'class':'footer'}), # remove fonts + ] + + remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan', + 'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ] + + # Specify extra CSS - overrides ALL other CSS (IE. Added last). extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ .introduction, .first { font-weight: bold; } \ .cross-head { font-weight: bold; font-size: 125%; } \ @@ -72,72 +67,75 @@ class MWJournal(BasicNewsRecipe): h3 { font-size: 125%; font-weight: bold; } \ h4, h5, h6 { font-size: 100%; font-weight: bold; }' - remove_tags = [ - dict(name='div', attrs={'class':'boxadzonearea350'}), # Removes banner ads - dict(name='font', attrs={'class':'footer'}), # remove fonts if you do like your fonts more! Comment out to use website's fonts - dict(name='div', attrs={'class':'newsarticlead'}) - ] - - # Remove various tag attributes to improve the look of the ebook pages. - remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan', - 'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ] - - # Remove the line breaks as well as href links. Books don't have links generally speaking + # Remove the line breaks, href links and float left/right and picture width/height. preprocess_regexps = [(re.compile(r'', re.IGNORECASE), lambda m: ''), (re.compile(r'', re.IGNORECASE), lambda m: ''), (re.compile(r''), lambda h1: ''), - (re.compile(r''), lambda h2: '') + (re.compile(r''), lambda h2: ''), + (re.compile(r'float:.*?'), lambda h3: ''), + (re.compile(r'width:.*?px'), lambda h4: ''), + (re.compile(r'height:.*?px'), lambda h5: '') ] - # Select the feeds that you are interested. - feeds = [ - (u'Current Issue', u'http://www.mwjournal.com/rss/Rss.asp?type=99'), - (u'Industry News', u'http://www.mwjournal.com/rss/Rss.asp?type=1'), - (u'Resources', u'http://www.mwjournal.com/rss/Rss.asp?type=3'), - (u'Buyer\'s Guide', u'http://www.mwjournal.com/rss/Rss.asp?type=5'), - (u'Events', u'http://www.mwjournal.com/rss/Rss.asp?type=2'), - (u'All Updates', u'http://www.mwjournal.com/rss/Rss.asp?type=0'), - ] - - # No magazine is complete without cover. Let's get it then! - # The function is adapted from the Economist recipe - def get_cover_url(self): - cover_url = None - cover_page_location = 'http://www.mwjournal.com/Journal/' # Cover image is located on this page - soup = self.index_to_soup(cover_page_location) - cover_item = soup.find('img',attrs={'src':lambda x: x and '/IssueImg/3_MWJ_CurrIss_CoverImg' in x}) # There are three files named cover, we want the highest resolution which is the 3rd image. So we look for the pattern. Remember that the name of the cover image changes every month so we cannot search for the complete name. Instead we are searching for the pattern - if cover_item: - cover_url = 'http://www.mwjournal.com' + cover_item['src'].strip() # yeah! we found it. Let's fetch the image file and pass it as cover to calibre - return cover_url def print_version(self, url): - if url.find('/Journal/article.asp?HH_ID=') >= 0: - return self.browser.open_novisit(url).geturl().replace('/Journal/article.asp?HH_ID=', '/Journal/Print.asp?Id=') - elif url.find('/News/article.asp?HH_ID=') >= 0: - return self.browser.open_novisit(url).geturl().replace('/News/article.asp?HH_ID=', '/Journal/Print.asp?Id=') - elif url.find('/Resources/TechLib.asp?HH_ID=') >= 0: - return self.browser.open_novisit(url).geturl().replace('/Resources/TechLib.asp?HH_ID=', '/Resources/PrintRessource.asp?Id=') + return url.replace('/articles/', '/articles/print/') - def get_browser(self): - ''' - Microwave Journal website, directs the login page to omeda.com once login info is submitted, omeda.com redirects to mwjournal.com with again the browser logs in into that site (hidden from the user). To overcome this obsticle, first login page is fetch and its output is stored to an HTML file. Then the HTML file is opened again and second login form is submitted (Many thanks to Barty which helped with second page login). - ''' - br = BasicNewsRecipe.get_browser() - if self.username is not None and self.password is not None: - url = ('http://www.omeda.com/cgi-win/mwjreg.cgi?m=login') # main login page. - br.open(url) # fetch the 1st login page - br.select_form('login') # finds the login form - br['EMAIL_ADDRESS'] = self.username # fills the username - br['PASSWORD'] = self.password # fills the password - raw = br.submit().read() # submit the form and read the 2nd login form - # save it to an htm temp file (from ESPN recipe written by Kovid Goyal kovid@kovidgoyal.net - with TemporaryFile(suffix='.htm') as fname: - with open(fname, 'wb') as f: - f.write(raw) - br.open_local_file(fname) - br.select_form(nr=0) # finds submit on the 2nd form - didwelogin = br.submit().read() # submit it and read the return html - if 'Welcome ' not in didwelogin: # did it login successfully? Is Username/password correct? - raise Exception('Failed to login, are you sure your username and password are correct?') - #login is done - return br + def parse_index(self): + articles = [] + + soup = self.index_to_soup(self.INDEX) + ts = soup.find('div', attrs={'class':'box1 article publications-show'}) + ds = self.tag_to_string(ts.find('h2')) + self.log('Found Current Issue:', ds) + self.timefmt = ' [%s]'%ds + + cover = ts.find('img', src=True) + if cover is not None: + self.cover_url = 'http://www.microwavejournal.com' + cover['src'] + self.log('Found Cover image:', self.cover_url) + + feeds = [] + seen_titles = set([]) # This is used to remove duplicant articles + sections = soup.find('div', attrs={'class':'box2 publication'}) + for section in sections.findAll('div', attrs={'class':'records'}): + section_title = self.tag_to_string(section.find('h3')) + self.log('Found section:', section_title) + articles = [] + for post in section.findAll('div', attrs={'class':'record'}): + h = post.find('h2') + title = self.tag_to_string(h) + if title.find('The MWJ Puzzler') >=0: #Let's get rid of the useless Puzzler! + continue + if title in seen_titles: + continue + seen_titles.add(title) + a = post.find('a', href=True) + url = a['href'] + if url.startswith('/'): + url = 'http://www.microwavejournal.com'+url + abstract = post.find('div', attrs={'class':'abstract'}) + p = abstract.find('p') + desc = None + self.log('\tFound article:', title, 'at', url) + if p is not None: + desc = self.tag_to_string(p) + self.log('\t\t', desc) + articles.append({'title':title, 'url':url, 'description':desc, + 'date':self.timefmt}) + if articles: + feeds.append((section_title, articles)) + return feeds + + def postprocess_html(self, soup, first): + if self.Convert_Grayscale: + #process all the images + for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): + iurl = tag['src'] + img = Image() + img.open(iurl) + if img < 0: + raise RuntimeError('Out of memory') + img.type = "GrayscaleType" + img.save(iurl) + return soup diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index ea9c92868b..38f7ec1a9a 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,30 +1,36 @@ +""" +readitlaterlist.com +""" __license__ = 'GPL v3' __copyright__ = ''' 2010, Darko Miletic 2011, Przemyslaw Kryger -''' -''' -readitlaterlist.com +2012, tBunnyMan ''' from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe + class Readitlater(BasicNewsRecipe): - title = 'Read It Later' - __author__ = 'Darko Miletic, Przemyslaw Kryger' - description = '''Personalized news feeds. Go to readitlaterlist.com to - setup up your news. Fill in your account - username, and optionally you can add password.''' - publisher = 'readitlater.com' + title = 'ReadItLater' + __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan' + description = '''Personalized news feeds. Go to readitlaterlist.com to setup \ + up your news. This version displays pages of articles from \ + oldest to newest, with max & minimum counts, and marks articles \ + read after downloading.''' + publisher = 'readitlaterlist.com' category = 'news, custom' oldest_article = 7 - max_articles_per_feed = 100 + max_articles_per_feed = 50 + minimum_articles = 1 no_stylesheets = True use_embedded_content = False needs_subscription = True INDEX = u'http://readitlaterlist.com' LOGIN = INDEX + u'/l' + readList = [] + def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -33,41 +39,46 @@ class Readitlater(BasicNewsRecipe): br.select_form(nr=0) br['feed_id'] = self.username if self.password is not None: - br['password'] = self.password + br['password'] = self.password br.submit() return br def get_feeds(self): - self.report_progress(0, ('Fetching list of feeds...')) + self.report_progress(0, ('Fetching list of pages...')) lfeeds = [] i = 1 feedurl = self.INDEX + u'/unread/1' while True: title = u'Unread articles, page ' + str(i) - lfeeds.append((title, feedurl)) - self.report_progress(0, ('Got ') + str(i) + (' feeds')) + lfeeds.insert(0, (title, feedurl)) + self.report_progress(0, ('Got ') + str(i) + (' pages')) i += 1 soup = self.index_to_soup(feedurl) - ritem = soup.find('a',attrs={'id':'next', 'class':'active'}) + ritem = soup.find('a', attrs={'id':'next', 'class':'active'}) if ritem is None: break feedurl = self.INDEX + ritem['href'] - if self.test: - return lfeeds[:2] return lfeeds def parse_index(self): totalfeeds = [] + articlesToGrab = self.max_articles_per_feed lfeeds = self.get_feeds() for feedobj in lfeeds: + if articlesToGrab < 1: + break feedtitle, feedurl = feedobj self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) articles = [] soup = self.index_to_soup(feedurl) - ritem = soup.find('ul',attrs={'id':'list'}) - for item in ritem.findAll('li'): + ritem = soup.find('ul', attrs={'id':'list'}) + for item in reversed(ritem.findAll('li')): + if articlesToGrab < 1: + break + else: + articlesToGrab -= 1 description = '' - atag = item.find('a',attrs={'class':'text'}) + atag = item.find('a', attrs={'class':'text'}) if atag and atag.has_key('href'): url = self.INDEX + atag['href'] title = self.tag_to_string(item.div) @@ -78,6 +89,20 @@ class Readitlater(BasicNewsRecipe): ,'url' :url ,'description':description }) + readLink = item.find('a', attrs={'class':'check'})['href'] + self.readList.append(readLink) totalfeeds.append((feedtitle, articles)) + if len(self.readList) < self.minimum_articles: + raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") return totalfeeds + def mark_as_read(self, markList): + br = self.get_browser() + for link in markList: + url = self.INDEX + link + response = br.open(url) + response + + def cleanup(self): + self.mark_as_read(self.readList) + diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 268dad4328..855d105e15 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -5,13 +5,14 @@ __copyright__ = '2008, Kovid Goyal ' import os, glob, functools, re from calibre import guess_type -from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \ - MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase +from calibre.customize import (FileTypePlugin, MetadataReaderPlugin, + MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase) from calibre.constants import numeric_version from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata -from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.html.to_zip import HTML2ZIP +plugins = [] + # To archive plugins {{{ class PML2PMLZ(FileTypePlugin): @@ -86,6 +87,8 @@ class TXT2TXTZ(FileTypePlugin): return list(set(images)) def run(self, path_to_ebook): + from calibre.ebooks.metadata.opf2 import metadata_to_opf + with open(path_to_ebook, 'rb') as ebf: txt = ebf.read() base_dir = os.path.dirname(path_to_ebook) @@ -117,6 +120,7 @@ class TXT2TXTZ(FileTypePlugin): # No images so just import the TXT file. return path_to_ebook +plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract,] # }}} # Metadata reader plugins {{{ @@ -399,6 +403,10 @@ class ZipMetadataReader(MetadataReaderPlugin): def get_metadata(self, stream, ftype): from calibre.ebooks.metadata.zip import get_metadata return get_metadata(stream) + +plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ + x.__name__.endswith('MetadataReader')] + # }}} # Metadata writer plugins {{{ @@ -499,107 +507,51 @@ class TXTZMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.metadata.extz import set_metadata set_metadata(stream, mi) -# }}} - -from calibre.ebooks.comic.input import ComicInput -from calibre.ebooks.djvu.input import DJVUInput -from calibre.ebooks.epub.input import EPUBInput -from calibre.ebooks.fb2.input import FB2Input -from calibre.ebooks.html.input import HTMLInput -from calibre.ebooks.htmlz.input import HTMLZInput -from calibre.ebooks.lit.input import LITInput -from calibre.ebooks.mobi.input import MOBIInput -from calibre.ebooks.odt.input import ODTInput -from calibre.ebooks.pdb.input import PDBInput -from calibre.ebooks.azw4.input import AZW4Input -from calibre.ebooks.pdf.input import PDFInput -from calibre.ebooks.pml.input import PMLInput -from calibre.ebooks.rb.input import RBInput -from calibre.web.feeds.input import RecipeInput -from calibre.ebooks.rtf.input import RTFInput -from calibre.ebooks.tcr.input import TCRInput -from calibre.ebooks.txt.input import TXTInput -from calibre.ebooks.lrf.input import LRFInput -from calibre.ebooks.chm.input import CHMInput -from calibre.ebooks.snb.input import SNBInput - -from calibre.ebooks.epub.output import EPUBOutput -from calibre.ebooks.fb2.output import FB2Output -from calibre.ebooks.lit.output import LITOutput -from calibre.ebooks.lrf.output import LRFOutput -from calibre.ebooks.mobi.output import MOBIOutput -from calibre.ebooks.oeb.output import OEBOutput -from calibre.ebooks.pdb.output import PDBOutput -from calibre.ebooks.pdf.output import PDFOutput -from calibre.ebooks.pml.output import PMLOutput -from calibre.ebooks.rb.output import RBOutput -from calibre.ebooks.rtf.output import RTFOutput -from calibre.ebooks.tcr.output import TCROutput -from calibre.ebooks.txt.output import TXTOutput -from calibre.ebooks.txt.output import TXTZOutput -from calibre.ebooks.html.output import HTMLOutput -from calibre.ebooks.htmlz.output import HTMLZOutput -from calibre.ebooks.snb.output import SNBOutput - -from calibre.customize.profiles import input_profiles, output_profiles - -from calibre.devices.apple.driver import ITUNES -from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA -from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK -from calibre.devices.cybook.driver import CYBOOK, ORIZON -from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK, - POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, - BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602, - POCKETBOOK701, POCKETBOOK360P, PI2) -from calibre.devices.iliad.driver import ILIAD -from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800 -from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI -from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX, - KINDLE_FIRE) -from calibre.devices.nook.driver import NOOK, NOOK_COLOR -from calibre.devices.prs505.driver import PRS505 -from calibre.devices.prst1.driver import PRST1 -from calibre.devices.user_defined.driver import USER_DEFINED -from calibre.devices.android.driver import ANDROID, S60, WEBOS -from calibre.devices.nokia.driver import N770, N810, E71X, E52 -from calibre.devices.eslick.driver import ESLICK, EBK52 -from calibre.devices.nuut2.driver import NUUT2 -from calibre.devices.iriver.driver import IRIVER_STORY -from calibre.devices.binatone.driver import README -from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK, - LIBREAIR, ODYSSEY) -from calibre.devices.edge.driver import EDGE -from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS, - SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER) -from calibre.devices.sne.driver import SNE -from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL, - GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, - TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G) -from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG -from calibre.devices.kobo.driver import KOBO -from calibre.devices.bambook.driver import BAMBOOK -from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX - -from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX -from calibre.ebooks.epub.fix.unmanifested import Unmanifested -from calibre.ebooks.epub.fix.epubcheck import Epubcheck - -plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, - Epubcheck, ] - -# New metadata download plugins {{{ -from calibre.ebooks.metadata.sources.google import GoogleBooks -from calibre.ebooks.metadata.sources.amazon import Amazon -from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary -from calibre.ebooks.metadata.sources.isbndb import ISBNDB -from calibre.ebooks.metadata.sources.overdrive import OverDrive -from calibre.ebooks.metadata.sources.douban import Douban -from calibre.ebooks.metadata.sources.ozon import Ozon - -plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon] +plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ + x.__name__.endswith('MetadataWriter')] # }}} +# Conversion plugins {{{ +from calibre.ebooks.conversion.plugins.comic_input import ComicInput +from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput +from calibre.ebooks.conversion.plugins.epub_input import EPUBInput +from calibre.ebooks.conversion.plugins.fb2_input import FB2Input +from calibre.ebooks.conversion.plugins.html_input import HTMLInput +from calibre.ebooks.conversion.plugins.htmlz_input import HTMLZInput +from calibre.ebooks.conversion.plugins.lit_input import LITInput +from calibre.ebooks.conversion.plugins.mobi_input import MOBIInput +from calibre.ebooks.conversion.plugins.odt_input import ODTInput +from calibre.ebooks.conversion.plugins.pdb_input import PDBInput +from calibre.ebooks.conversion.plugins.azw4_input import AZW4Input +from calibre.ebooks.conversion.plugins.pdf_input import PDFInput +from calibre.ebooks.conversion.plugins.pml_input import PMLInput +from calibre.ebooks.conversion.plugins.rb_input import RBInput +from calibre.ebooks.conversion.plugins.recipe_input import RecipeInput +from calibre.ebooks.conversion.plugins.rtf_input import RTFInput +from calibre.ebooks.conversion.plugins.tcr_input import TCRInput +from calibre.ebooks.conversion.plugins.txt_input import TXTInput +from calibre.ebooks.conversion.plugins.lrf_input import LRFInput +from calibre.ebooks.conversion.plugins.chm_input import CHMInput +from calibre.ebooks.conversion.plugins.snb_input import SNBInput + +from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput +from calibre.ebooks.conversion.plugins.fb2_output import FB2Output +from calibre.ebooks.conversion.plugins.lit_output import LITOutput +from calibre.ebooks.conversion.plugins.lrf_output import LRFOutput +from calibre.ebooks.conversion.plugins.mobi_output import MOBIOutput +from calibre.ebooks.conversion.plugins.oeb_output import OEBOutput +from calibre.ebooks.conversion.plugins.pdb_output import PDBOutput +from calibre.ebooks.conversion.plugins.pdf_output import PDFOutput +from calibre.ebooks.conversion.plugins.pml_output import PMLOutput +from calibre.ebooks.conversion.plugins.rb_output import RBOutput +from calibre.ebooks.conversion.plugins.rtf_output import RTFOutput +from calibre.ebooks.conversion.plugins.tcr_output import TCROutput +from calibre.ebooks.conversion.plugins.txt_output import TXTOutput, TXTZOutput +from calibre.ebooks.conversion.plugins.html_output import HTMLOutput +from calibre.ebooks.conversion.plugins.htmlz_output import HTMLZOutput +from calibre.ebooks.conversion.plugins.snb_output import SNBOutput + plugins += [ ComicInput, DJVUInput, @@ -642,6 +594,66 @@ plugins += [ HTMLZOutput, SNBOutput, ] +# }}} + +# Catalog plugins {{{ +from calibre.library.catalogs.csv_xml import CSV_XML +from calibre.library.catalogs.bibtex import BIBTEX +from calibre.library.catalogs.epub_mobi import EPUB_MOBI +plugins += [CSV_XML, BIBTEX, EPUB_MOBI] +# }}} + +# EPUB Fix plugins {{{ +from calibre.ebooks.epub.fix.unmanifested import Unmanifested +from calibre.ebooks.epub.fix.epubcheck import Epubcheck +plugins += [Unmanifested, Epubcheck] +# }}} + +# Profiles {{{ +from calibre.customize.profiles import input_profiles, output_profiles +plugins += input_profiles + output_profiles +# }}} + +# Device driver plugins {{{ +from calibre.devices.apple.driver import ITUNES +from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA +from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK +from calibre.devices.cybook.driver import CYBOOK, ORIZON +from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK, + POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, + BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602, + POCKETBOOK701, POCKETBOOK360P, PI2) +from calibre.devices.iliad.driver import ILIAD +from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800 +from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI +from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX, + KINDLE_FIRE) +from calibre.devices.nook.driver import NOOK, NOOK_COLOR +from calibre.devices.prs505.driver import PRS505 +from calibre.devices.prst1.driver import PRST1 +from calibre.devices.user_defined.driver import USER_DEFINED +from calibre.devices.android.driver import ANDROID, S60, WEBOS +from calibre.devices.nokia.driver import N770, N810, E71X, E52 +from calibre.devices.eslick.driver import ESLICK, EBK52 +from calibre.devices.nuut2.driver import NUUT2 +from calibre.devices.iriver.driver import IRIVER_STORY +from calibre.devices.binatone.driver import README +from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK, + LIBREAIR, ODYSSEY) +from calibre.devices.edge.driver import EDGE +from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS, + SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER) +from calibre.devices.sne.driver import SNE +from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL, + GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, + TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G) +from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG +from calibre.devices.kobo.driver import KOBO +from calibre.devices.bambook.driver import BAMBOOK +from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX + + + # Order here matters. The first matched device is the one used. plugins += [ HANLINV3, @@ -716,11 +728,20 @@ plugins += [ BOEYE_BDX, USER_DEFINED, ] -plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ - x.__name__.endswith('MetadataReader')] -plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ - x.__name__.endswith('MetadataWriter')] -plugins += input_profiles + output_profiles +# }}} + +# New metadata download plugins {{{ +from calibre.ebooks.metadata.sources.google import GoogleBooks +from calibre.ebooks.metadata.sources.amazon import Amazon +from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary +from calibre.ebooks.metadata.sources.isbndb import ISBNDB +from calibre.ebooks.metadata.sources.overdrive import OverDrive +from calibre.ebooks.metadata.sources.douban import Douban +from calibre.ebooks.metadata.sources.ozon import Ozon + +plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon] + +# }}} # Interface Actions {{{ @@ -1623,3 +1644,34 @@ plugins += [ ] # }}} + +if __name__ == '__main__': + # Test load speed + import subprocess, textwrap + try: + subprocess.check_call(['python', '-c', textwrap.dedent( + ''' + from __future__ import print_function + import time, sys, init_calibre + st = time.time() + import calibre.customize.builtins + t = time.time() - st + ret = 0 + + for x in ('lxml', 'calibre.ebooks.BeautifulSoup', 'uuid', + 'calibre.utils.terminfo', 'calibre.utils.magick', 'PIL', 'Image', + 'sqlite3', 'mechanize', 'httplib', 'xml'): + if x in sys.modules: + ret = 1 + print (x, 'has been loaded by a plugin') + if ret: + print ('\\nA good way to track down what is loading something is to run' + ' python -c "import init_calibre; import calibre.customize.builtins"') + print() + print ('Time taken to import all plugins: %.2f'%t) + sys.exit(ret) + + ''')]) + except subprocess.CalledProcessError: + raise SystemExit(1) + diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 8bb0e55f5e..f6ed6ce3ec 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -5,7 +5,6 @@ __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' from itertools import izip -from xml.sax.saxutils import escape from calibre.customize import Plugin as _Plugin @@ -268,6 +267,7 @@ class OutputProfile(Plugin): @classmethod def tags_to_string(cls, tags): + from xml.sax.saxutils import escape return escape(', '.join(tags)) class iPadOutput(OutputProfile): diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index e309533235..b365eb1346 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -447,11 +447,14 @@ def plugin_for_catalog_format(fmt): # }}} -def device_plugins(): # {{{ +def device_plugins(include_disabled=False): # {{{ for plugin in _initialized_plugins: if isinstance(plugin, DevicePlugin): - if not is_disabled(plugin): + if include_disabled or not is_disabled(plugin): if platform in plugin.supported_platforms: + if getattr(plugin, 'plugin_needs_delayed_initialization', + False): + plugin.do_delayed_plugin_initialization() yield plugin # }}} @@ -496,7 +499,7 @@ def initialize_plugin(plugin, path_to_zip_file): def has_external_plugins(): return bool(config['plugins']) -def initialize_plugins(): +def initialize_plugins(perf=False): global _initialized_plugins _initialized_plugins = [] conflicts = [name for name in config['plugins'] if name in @@ -504,6 +507,11 @@ def initialize_plugins(): for p in conflicts: remove_plugin(p) external_plugins = config['plugins'] + ostdout, ostderr = sys.stdout, sys.stderr + if perf: + from collections import defaultdict + import time + times = defaultdict(lambda:0) for zfp in list(external_plugins) + builtin_plugins: try: if not isinstance(zfp, type): @@ -516,12 +524,22 @@ def initialize_plugins(): plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp except PluginNotFound: continue + if perf: + st = time.time() plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp) + if perf: + times[plugin.name] = time.time() - st _initialized_plugins.append(plugin) except: print 'Failed to initialize plugin:', repr(zfp) if DEBUG: traceback.print_exc() + # Prevent a custom plugin from overriding stdout/stderr as this breaks + # ipython + sys.stdout, sys.stderr = ostdout, ostderr + if perf: + for x in sorted(times, key=lambda x:times[x]): + print ('%50s: %.3f'%(x, times[x])) _initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True) reread_filetype_plugins() reread_metadata_plugins() diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 210b277788..eca9b2a679 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -221,6 +221,20 @@ class ANDROID(USBMS): drives['main'] = letter_a return drives + @classmethod + def configure_for_kindle_app(cls): + proxy = cls._configProxy() + proxy['format_map'] = ['mobi', 'azw', 'azw1', 'azw4', 'pdf'] + proxy['use_subdirs'] = False + proxy['extra_customization'] = ','.join(['kindle']+cls.EBOOK_DIR_MAIN) + + @classmethod + def configure_for_generic_epub_app(cls): + proxy = cls._configProxy() + del proxy['format_map'] + del proxy['use_subdirs'] + del proxy['extra_customization'] + class S60(USBMS): name = 'S60 driver' diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index b6d258ad81..524a62224f 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -8,27 +8,39 @@ __docformat__ = 'restructuredtext en' import cStringIO, ctypes, datetime, os, re, shutil, sys, tempfile, time from calibre.constants import __appname__, __version__, DEBUG -from calibre import fit_image, confirm_config_name +from calibre import fit_image, confirm_config_name, strftime as _strftime from calibre.constants import isosx, iswindows from calibre.devices.errors import OpenFeedback, UserFeedback from calibre.devices.usbms.deviceconfig import DeviceConfig from calibre.devices.interface import DevicePlugin -from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.metadata import authors_to_string, MetaInformation, title_sort from calibre.ebooks.metadata.book.base import Metadata -from calibre.ebooks.metadata.epub import set_metadata -from calibre.library.server.utils import strftime from calibre.utils.config import config_dir, dynamic, prefs from calibre.utils.date import now, parse_date -from calibre.utils.logging import Log from calibre.utils.zipfile import ZipFile +def strftime(fmt='%Y/%m/%d %H:%M:%S', dt=None): + + if not hasattr(dt, 'timetuple'): + dt = now() + dt = dt.timetuple() + try: + return _strftime(fmt, dt) + except: + return _strftime(fmt, now().timetuple()) + +_log = None +def logger(): + global _log + if _log is None: + from calibre.utils.logging import ThreadSafeLog + _log = ThreadSafeLog() + return _log class AppleOpenFeedback(OpenFeedback): def __init__(self, plugin): OpenFeedback.__init__(self, u'') - self.log = plugin.log self.plugin = plugin def custom_dialog(self, parent): @@ -78,19 +90,18 @@ class AppleOpenFeedback(OpenFeedback): self.finished.connect(self.do_it) def do_it(self, return_code): + from calibre.utils.logging import default_log if return_code == self.Accepted: - self.cd.log.info(" Apple driver ENABLED") + default_log.info(" Apple driver ENABLED") dynamic[confirm_config_name(self.cd.plugin.DISPLAY_DISABLE_DIALOG)] = False else: from calibre.customize.ui import disable_plugin - self.cd.log.info(" Apple driver DISABLED") + default_log.info(" Apple driver DISABLED") disable_plugin(self.cd.plugin) return Dialog(parent, self) -from PIL import Image as PILImage -from lxml import etree if isosx: try: @@ -297,7 +308,6 @@ class ITUNES(DriverBase): iTunes= None iTunes_local_storage = None library_orphans = None - log = Log() manual_sync_mode = False path_template = 'iTunes/%s - %s.%s' plugboards = None @@ -323,7 +333,7 @@ class ITUNES(DriverBase): L{books}(oncard='cardb')). ''' if DEBUG: - self.log.info("ITUNES.add_books_to_metadata()") + logger().info("ITUNES.add_books_to_metadata()") task_count = float(len(self.update_list)) @@ -337,10 +347,10 @@ class ITUNES(DriverBase): for (j,p_book) in enumerate(self.update_list): if False: if isosx: - self.log.info(" looking for '%s' by %s uuid:%s" % + logger().info(" looking for '%s' by %s uuid:%s" % (p_book['title'],p_book['author'], p_book['uuid'])) elif iswindows: - self.log.info(" looking for '%s' by %s (%s)" % + logger().info(" looking for '%s' by %s (%s)" % (p_book['title'],p_book['author'], p_book['uuid'])) # Purge the booklist, self.cached_books @@ -350,10 +360,10 @@ class ITUNES(DriverBase): booklists[0].pop(i) if False: if isosx: - self.log.info(" removing old %s %s from booklists[0]" % + logger().info(" removing old %s %s from booklists[0]" % (p_book['title'], str(p_book['lib_book'])[-9:])) elif iswindows: - self.log.info(" removing old '%s' from booklists[0]" % + logger().info(" removing old '%s' from booklists[0]" % (p_book['title'])) # If >1 matching uuid, remove old title @@ -383,7 +393,7 @@ class ITUNES(DriverBase): # for new_book in metadata[0]: for new_book in locations[0]: if DEBUG: - self.log.info(" adding '%s' by '%s' to booklists[0]" % + logger().info(" adding '%s' by '%s' to booklists[0]" % (new_book.title, new_book.author)) booklists[0].append(new_book) @@ -408,15 +418,15 @@ class ITUNES(DriverBase): """ if not oncard: if DEBUG: - self.log.info("ITUNES:books():") + logger().info("ITUNES:books():") if self.settings().extra_customization[self.CACHE_COVERS]: - self.log.info(" Cover fetching/caching enabled") + logger().info(" Cover fetching/caching enabled") else: - self.log.info(" Cover fetching/caching disabled") + logger().info(" Cover fetching/caching disabled") # Fetch a list of books from iPod device connected to iTunes if 'iPod' in self.sources: - booklist = BookList(self.log) + booklist = BookList(logger()) cached_books = {} if isosx: @@ -507,7 +517,7 @@ class ITUNES(DriverBase): self._dump_cached_books('returning from books()',indent=2) return booklist else: - return BookList(self.log) + return BookList(logger()) def can_handle(self, device_info, debug=False): ''' @@ -544,7 +554,7 @@ class ITUNES(DriverBase): # We need to know if iTunes sees the iPad # It may have been ejected if DEBUG: - self.log.info("ITUNES.can_handle()") + logger().info("ITUNES.can_handle()") self._launch_iTunes() self.sources = self._get_sources() @@ -557,15 +567,15 @@ class ITUNES(DriverBase): attempts -= 1 time.sleep(0.5) if DEBUG: - self.log.warning(" waiting for connected iPad, attempt #%d" % (10 - attempts)) + logger().warning(" waiting for connected iPad, attempt #%d" % (10 - attempts)) else: if DEBUG: - self.log.info(' found connected iPad') + logger().info(' found connected iPad') break else: # iTunes running, but not connected iPad if DEBUG: - self.log.info(' self.ejected = True') + logger().info(' self.ejected = True') self.ejected = True return False @@ -599,26 +609,26 @@ class ITUNES(DriverBase): sys.stdout.write('.') sys.stdout.flush() if DEBUG: - self.log.info('ITUNES.can_handle_windows:\n confirming connected iPad') + logger().info('ITUNES.can_handle_windows:\n confirming connected iPad') self.ejected = False self._discover_manual_sync_mode() return True else: if DEBUG: - self.log.info("ITUNES.can_handle_windows():\n device ejected") + logger().info("ITUNES.can_handle_windows():\n device ejected") self.ejected = True return False except: # iTunes connection failed, probably not running anymore - self.log.error("ITUNES.can_handle_windows():\n lost connection to iTunes") + logger().error("ITUNES.can_handle_windows():\n lost connection to iTunes") return False finally: pythoncom.CoUninitialize() else: if DEBUG: - self.log.info("ITUNES:can_handle_windows():\n Launching iTunes") + logger().info("ITUNES:can_handle_windows():\n Launching iTunes") try: pythoncom.CoInitialize() @@ -633,19 +643,19 @@ class ITUNES(DriverBase): attempts -= 1 time.sleep(0.5) if DEBUG: - self.log.warning(" waiting for connected iPad, attempt #%d" % (10 - attempts)) + logger().warning(" waiting for connected iPad, attempt #%d" % (10 - attempts)) else: if DEBUG: - self.log.info(' found connected iPad in iTunes') + logger().info(' found connected iPad in iTunes') break else: # iTunes running, but not connected iPad if DEBUG: - self.log.info(' iDevice has been ejected') + logger().info(' iDevice has been ejected') self.ejected = True return False - self.log.info(' found connected iPad in sources') + logger().info(' found connected iPad in sources') self._discover_manual_sync_mode(wait=1.0) finally: @@ -688,11 +698,11 @@ class ITUNES(DriverBase): self.problem_msg = _("Some books not found in iTunes database.\n" "Delete using the iBooks app.\n" "Click 'Show Details' for a list.") - self.log.info("ITUNES:delete_books()") + logger().info("ITUNES:delete_books()") for path in paths: if self.cached_books[path]['lib_book']: if DEBUG: - self.log.info(" Deleting '%s' from iTunes library" % (path)) + logger().info(" Deleting '%s' from iTunes library" % (path)) if isosx: self._remove_from_iTunes(self.cached_books[path]) @@ -712,7 +722,7 @@ class ITUNES(DriverBase): self.update_needed = True self.update_msg = "Deleted books from device" else: - self.log.info(" skipping sync phase, manual_sync_mode: True") + logger().info(" skipping sync phase, manual_sync_mode: True") else: if self.manual_sync_mode: metadata = MetaInformation(self.cached_books[path]['title'], @@ -739,7 +749,7 @@ class ITUNES(DriverBase): are pending GUI jobs that need to communicate with the device. ''' if DEBUG: - self.log.info("ITUNES:eject(): ejecting '%s'" % self.sources['iPod']) + logger().info("ITUNES:eject(): ejecting '%s'" % self.sources['iPod']) if isosx: self.iTunes.eject(self.sources['iPod']) elif iswindows: @@ -768,7 +778,7 @@ class ITUNES(DriverBase): In Windows, a sync-in-progress blocks this call until sync is complete """ if DEBUG: - self.log.info("ITUNES:free_space()") + logger().info("ITUNES:free_space()") free_space = 0 if isosx: @@ -790,7 +800,7 @@ class ITUNES(DriverBase): pythoncom.CoUninitialize() break except: - self.log.error(' waiting for free_space() call to go through') + logger().error(' waiting for free_space() call to go through') return (free_space,-1,-1) @@ -800,7 +810,7 @@ class ITUNES(DriverBase): @return: (device name, device version, software version on device, mime type) """ if DEBUG: - self.log.info("ITUNES:get_device_information()") + logger().info("ITUNES:get_device_information()") return (self.sources['iPod'],'hw v1.0','sw v1.0', 'mime type normally goes here') @@ -810,7 +820,7 @@ class ITUNES(DriverBase): @param outfile: file object like C{sys.stdout} or the result of an C{open} call ''' if DEBUG: - self.log.info("ITUNES.get_file(): exporting '%s'" % path) + logger().info("ITUNES.get_file(): exporting '%s'" % path) outfile.write(open(self.cached_books[path]['lib_book'].location().path).read()) @@ -830,7 +840,7 @@ class ITUNES(DriverBase): ''' if DEBUG: - self.log.info("ITUNES.open(connected_device: %s)" % repr(connected_device)) + logger().info("ITUNES.open(connected_device: %s)" % repr(connected_device)) # Display a dialog recommending using 'Connect to iTunes' if user hasn't # previously disabled the dialog @@ -838,33 +848,33 @@ class ITUNES(DriverBase): raise AppleOpenFeedback(self) else: if DEBUG: - self.log.warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE) + logger().warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE) # Confirm/create thumbs archive if not os.path.exists(self.cache_dir): if DEBUG: - self.log.info(" creating thumb cache at '%s'" % self.cache_dir) + logger().info(" creating thumb cache at '%s'" % self.cache_dir) os.makedirs(self.cache_dir) if not os.path.exists(self.archive_path): - self.log.info(" creating zip archive") + logger().info(" creating zip archive") zfw = ZipFile(self.archive_path, mode='w') zfw.writestr("iTunes Thumbs Archive",'') zfw.close() else: if DEBUG: - self.log.info(" existing thumb cache at '%s'" % self.archive_path) + logger().info(" existing thumb cache at '%s'" % self.archive_path) # If enabled in config options, create/confirm an iTunes storage folder if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]: self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage') if not os.path.exists(self.iTunes_local_storage): if DEBUG: - self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage) + logger()(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage) os.mkdir(self.iTunes_local_storage) else: if DEBUG: - self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage) + logger()(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage) def remove_books_from_metadata(self, paths, booklists): ''' @@ -879,11 +889,11 @@ class ITUNES(DriverBase): as uuids are different ''' if DEBUG: - self.log.info("ITUNES.remove_books_from_metadata()") + logger().info("ITUNES.remove_books_from_metadata()") for path in paths: if DEBUG: self._dump_cached_book(self.cached_books[path], indent=2) - self.log.info(" looking for '%s' by '%s' uuid:%s" % + logger().info(" looking for '%s' by '%s' uuid:%s" % (self.cached_books[path]['title'], self.cached_books[path]['author'], self.cached_books[path]['uuid'])) @@ -891,19 +901,19 @@ class ITUNES(DriverBase): # Purge the booklist, self.cached_books, thumb cache for i,bl_book in enumerate(booklists[0]): if False: - self.log.info(" evaluating '%s' by '%s' uuid:%s" % + logger().info(" evaluating '%s' by '%s' uuid:%s" % (bl_book.title, bl_book.author,bl_book.uuid)) found = False if bl_book.uuid == self.cached_books[path]['uuid']: if False: - self.log.info(" matched with uuid") + logger().info(" matched with uuid") booklists[0].pop(i) found = True elif bl_book.title == self.cached_books[path]['title'] and \ bl_book.author[0] == self.cached_books[path]['author']: if False: - self.log.info(" matched with title + author") + logger().info(" matched with title + author") booklists[0].pop(i) found = True @@ -924,17 +934,17 @@ class ITUNES(DriverBase): thumb = None if thumb: if DEBUG: - self.log.info(" deleting '%s' from cover cache" % (thumb_path)) + logger().info(" deleting '%s' from cover cache" % (thumb_path)) zf.delete(thumb_path) else: if DEBUG: - self.log.info(" '%s' not found in cover cache" % thumb_path) + logger().info(" '%s' not found in cover cache" % thumb_path) zf.close() break else: if DEBUG: - self.log.error(" unable to find '%s' by '%s' (%s)" % + logger().error(" unable to find '%s' by '%s' (%s)" % (bl_book.title, bl_book.author,bl_book.uuid)) if False: @@ -953,7 +963,7 @@ class ITUNES(DriverBase): :detected_device: Device information from the device scanner """ if DEBUG: - self.log.info("ITUNES.reset()") + logger().info("ITUNES.reset()") if report_progress: self.set_progress_reporter(report_progress) @@ -965,7 +975,7 @@ class ITUNES(DriverBase): task does not have any progress information ''' if DEBUG: - self.log.info("ITUNES.set_progress_reporter()") + logger().info("ITUNES.set_progress_reporter()") self.report_progress = report_progress @@ -973,8 +983,8 @@ class ITUNES(DriverBase): # This method is called with the plugboard that matches the format # declared in use_plugboard_ext and a device name of ITUNES if DEBUG: - self.log.info("ITUNES.set_plugboard()") - #self.log.info(' plugboard: %s' % plugboards) + logger().info("ITUNES.set_plugboard()") + #logger().info(' plugboard: %s' % plugboards) self.plugboards = plugboards self.plugboard_func = pb_func @@ -987,11 +997,11 @@ class ITUNES(DriverBase): ''' if DEBUG: - self.log.info("ITUNES.sync_booklists()") + logger().info("ITUNES.sync_booklists()") if self.update_needed: if DEBUG: - self.log.info(' calling _update_device') + logger().info(' calling _update_device') self._update_device(msg=self.update_msg, wait=False) self.update_needed = False @@ -1014,7 +1024,7 @@ class ITUNES(DriverBase): particular device doesn't have any of these locations it should return 0. """ if DEBUG: - self.log.info("ITUNES:total_space()") + logger().info("ITUNES:total_space()") capacity = 0 if isosx: if 'iPod' in self.sources: @@ -1052,7 +1062,7 @@ class ITUNES(DriverBase): "Click 'Show Details' for a list.") if DEBUG: - self.log.info("ITUNES.upload_books()") + logger().info("ITUNES.upload_books()") if isosx: for (i,fpath) in enumerate(files): @@ -1069,8 +1079,8 @@ class ITUNES(DriverBase): # Add new_book to self.cached_books if DEBUG: - self.log.info("ITUNES.upload_books()") - self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" % + logger().info("ITUNES.upload_books()") + logger().info(" adding '%s' by '%s' uuid:%s to self.cached_books" % (metadata[i].title, authors_to_string(metadata[i].authors), metadata[i].uuid)) @@ -1113,8 +1123,8 @@ class ITUNES(DriverBase): # Add new_book to self.cached_books if DEBUG: - self.log.info("ITUNES.upload_books()") - self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" % + logger().info("ITUNES.upload_books()") + logger().info(" adding '%s' by '%s' uuid:%s to self.cached_books" % (metadata[i].title, authors_to_string(metadata[i].authors), metadata[i].uuid)) @@ -1151,7 +1161,7 @@ class ITUNES(DriverBase): ''' assumes pythoncom wrapper for windows ''' - self.log.info(" ITUNES._add_device_book()") + logger().info(" ITUNES._add_device_book()") if isosx: if 'iPod' in self.sources: connected_device = self.sources['iPod'] @@ -1161,12 +1171,12 @@ class ITUNES(DriverBase): break else: if DEBUG: - self.log.error(" Device|Books playlist not found") + logger().error(" Device|Books playlist not found") # Add the passed book to the Device|Books playlist added = pl.add(appscript.mactypes.File(fpath),to=pl) if False: - self.log.info(" '%s' added to Device|Books" % metadata.title) + logger().info(" '%s' added to Device|Books" % metadata.title) self._wait_for_writable_metadata(added) return added @@ -1183,7 +1193,7 @@ class ITUNES(DriverBase): break else: if DEBUG: - self.log.info(" no Books playlist found") + logger().info(" no Books playlist found") # Add the passed book to the Device|Books playlist if pl: @@ -1245,7 +1255,7 @@ class ITUNES(DriverBase): windows assumes pythoncom wrapper ''' if DEBUG: - self.log.info(" ITUNES._add_library_book()") + logger().info(" ITUNES._add_library_book()") if isosx: added = self.iTunes.add(appscript.mactypes.File(file)) @@ -1256,9 +1266,9 @@ class ITUNES(DriverBase): fa = FileArray(file_s) op_status = lib.AddFiles(fa) if DEBUG: - self.log.info(" file added to Library|Books") + logger().info(" file added to Library|Books") - self.log.info(" iTunes adding '%s'" % file) + logger().info(" iTunes adding '%s'" % file) if DEBUG: sys.stdout.write(" iTunes copying '%s' ..." % metadata.title) @@ -1312,7 +1322,7 @@ class ITUNES(DriverBase): fp = cached_book['lib_book'].Location ''' if DEBUG: - self.log.info(" ITUNES._add_new_copy()") + logger().info(" ITUNES._add_new_copy()") if fpath.rpartition('.')[2].lower() == 'epub': self._update_epub_metadata(fpath, metadata) @@ -1333,7 +1343,7 @@ class ITUNES(DriverBase): db_added = self._add_device_book(fpath, metadata) lb_added = self._add_library_book(fpath, metadata) if not lb_added and DEBUG: - self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title) + logger().warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title) else: lb_added = self._add_library_book(fpath, metadata) if not lb_added: @@ -1348,8 +1358,10 @@ class ITUNES(DriverBase): assumes pythoncom wrapper for db_added as of iTunes 9.2, iBooks 1.1, can't set artwork for PDF files via automation ''' + from PIL import Image as PILImage + if DEBUG: - self.log.info(" ITUNES._cover_to_thumb()") + logger().info(" ITUNES._cover_to_thumb()") thumb = None if metadata.cover: @@ -1366,7 +1378,7 @@ class ITUNES(DriverBase): scaled, nwidth, nheight = fit_image(width, height, self.MAX_COVER_WIDTH, self.MAX_COVER_HEIGHT) if scaled: if DEBUG: - self.log.info(" cover scaled from %sx%s to %sx%s" % + logger().info(" cover scaled from %sx%s to %sx%s" % (width,height,nwidth,nheight)) img = img.resize((nwidth, nheight), PILImage.ANTIALIAS) cd = cStringIO.StringIO() @@ -1378,7 +1390,7 @@ class ITUNES(DriverBase): cover_data = cd.read() except: self.problem_titles.append("'%s' by %s" % (metadata.title, authors_to_string(metadata.authors))) - self.log.error(" error scaling '%s' for '%s'" % (metadata.cover,metadata.title)) + logger().error(" error scaling '%s' for '%s'" % (metadata.cover,metadata.title)) import traceback traceback.print_exc() @@ -1396,17 +1408,17 @@ class ITUNES(DriverBase): lb_added.artworks[1].data_.set(cover_data) except: if DEBUG: - self.log.warning(" iTunes automation interface reported an error" + logger().warning(" iTunes automation interface reported an error" " adding artwork to '%s' in the iTunes Library" % metadata.title) pass if db_added: try: db_added.artworks[1].data_.set(cover_data) - self.log.info(" writing '%s' cover to iDevice" % metadata.title) + logger().info(" writing '%s' cover to iDevice" % metadata.title) except: if DEBUG: - self.log.warning(" iTunes automation interface reported an error" + logger().warning(" iTunes automation interface reported an error" " adding artwork to '%s' on the iDevice" % metadata.title) #import traceback #traceback.print_exc() @@ -1428,7 +1440,7 @@ class ITUNES(DriverBase): lb_added.AddArtworkFromFile(tc) except: if DEBUG: - self.log.warning(" iTunes automation interface reported an error" + logger().warning(" iTunes automation interface reported an error" " when adding artwork to '%s' in the iTunes Library" % metadata.title) pass @@ -1440,7 +1452,7 @@ class ITUNES(DriverBase): elif format == 'pdf': if DEBUG: - self.log.info(" unable to set PDF cover via automation interface") + logger().info(" unable to set PDF cover via automation interface") try: # Resize for thumb @@ -1455,13 +1467,13 @@ class ITUNES(DriverBase): # Refresh the thumbnail cache if DEBUG: - self.log.info( " refreshing cached thumb for '%s'" % metadata.title) + logger().info( " refreshing cached thumb for '%s'" % metadata.title) zfw = ZipFile(self.archive_path, mode='a') thumb_path = path.rpartition('.')[0] + '.jpg' zfw.writestr(thumb_path, thumb) except: self.problem_titles.append("'%s' by %s" % (metadata.title, authors_to_string(metadata.authors))) - self.log.error(" error converting '%s' to thumb for '%s'" % (metadata.cover,metadata.title)) + logger().error(" error converting '%s' to thumb for '%s'" % (metadata.cover,metadata.title)) finally: try: zfw.close() @@ -1469,14 +1481,14 @@ class ITUNES(DriverBase): pass else: if DEBUG: - self.log.info(" no cover defined in metadata for '%s'" % metadata.title) + logger().info(" no cover defined in metadata for '%s'" % metadata.title) return thumb def _create_new_book(self,fpath, metadata, path, db_added, lb_added, thumb, format): ''' ''' if DEBUG: - self.log.info(" ITUNES._create_new_book()") + logger().info(" ITUNES._create_new_book()") this_book = Book(metadata.title, authors_to_string(metadata.authors)) this_book.datetime = time.gmtime() @@ -1525,7 +1537,7 @@ class ITUNES(DriverBase): wait is passed when launching iTunes, as it seems to need a moment to come to its senses ''' if DEBUG: - self.log.info(" ITUNES._discover_manual_sync_mode()") + logger().info(" ITUNES._discover_manual_sync_mode()") if wait: time.sleep(wait) if isosx: @@ -1537,12 +1549,12 @@ class ITUNES(DriverBase): dev_books = pl.file_tracks() break else: - self.log.error(" book_playlist not found") + logger().error(" book_playlist not found") if dev_books is not None and len(dev_books): first_book = dev_books[0] if False: - self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist())) + logger().info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist())) try: first_book.bpm.set(0) self.manual_sync_mode = True @@ -1550,7 +1562,7 @@ class ITUNES(DriverBase): self.manual_sync_mode = False else: if DEBUG: - self.log.info(" adding tracer to empty Books|Playlist") + logger().info(" adding tracer to empty Books|Playlist") try: added = pl.add(appscript.mactypes.File(P('tracer.epub')),to=pl) time.sleep(0.5) @@ -1573,7 +1585,7 @@ class ITUNES(DriverBase): if dev_books is not None and dev_books.Count: first_book = dev_books.Item(1) #if DEBUG: - #self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist)) + #logger().info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist)) try: first_book.BPM = 0 self.manual_sync_mode = True @@ -1581,7 +1593,7 @@ class ITUNES(DriverBase): self.manual_sync_mode = False else: if DEBUG: - self.log.info(" sending tracer to empty Books|Playlist") + logger().info(" sending tracer to empty Books|Playlist") fpath = P('tracer.epub') mi = MetaInformation('Tracer',['calibre']) try: @@ -1592,24 +1604,24 @@ class ITUNES(DriverBase): except: self.manual_sync_mode = False - self.log.info(" iTunes.manual_sync_mode: %s" % self.manual_sync_mode) + logger().info(" iTunes.manual_sync_mode: %s" % self.manual_sync_mode) def _dump_booklist(self, booklist, header=None,indent=0): ''' ''' if header: msg = '\n%sbooklist %s:' % (' '*indent,header) - self.log.info(msg) - self.log.info('%s%s' % (' '*indent,'-' * len(msg))) + logger().info(msg) + logger().info('%s%s' % (' '*indent,'-' * len(msg))) for book in booklist: if isosx: - self.log.info("%s%-40.40s %-30.30s %-10.10s %s" % + logger().info("%s%-40.40s %-30.30s %-10.10s %s" % (' '*indent,book.title, book.author, str(book.library_id)[-9:], book.uuid)) elif iswindows: - self.log.info("%s%-40.40s %-30.30s" % + logger().info("%s%-40.40s %-30.30s" % (' '*indent,book.title, book.author)) - self.log.info() + logger().info() def _dump_cached_book(self, cached_book, header=None,indent=0): ''' @@ -1617,16 +1629,16 @@ class ITUNES(DriverBase): if isosx: if header: msg = '%s%s' % (' '*indent,header) - self.log.info(msg) - self.log.info( "%s%s" % (' '*indent, '-' * len(msg))) - self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % + logger().info(msg) + logger().info( "%s%s" % (' '*indent, '-' * len(msg))) + logger().info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % (' '*indent, 'title', 'author', 'lib_book', 'dev_book', 'uuid')) - self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % + logger().info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % (' '*indent, cached_book['title'], cached_book['author'], @@ -1636,10 +1648,10 @@ class ITUNES(DriverBase): elif iswindows: if header: msg = '%s%s' % (' '*indent,header) - self.log.info(msg) - self.log.info( "%s%s" % (' '*indent, '-' * len(msg))) + logger().info(msg) + logger().info( "%s%s" % (' '*indent, '-' * len(msg))) - self.log.info("%s%-40.40s %-30.30s %s" % + logger().info("%s%-40.40s %-30.30s %s" % (' '*indent, cached_book['title'], cached_book['author'], @@ -1650,11 +1662,11 @@ class ITUNES(DriverBase): ''' if header: msg = '\n%sself.cached_books %s:' % (' '*indent,header) - self.log.info(msg) - self.log.info( "%s%s" % (' '*indent,'-' * len(msg))) + logger().info(msg) + logger().info( "%s%s" % (' '*indent,'-' * len(msg))) if isosx: for cb in self.cached_books.keys(): - self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % + logger().info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % (' '*indent, self.cached_books[cb]['title'], self.cached_books[cb]['author'], @@ -1663,19 +1675,21 @@ class ITUNES(DriverBase): self.cached_books[cb]['uuid'])) elif iswindows: for cb in self.cached_books.keys(): - self.log.info("%s%-40.40s %-30.30s %-4.4s %s" % + logger().info("%s%-40.40s %-30.30s %-4.4s %s" % (' '*indent, self.cached_books[cb]['title'], self.cached_books[cb]['author'], self.cached_books[cb]['format'], self.cached_books[cb]['uuid'])) - self.log.info() + logger().info() def _dump_epub_metadata(self, fpath): ''' ''' - self.log.info(" ITUNES.__get_epub_metadata()") + from calibre.ebooks.BeautifulSoup import BeautifulSoup + + logger().info(" ITUNES.__get_epub_metadata()") title = None author = None timestamp = None @@ -1695,11 +1709,11 @@ class ITUNES(DriverBase): if not title or not author: if DEBUG: - self.log.error(" couldn't extract title/author from %s in %s" % (opf,fpath)) - self.log.error(" title: %s author: %s timestamp: %s" % (title, author, timestamp)) + logger().error(" couldn't extract title/author from %s in %s" % (opf,fpath)) + logger().error(" title: %s author: %s timestamp: %s" % (title, author, timestamp)) else: if DEBUG: - self.log.error(" can't find .opf in %s" % fpath) + logger().error(" can't find .opf in %s" % fpath) zf.close() return (title, author, timestamp) @@ -1720,20 +1734,20 @@ class ITUNES(DriverBase): ''' ''' if DEBUG: - self.log.info("\n library_books:") + logger().info("\n library_books:") for book in library_books: - self.log.info(" %s" % book) - self.log.info() + logger().info(" %s" % book) + logger().info() def _dump_update_list(self,header=None,indent=0): if header and self.update_list: msg = '\n%sself.update_list %s' % (' '*indent,header) - self.log.info(msg) - self.log.info( "%s%s" % (' '*indent,'-' * len(msg))) + logger().info(msg) + logger().info( "%s%s" % (' '*indent,'-' * len(msg))) if isosx: for ub in self.update_list: - self.log.info("%s%-40.40s %-30.30s %-10.10s %s" % + logger().info("%s%-40.40s %-30.30s %-10.10s %s" % (' '*indent, ub['title'], ub['author'], @@ -1741,7 +1755,7 @@ class ITUNES(DriverBase): ub['uuid'])) elif iswindows: for ub in self.update_list: - self.log.info("%s%-40.40s %-30.30s" % + logger().info("%s%-40.40s %-30.30s" % (' '*indent, ub['title'], ub['author'])) @@ -1753,42 +1767,42 @@ class ITUNES(DriverBase): if iswindows: dev_books = self._get_device_books_playlist() if DEBUG: - self.log.info(" ITUNES._find_device_book()") - self.log.info(" searching for '%s' by '%s' (%s)" % + logger().info(" ITUNES._find_device_book()") + logger().info(" searching for '%s' by '%s' (%s)" % (search['title'], search['author'],search['uuid'])) attempts = 9 while attempts: # Try by uuid - only one hit if 'uuid' in search and search['uuid']: if DEBUG: - self.log.info(" searching by uuid '%s' ..." % search['uuid']) + logger().info(" searching by uuid '%s' ..." % search['uuid']) hits = dev_books.Search(search['uuid'],self.SearchField.index('All')) if hits: hit = hits[0] - self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) + logger().info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) return hit # Try by author - there could be multiple hits if search['author']: if DEBUG: - self.log.info(" searching by author '%s' ..." % search['author']) + logger().info(" searching by author '%s' ..." % search['author']) hits = dev_books.Search(search['author'],self.SearchField.index('Artists')) if hits: for hit in hits: if hit.Name == search['title']: if DEBUG: - self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) + logger().info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) return hit # Search by title if no author available if DEBUG: - self.log.info(" searching by title '%s' ..." % search['title']) + logger().info(" searching by title '%s' ..." % search['title']) hits = dev_books.Search(search['title'],self.SearchField.index('All')) if hits: for hit in hits: if hit.Name == search['title']: if DEBUG: - self.log.info(" found '%s'" % (hit.Name)) + logger().info(" found '%s'" % (hit.Name)) return hit # PDF just sent, title not updated yet, look for export pattern @@ -1797,24 +1811,24 @@ class ITUNES(DriverBase): title = re.sub(r'[^0-9a-zA-Z ]', '_', search['title']) author = re.sub(r'[^0-9a-zA-Z ]', '_', search['author']) if DEBUG: - self.log.info(" searching by name: '%s - %s'" % (title,author)) + logger().info(" searching by name: '%s - %s'" % (title,author)) hits = dev_books.Search('%s - %s' % (title,author), self.SearchField.index('All')) if hits: hit = hits[0] - self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) + logger().info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) return hit else: if DEBUG: - self.log.info(" no PDF hits") + logger().info(" no PDF hits") attempts -= 1 time.sleep(0.5) if DEBUG: - self.log.warning(" attempt #%d" % (10 - attempts)) + logger().warning(" attempt #%d" % (10 - attempts)) if DEBUG: - self.log.error(" no hits") + logger().error(" no hits") return None def _find_library_book(self, search): @@ -1823,13 +1837,13 @@ class ITUNES(DriverBase): ''' if iswindows: if DEBUG: - self.log.info(" ITUNES._find_library_book()") + logger().info(" ITUNES._find_library_book()") ''' if 'uuid' in search: - self.log.info(" looking for '%s' by %s (%s)" % + logger().info(" looking for '%s' by %s (%s)" % (search['title'], search['author'], search['uuid'])) else: - self.log.info(" looking for '%s' by %s" % + logger().info(" looking for '%s' by %s" % (search['title'], search['author'])) ''' @@ -1837,11 +1851,11 @@ class ITUNES(DriverBase): if source.Kind == self.Sources.index('Library'): lib = source if DEBUG: - self.log.info(" Library source: '%s' kind: %s" % (lib.Name, self.Sources[lib.Kind])) + logger().info(" Library source: '%s' kind: %s" % (lib.Name, self.Sources[lib.Kind])) break else: if DEBUG: - self.log.info(" Library source not found") + logger().info(" Library source not found") if lib is not None: lib_books = None @@ -1849,12 +1863,12 @@ class ITUNES(DriverBase): if pl.Kind == self.PlaylistKind.index('User') and \ pl.SpecialKind == self.PlaylistSpecialKind.index('Books'): if DEBUG: - self.log.info(" Books playlist: '%s'" % (pl.Name)) + logger().info(" Books playlist: '%s'" % (pl.Name)) lib_books = pl break else: if DEBUG: - self.log.error(" no Books playlist found") + logger().error(" no Books playlist found") attempts = 9 @@ -1862,35 +1876,35 @@ class ITUNES(DriverBase): # Find book whose Album field = search['uuid'] if 'uuid' in search and search['uuid']: if DEBUG: - self.log.info(" searching by uuid '%s' ..." % search['uuid']) + logger().info(" searching by uuid '%s' ..." % search['uuid']) hits = lib_books.Search(search['uuid'],self.SearchField.index('All')) if hits: hit = hits[0] if DEBUG: - self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) + logger().info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) return hit # Search by author if known if search['author']: if DEBUG: - self.log.info(" searching by author '%s' ..." % search['author']) + logger().info(" searching by author '%s' ..." % search['author']) hits = lib_books.Search(search['author'],self.SearchField.index('Artists')) if hits: for hit in hits: if hit.Name == search['title']: if DEBUG: - self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) + logger().info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) return hit # Search by title if no author available if DEBUG: - self.log.info(" searching by title '%s' ..." % search['title']) + logger().info(" searching by title '%s' ..." % search['title']) hits = lib_books.Search(search['title'],self.SearchField.index('All')) if hits: for hit in hits: if hit.Name == search['title']: if DEBUG: - self.log.info(" found '%s'" % (hit.Name)) + logger().info(" found '%s'" % (hit.Name)) return hit # PDF just sent, title not updated yet, look for export pattern @@ -1899,24 +1913,24 @@ class ITUNES(DriverBase): title = re.sub(r'[^0-9a-zA-Z ]', '_', search['title']) author = re.sub(r'[^0-9a-zA-Z ]', '_', search['author']) if DEBUG: - self.log.info(" searching by name: %s - %s" % (title,author)) + logger().info(" searching by name: %s - %s" % (title,author)) hits = lib_books.Search('%s - %s' % (title,author), self.SearchField.index('All')) if hits: hit = hits[0] - self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) + logger().info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Composer)) return hit else: if DEBUG: - self.log.info(" no PDF hits") + logger().info(" no PDF hits") attempts -= 1 time.sleep(0.5) if DEBUG: - self.log.warning(" attempt #%d" % (10 - attempts)) + logger().warning(" attempt #%d" % (10 - attempts)) if DEBUG: - self.log.error(" search for '%s' yielded no hits" % search['title']) + logger().error(" search for '%s' yielded no hits" % search['title']) return None def _generate_thumbnail(self, book_path, book): @@ -1926,6 +1940,7 @@ class ITUNES(DriverBase): cache_dir = os.path.join(config_dir, 'caches', 'itunes') as of iTunes 9.2, iBooks 1.1, can't set artwork for PDF files via automation ''' + from PIL import Image as PILImage if not self.settings().extra_customization[self.CACHE_COVERS]: thumb_data = None @@ -1942,18 +1957,18 @@ class ITUNES(DriverBase): thumb_data = zfr.read(thumb_path) if thumb_data == 'None': if False: - self.log.info(" ITUNES._generate_thumbnail()\n returning None from cover cache for '%s'" % title) + logger().info(" ITUNES._generate_thumbnail()\n returning None from cover cache for '%s'" % title) zfr.close() return None except: zfw = ZipFile(self.archive_path, mode='a') else: if False: - self.log.info(" returning thumb from cache for '%s'" % title) + logger().info(" returning thumb from cache for '%s'" % title) return thumb_data if DEBUG: - self.log.info(" ITUNES._generate_thumbnail('%s'):" % title) + logger().info(" ITUNES._generate_thumbnail('%s'):" % title) if isosx: # Fetch the artwork from iTunes @@ -1962,7 +1977,7 @@ class ITUNES(DriverBase): except: # If no artwork, write an empty marker to cache if DEBUG: - self.log.error(" error fetching iTunes artwork for '%s'" % title) + logger().error(" error fetching iTunes artwork for '%s'" % title) zfw.writestr(thumb_path, 'None') zfw.close() return None @@ -1979,12 +1994,12 @@ class ITUNES(DriverBase): thumb_data = thumb.getvalue() thumb.close() if False: - self.log.info(" generated thumb for '%s', caching" % title) + logger().info(" generated thumb for '%s', caching" % title) # Cache the tagged thumb zfw.writestr(thumb_path, thumb_data) except: if DEBUG: - self.log.error(" error generating thumb for '%s', caching empty marker" % book.name()) + logger().error(" error generating thumb for '%s', caching empty marker" % book.name()) self._dump_hex(data[:32]) thumb_data = None # Cache the empty cover @@ -1999,7 +2014,7 @@ class ITUNES(DriverBase): elif iswindows: if not book.Artwork.Count: if DEBUG: - self.log.info(" no artwork available for '%s'" % book.Name) + logger().info(" no artwork available for '%s'" % book.Name) zfw.writestr(thumb_path, 'None') zfw.close() return None @@ -2019,12 +2034,12 @@ class ITUNES(DriverBase): os.remove(tmp_thumb) thumb.close() if False: - self.log.info(" generated thumb for '%s', caching" % book.Name) + logger().info(" generated thumb for '%s', caching" % book.Name) # Cache the tagged thumb zfw.writestr(thumb_path, thumb_data) except: if DEBUG: - self.log.error(" error generating thumb for '%s', caching empty marker" % book.Name) + logger().error(" error generating thumb for '%s', caching empty marker" % book.Name) thumb_data = None # Cache the empty cover zfw.writestr(thumb_path,'None') @@ -2047,9 +2062,9 @@ class ITUNES(DriverBase): for file in myZipList: exploded_file_size += file.file_size if False: - self.log.info(" ITUNES._get_device_book_size()") - self.log.info(" %d items in archive" % len(myZipList)) - self.log.info(" compressed: %d exploded: %d" % (compressed_size, exploded_file_size)) + logger().info(" ITUNES._get_device_book_size()") + logger().info(" %d items in archive" % len(myZipList)) + logger().info(" compressed: %d exploded: %d" % (compressed_size, exploded_file_size)) myZip.close() return exploded_file_size @@ -2058,7 +2073,7 @@ class ITUNES(DriverBase): Assumes pythoncom wrapper for Windows ''' if DEBUG: - self.log.info("\n ITUNES._get_device_books()") + logger().info("\n ITUNES._get_device_books()") device_books = [] if isosx: @@ -2069,24 +2084,24 @@ class ITUNES(DriverBase): for pl in device.playlists(): if pl.special_kind() == appscript.k.Books: if DEBUG: - self.log.info(" Book playlist: '%s'" % (pl.name())) + logger().info(" Book playlist: '%s'" % (pl.name())) dev_books = pl.file_tracks() break else: - self.log.error(" book_playlist not found") + logger().error(" book_playlist not found") for book in dev_books: # This may need additional entries for international iTunes users if book.kind() in self.Audiobooks: if DEBUG: - self.log.info(" ignoring '%s' of type '%s'" % (book.name(), book.kind())) + logger().info(" ignoring '%s' of type '%s'" % (book.name(), book.kind())) else: if DEBUG: - self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % + logger().info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.name(), book.artist(), book.album(), book.kind())) device_books.append(book) if DEBUG: - self.log.info() + logger().info() elif iswindows: if 'iPod' in self.sources: @@ -2100,24 +2115,24 @@ class ITUNES(DriverBase): if pl.Kind == self.PlaylistKind.index('User') and \ pl.SpecialKind == self.PlaylistSpecialKind.index('Books'): if DEBUG: - self.log.info(" Books playlist: '%s'" % (pl.Name)) + logger().info(" Books playlist: '%s'" % (pl.Name)) dev_books = pl.Tracks break else: if DEBUG: - self.log.info(" no Books playlist found") + logger().info(" no Books playlist found") for book in dev_books: # This may need additional entries for international iTunes users if book.KindAsString in self.Audiobooks: if DEBUG: - self.log.info(" ignoring '%s' of type '%s'" % (book.Name, book.KindAsString)) + logger().info(" ignoring '%s' of type '%s'" % (book.Name, book.KindAsString)) else: if DEBUG: - self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString)) + logger().info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString)) device_books.append(book) if DEBUG: - self.log.info() + logger().info() finally: pythoncom.CoUninitialize() @@ -2140,7 +2155,7 @@ class ITUNES(DriverBase): break else: if DEBUG: - self.log.error(" no iPad|Books playlist found") + logger().error(" no iPad|Books playlist found") return pl def _get_library_books(self): @@ -2149,7 +2164,7 @@ class ITUNES(DriverBase): Windows assumes pythoncom wrapper ''' if DEBUG: - self.log.info("\n ITUNES._get_library_books()") + logger().info("\n ITUNES._get_library_books()") library_books = {} library_orphans = {} @@ -2160,11 +2175,11 @@ class ITUNES(DriverBase): if source.kind() == appscript.k.library: lib = source if DEBUG: - self.log.info(" Library source: '%s'" % (lib.name())) + logger().info(" Library source: '%s'" % (lib.name())) break else: if DEBUG: - self.log.error(' Library source not found') + logger().error(' Library source not found') if lib is not None: lib_books = None @@ -2172,18 +2187,18 @@ class ITUNES(DriverBase): for pl in lib.playlists(): if pl.special_kind() == appscript.k.Books: if DEBUG: - self.log.info(" Books playlist: '%s'" % (pl.name())) + logger().info(" Books playlist: '%s'" % (pl.name())) break else: if DEBUG: - self.log.info(" no Library|Books playlist found") + logger().info(" no Library|Books playlist found") lib_books = pl.file_tracks() for book in lib_books: # This may need additional entries for international iTunes users if book.kind() in self.Audiobooks: if DEBUG: - self.log.info(" ignoring '%s' of type '%s'" % (book.name(), book.kind())) + logger().info(" ignoring '%s' of type '%s'" % (book.name(), book.kind())) else: # Collect calibre orphans - remnants of recipe uploads format = 'pdf' if book.kind().startswith('PDF') else 'epub' @@ -2193,31 +2208,31 @@ class ITUNES(DriverBase): if book.location() == appscript.k.missing_value: library_orphans[path] = book if False: - self.log.info(" found iTunes PTF '%s' in Library|Books" % book.name()) + logger().info(" found iTunes PTF '%s' in Library|Books" % book.name()) except: if DEBUG: - self.log.error(" iTunes returned an error returning .location() with %s" % book.name()) + logger().error(" iTunes returned an error returning .location() with %s" % book.name()) library_books[path] = book if DEBUG: - self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % + logger().info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.name(), book.artist(), book.album(), book.kind())) else: if DEBUG: - self.log.info(' no Library playlists') + logger().info(' no Library playlists') else: if DEBUG: - self.log.info(' no Library found') + logger().info(' no Library found') elif iswindows: lib = None for source in self.iTunes.sources: if source.Kind == self.Sources.index('Library'): lib = source - self.log.info(" Library source: '%s' kind: %s" % (lib.Name, self.Sources[lib.Kind])) + logger().info(" Library source: '%s' kind: %s" % (lib.Name, self.Sources[lib.Kind])) break else: - self.log.error(" Library source not found") + logger().error(" Library source not found") if lib is not None: lib_books = None @@ -2226,22 +2241,22 @@ class ITUNES(DriverBase): if pl.Kind == self.PlaylistKind.index('User') and \ pl.SpecialKind == self.PlaylistSpecialKind.index('Books'): if DEBUG: - self.log.info(" Books playlist: '%s'" % (pl.Name)) + logger().info(" Books playlist: '%s'" % (pl.Name)) lib_books = pl.Tracks break else: if DEBUG: - self.log.error(" no Library|Books playlist found") + logger().error(" no Library|Books playlist found") else: if DEBUG: - self.log.error(" no Library playlists found") + logger().error(" no Library playlists found") try: for book in lib_books: # This may need additional entries for international iTunes users if book.KindAsString in self.Audiobooks: if DEBUG: - self.log.info(" ignoring %-30.30s of type '%s'" % (book.Name, book.KindAsString)) + logger().info(" ignoring %-30.30s of type '%s'" % (book.Name, book.KindAsString)) else: format = 'pdf' if book.KindAsString.startswith('PDF') else 'epub' path = self.path_template % (book.Name, book.Artist,format) @@ -2251,14 +2266,14 @@ class ITUNES(DriverBase): if not book.Location: library_orphans[path] = book if False: - self.log.info(" found iTunes PTF '%s' in Library|Books" % book.Name) + logger().info(" found iTunes PTF '%s' in Library|Books" % book.Name) library_books[path] = book if DEBUG: - self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString)) + logger().info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString)) except: if DEBUG: - self.log.info(" no books in library") + logger().info(" no books in library") self.library_orphans = library_orphans return library_books @@ -2303,7 +2318,7 @@ class ITUNES(DriverBase): # If more than one connected iDevice, remove all from list to prevent driver initialization if kinds.count('iPod') > 1: if DEBUG: - self.log.error(" %d connected iPod devices detected, calibre supports a single connected iDevice" % kinds.count('iPod')) + logger().error(" %d connected iPod devices detected, calibre supports a single connected iDevice" % kinds.count('iPod')) while kinds.count('iPod'): index = kinds.index('iPod') kinds.pop(index) @@ -2323,7 +2338,7 @@ class ITUNES(DriverBase): ''' ''' if DEBUG: - self.log.info(" ITUNES:_launch_iTunes():\n Instantiating iTunes") + logger().info(" ITUNES:_launch_iTunes():\n Instantiating iTunes") if isosx: ''' @@ -2333,7 +2348,7 @@ class ITUNES(DriverBase): running_apps = appscript.app('System Events') if not 'iTunes' in running_apps.processes.name(): if DEBUG: - self.log.info( "ITUNES:_launch_iTunes(): Launching iTunes" ) + logger().info( "ITUNES:_launch_iTunes(): Launching iTunes" ) try: self.iTunes = iTunes= appscript.app('iTunes', hide=True) except: @@ -2355,16 +2370,16 @@ class ITUNES(DriverBase): if os.path.exists(media_dir): self.iTunes_media = media_dir else: - self.log.error(" could not confirm valid iTunes.media_dir from %s" % 'com.apple.itunes') - self.log.error(" media_dir: %s" % media_dir) + logger().error(" could not confirm valid iTunes.media_dir from %s" % 'com.apple.itunes') + logger().error(" media_dir: %s" % media_dir) ''' if DEBUG: - self.log.info(" %s %s" % (__appname__, __version__)) - self.log.info(" [OSX %s - %s (%s), driver version %d.%d.%d]" % + logger().info(" %s %s" % (__appname__, __version__)) + logger().info(" [OSX %s - %s (%s), driver version %d.%d.%d]" % (self.iTunes.name(), self.iTunes.version(), self.initial_status, self.version[0],self.version[1],self.version[2])) - self.log.info(" calibre_library_path: %s" % self.calibre_library_path) + logger().info(" calibre_library_path: %s" % self.calibre_library_path) if iswindows: ''' @@ -2417,19 +2432,19 @@ class ITUNES(DriverBase): if os.path.exists(media_dir): self.iTunes_media = media_dir elif hasattr(string,'parent'): - self.log.error(" could not extract valid iTunes.media_dir from %s" % self.iTunes.LibraryXMLPath) - self.log.error(" %s" % string.parent.prettify()) - self.log.error(" '%s' not found" % media_dir) + logger().error(" could not extract valid iTunes.media_dir from %s" % self.iTunes.LibraryXMLPath) + logger().error(" %s" % string.parent.prettify()) + logger().error(" '%s' not found" % media_dir) else: - self.log.error(" no media dir found: string: %s" % string) + logger().error(" no media dir found: string: %s" % string) ''' if DEBUG: - self.log.info(" %s %s" % (__appname__, __version__)) - self.log.info(" [Windows %s - %s (%s), driver version %d.%d.%d]" % + logger().info(" %s %s" % (__appname__, __version__)) + logger().info(" [Windows %s - %s (%s), driver version %d.%d.%d]" % (self.iTunes.Windows[0].name, self.iTunes.Version, self.initial_status, self.version[0],self.version[1],self.version[2])) - self.log.info(" calibre_library_path: %s" % self.calibre_library_path) + logger().info(" calibre_library_path: %s" % self.calibre_library_path) def _purge_orphans(self,library_books, cached_books): ''' @@ -2438,16 +2453,16 @@ class ITUNES(DriverBase): This occurs when the user deletes a book in iBooks while disconnected ''' if DEBUG: - self.log.info(" ITUNES._purge_orphans()") + logger().info(" ITUNES._purge_orphans()") #self._dump_library_books(library_books) - #self.log.info(" cached_books:\n %s" % "\n ".join(cached_books.keys())) + #logger().info(" cached_books:\n %s" % "\n ".join(cached_books.keys())) for book in library_books: if isosx: if book not in cached_books and \ str(library_books[book].description()).startswith(self.description_prefix): if DEBUG: - self.log.info(" '%s' not found on iDevice, removing from iTunes" % book) + logger().info(" '%s' not found on iDevice, removing from iTunes" % book) btr = { 'title':library_books[book].name(), 'author':library_books[book].artist(), 'lib_book':library_books[book]} @@ -2456,19 +2471,19 @@ class ITUNES(DriverBase): if book not in cached_books and \ library_books[book].Description.startswith(self.description_prefix): if DEBUG: - self.log.info(" '%s' not found on iDevice, removing from iTunes" % book) + logger().info(" '%s' not found on iDevice, removing from iTunes" % book) btr = { 'title':library_books[book].Name, 'author':library_books[book].Artist, 'lib_book':library_books[book]} self._remove_from_iTunes(btr) if DEBUG: - self.log.info() + logger().info() def _remove_existing_copy(self, path, metadata): ''' ''' if DEBUG: - self.log.info(" ITUNES._remove_existing_copy()") + logger().info(" ITUNES._remove_existing_copy()") if self.manual_sync_mode: # Delete existing from Device|Books, add to self.update_list @@ -2480,16 +2495,16 @@ class ITUNES(DriverBase): self.update_list.append(self.cached_books[book]) if DEBUG: - self.log.info( " deleting device book '%s'" % (metadata.title)) + logger().info( " deleting device book '%s'" % (metadata.title)) self._remove_from_device(self.cached_books[book]) if DEBUG: - self.log.info(" deleting library book '%s'" % metadata.title) + logger().info(" deleting library book '%s'" % metadata.title) self._remove_from_iTunes(self.cached_books[book]) break else: if DEBUG: - self.log.info(" '%s' not in cached_books" % metadata.title) + logger().info(" '%s' not in cached_books" % metadata.title) else: # Delete existing from Library|Books, add to self.update_list # for deletion from booklist[0] during add_books_to_metadata @@ -2499,35 +2514,35 @@ class ITUNES(DriverBase): self.cached_books[book]['author'] == authors_to_string(metadata.authors)): self.update_list.append(self.cached_books[book]) if DEBUG: - self.log.info( " deleting library book '%s'" % metadata.title) + logger().info( " deleting library book '%s'" % metadata.title) self._remove_from_iTunes(self.cached_books[book]) break else: if DEBUG: - self.log.info(" '%s' not found in cached_books" % metadata.title) + logger().info(" '%s' not found in cached_books" % metadata.title) def _remove_from_device(self, cached_book): ''' Windows assumes pythoncom wrapper ''' if DEBUG: - self.log.info(" ITUNES._remove_from_device()") + logger().info(" ITUNES._remove_from_device()") if isosx: if DEBUG: - self.log.info(" deleting '%s' from iDevice" % cached_book['title']) + logger().info(" deleting '%s' from iDevice" % cached_book['title']) try: cached_book['dev_book'].delete() except: - self.log.error(" error deleting '%s'" % cached_book['title']) + logger().error(" error deleting '%s'" % cached_book['title']) elif iswindows: hit = self._find_device_book(cached_book) if hit: if DEBUG: - self.log.info(" deleting '%s' from iDevice" % cached_book['title']) + logger().info(" deleting '%s' from iDevice" % cached_book['title']) hit.Delete() else: if DEBUG: - self.log.warning(" unable to remove '%s' by '%s' (%s) from device" % + logger().warning(" unable to remove '%s' by '%s' (%s) from device" % (cached_book['title'],cached_book['author'],cached_book['uuid'])) def _remove_from_iTunes(self, cached_book): @@ -2535,34 +2550,34 @@ class ITUNES(DriverBase): iTunes does not delete books from storage when removing from database via automation ''' if DEBUG: - self.log.info(" ITUNES._remove_from_iTunes():") + logger().info(" ITUNES._remove_from_iTunes():") if isosx: ''' Manually remove the book from iTunes storage ''' try: fp = cached_book['lib_book'].location().path if DEBUG: - self.log.info(" processing %s" % fp) + logger().info(" processing %s" % fp) if fp.startswith(prefs['library_path']): - self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title']) + logger().info(" '%s' stored in calibre database, not removed" % cached_book['title']) elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \ fp.startswith(self.iTunes_local_storage) and \ os.path.exists(fp): # Delete the copy in iTunes_local_storage os.remove(fp) if DEBUG: - self.log(" removing from iTunes_local_storage") + logger()(" removing from iTunes_local_storage") else: # Delete from iTunes Media folder if os.path.exists(fp): os.remove(fp) if DEBUG: - self.log.info(" deleting from iTunes storage") + logger().info(" deleting from iTunes storage") author_storage_path = os.path.split(fp)[0] try: os.rmdir(author_storage_path) if DEBUG: - self.log.info(" removing empty author directory") + logger().info(" removing empty author directory") except: author_files = os.listdir(author_storage_path) if '.DS_Store' in author_files: @@ -2570,23 +2585,23 @@ class ITUNES(DriverBase): if not author_files: os.rmdir(author_storage_path) if DEBUG: - self.log.info(" removing empty author directory") + logger().info(" removing empty author directory") else: - self.log.info(" '%s' does not exist at storage location" % cached_book['title']) + logger().info(" '%s' does not exist at storage location" % cached_book['title']) except: # We get here if there was an error with .location().path if DEBUG: - self.log.info(" '%s' not found in iTunes storage" % cached_book['title']) + logger().info(" '%s' not found in iTunes storage" % cached_book['title']) # Delete the book from the iTunes database try: self.iTunes.delete(cached_book['lib_book']) if DEBUG: - self.log.info(" removing from iTunes database") + logger().info(" removing from iTunes database") except: if DEBUG: - self.log.info(" unable to remove from iTunes database") + logger().info(" unable to remove from iTunes database") elif iswindows: ''' @@ -2604,43 +2619,43 @@ class ITUNES(DriverBase): if book: if DEBUG: - self.log.info(" processing %s" % fp) + logger().info(" processing %s" % fp) if fp.startswith(prefs['library_path']): - self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title']) + logger().info(" '%s' stored in calibre database, not removed" % cached_book['title']) elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \ fp.startswith(self.iTunes_local_storage) and \ os.path.exists(fp): # Delete the copy in iTunes_local_storage os.remove(fp) if DEBUG: - self.log(" removing from iTunes_local_storage") + logger()(" removing from iTunes_local_storage") else: # Delete from iTunes Media folder if os.path.exists(fp): os.remove(fp) if DEBUG: - self.log.info(" deleting from iTunes storage") + logger().info(" deleting from iTunes storage") author_storage_path = os.path.split(fp)[0] try: os.rmdir(author_storage_path) if DEBUG: - self.log.info(" removing empty author directory") + logger().info(" removing empty author directory") except: pass else: - self.log.info(" '%s' does not exist at storage location" % cached_book['title']) + logger().info(" '%s' does not exist at storage location" % cached_book['title']) else: if DEBUG: - self.log.info(" '%s' not found in iTunes storage" % cached_book['title']) + logger().info(" '%s' not found in iTunes storage" % cached_book['title']) # Delete the book from the iTunes database try: book.Delete() if DEBUG: - self.log.info(" removing from iTunes database") + logger().info(" removing from iTunes database") except: if DEBUG: - self.log.info(" unable to remove from iTunes database") + logger().info(" unable to remove from iTunes database") def title_sorter(self, title): return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', title).rstrip() @@ -2648,8 +2663,11 @@ class ITUNES(DriverBase): def _update_epub_metadata(self, fpath, metadata): ''' ''' + from calibre.ebooks.metadata.epub import set_metadata + from lxml import etree + if DEBUG: - self.log.info(" ITUNES._update_epub_metadata()") + logger().info(" ITUNES._update_epub_metadata()") # Fetch plugboard updates metadata_x = self._xform_metadata_via_plugboard(metadata, 'epub') @@ -2677,17 +2695,17 @@ class ITUNES(DriverBase): metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour, old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo) if DEBUG: - self.log.info(" existing timestamp: %s" % metadata.timestamp) + logger().info(" existing timestamp: %s" % metadata.timestamp) else: metadata.timestamp = now() if DEBUG: - self.log.info(" add timestamp: %s" % metadata.timestamp) + logger().info(" add timestamp: %s" % metadata.timestamp) else: metadata.timestamp = now() if DEBUG: - self.log.warning(" missing block in OPF file") - self.log.info(" add timestamp: %s" % metadata.timestamp) + logger().warning(" missing block in OPF file") + logger().info(" add timestamp: %s" % metadata.timestamp) zf_opf.close() @@ -2717,7 +2735,7 @@ class ITUNES(DriverBase): Trigger a sync, wait for completion ''' if DEBUG: - self.log.info(" ITUNES:_update_device():\n %s" % msg) + logger().info(" ITUNES:_update_device():\n %s" % msg) if isosx: self.iTunes.update() @@ -2763,7 +2781,7 @@ class ITUNES(DriverBase): ''' ''' if DEBUG: - self.log.info(" ITUNES._update_iTunes_metadata()") + logger().info(" ITUNES._update_iTunes_metadata()") STRIP_TAGS = re.compile(r'<[^<]*?/?>') @@ -2815,8 +2833,8 @@ class ITUNES(DriverBase): # If title_sort applied in plugboard, that overrides using series/index as title_sort if metadata_x.series and self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY]: if DEBUG: - self.log.info(" ITUNES._update_iTunes_metadata()") - self.log.info(" using Series name '%s' as Genre" % metadata_x.series) + logger().info(" ITUNES._update_iTunes_metadata()") + logger().info(" using Series name '%s' as Genre" % metadata_x.series) # Format the index as a sort key index = metadata_x.series_index @@ -2840,7 +2858,7 @@ class ITUNES(DriverBase): break if db_added: - self.log.warning(" waiting for db_added to become writeable ") + logger().warning(" waiting for db_added to become writeable ") time.sleep(1.0) # If no title_sort plugboard tweak, create sort_name from series/index if metadata.title_sort == metadata_x.title_sort: @@ -2860,7 +2878,7 @@ class ITUNES(DriverBase): elif metadata_x.tags is not None: if DEBUG: - self.log.info(" %susing Tag as Genre" % + logger().info(" %susing Tag as Genre" % "no Series name available, " if self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY] else '') for tag in metadata_x.tags: if self._is_alpha(tag[0]): @@ -2883,7 +2901,7 @@ class ITUNES(DriverBase): lb_added.Year = metadata_x.pubdate.year if db_added: - self.log.warning(" waiting for db_added to become writeable ") + logger().warning(" waiting for db_added to become writeable ") time.sleep(1.0) db_added.Name = metadata_x.title db_added.Album = metadata_x.title @@ -2910,7 +2928,7 @@ class ITUNES(DriverBase): db_added.AlbumRating = (metadata_x.rating*10) except: if DEBUG: - self.log.warning(" iTunes automation interface reported an error" + logger().warning(" iTunes automation interface reported an error" " setting AlbumRating on iDevice") # Set Genre from first alpha tag, overwrite with series if available @@ -2919,7 +2937,7 @@ class ITUNES(DriverBase): if metadata_x.series and self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY]: if DEBUG: - self.log.info(" using Series name as Genre") + logger().info(" using Series name as Genre") # Format the index as a sort key index = metadata_x.series_index integer = int(index) @@ -2935,13 +2953,13 @@ class ITUNES(DriverBase): lb_added.TrackNumber = metadata_x.series_index except: if DEBUG: - self.log.warning(" iTunes automation interface reported an error" + logger().warning(" iTunes automation interface reported an error" " setting TrackNumber in iTunes") try: lb_added.EpisodeNumber = metadata_x.series_index except: if DEBUG: - self.log.warning(" iTunes automation interface reported an error" + logger().warning(" iTunes automation interface reported an error" " setting EpisodeNumber in iTunes") # If no plugboard transform applied to tags, change the Genre/Category to Series @@ -2963,13 +2981,13 @@ class ITUNES(DriverBase): db_added.TrackNumber = metadata_x.series_index except: if DEBUG: - self.log.warning(" iTunes automation interface reported an error" + logger().warning(" iTunes automation interface reported an error" " setting TrackNumber on iDevice") try: db_added.EpisodeNumber = metadata_x.series_index except: if DEBUG: - self.log.warning(" iTunes automation interface reported an error" + logger().warning(" iTunes automation interface reported an error" " setting EpisodeNumber on iDevice") # If no plugboard transform applied to tags, change the Genre/Category to Series @@ -2983,7 +3001,7 @@ class ITUNES(DriverBase): elif metadata_x.tags is not None: if DEBUG: - self.log.info(" using Tag as Genre") + logger().info(" using Tag as Genre") for tag in metadata_x.tags: if self._is_alpha(tag[0]): if lb_added: @@ -2997,8 +3015,8 @@ class ITUNES(DriverBase): Ensure iDevice metadata is writable. Direct connect mode only ''' if DEBUG: - self.log.info(" ITUNES._wait_for_writable_metadata()") - self.log.warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE) + logger().info(" ITUNES._wait_for_writable_metadata()") + logger().warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE) attempts = 9 while attempts: @@ -3012,40 +3030,40 @@ class ITUNES(DriverBase): attempts -= 1 time.sleep(delay) if DEBUG: - self.log.warning(" waiting %.1f seconds for iDevice metadata to become writable (attempt #%d)" % + logger().warning(" waiting %.1f seconds for iDevice metadata to become writable (attempt #%d)" % (delay, (10 - attempts))) else: if DEBUG: - self.log.error(" failed to write device metadata") + logger().error(" failed to write device metadata") def _xform_metadata_via_plugboard(self, book, format): ''' Transform book metadata from plugboard templates ''' if DEBUG: - self.log.info(" ITUNES._xform_metadata_via_plugboard()") + logger().info(" ITUNES._xform_metadata_via_plugboard()") if self.plugboard_func: pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards) newmi = book.deepcopy_metadata() newmi.template_to_attribute(book, pb) if pb is not None and DEBUG: - #self.log.info(" transforming %s using %s:" % (format, pb)) - self.log.info(" title: '%s' %s" % (book.title, ">>> '%s'" % + #logger().info(" transforming %s using %s:" % (format, pb)) + logger().info(" title: '%s' %s" % (book.title, ">>> '%s'" % newmi.title if book.title != newmi.title else '')) - self.log.info(" title_sort: %s %s" % (book.title_sort, ">>> %s" % + logger().info(" title_sort: %s %s" % (book.title_sort, ">>> %s" % newmi.title_sort if book.title_sort != newmi.title_sort else '')) - self.log.info(" authors: %s %s" % (book.authors, ">>> %s" % + logger().info(" authors: %s %s" % (book.authors, ">>> %s" % newmi.authors if book.authors != newmi.authors else '')) - self.log.info(" author_sort: %s %s" % (book.author_sort, ">>> %s" % + logger().info(" author_sort: %s %s" % (book.author_sort, ">>> %s" % newmi.author_sort if book.author_sort != newmi.author_sort else '')) - self.log.info(" language: %s %s" % (book.language, ">>> %s" % + logger().info(" language: %s %s" % (book.language, ">>> %s" % newmi.language if book.language != newmi.language else '')) - self.log.info(" publisher: %s %s" % (book.publisher, ">>> %s" % + logger().info(" publisher: %s %s" % (book.publisher, ">>> %s" % newmi.publisher if book.publisher != newmi.publisher else '')) - self.log.info(" tags: %s %s" % (book.tags, ">>> %s" % + logger().info(" tags: %s %s" % (book.tags, ">>> %s" % newmi.tags if book.tags != newmi.tags else '')) else: if DEBUG: - self.log(" matching plugboard not found") + logger()(" matching plugboard not found") else: newmi = book @@ -3068,7 +3086,7 @@ class ITUNES_ASYNC(ITUNES): def __init__(self,path): if DEBUG: - self.log.info("ITUNES_ASYNC:__init__()") + logger().info("ITUNES_ASYNC:__init__()") if isosx and appscript is None: self.connected = False @@ -3110,15 +3128,15 @@ class ITUNES_ASYNC(ITUNES): """ if not oncard: if DEBUG: - self.log.info("ITUNES_ASYNC:books()") + logger().info("ITUNES_ASYNC:books()") if self.settings().extra_customization[self.CACHE_COVERS]: - self.log.info(" Cover fetching/caching enabled") + logger().info(" Cover fetching/caching enabled") else: - self.log.info(" Cover fetching/caching disabled") + logger().info(" Cover fetching/caching disabled") # Fetch a list of books from iTunes - booklist = BookList(self.log) + booklist = BookList(logger()) cached_books = {} if isosx: @@ -3214,7 +3232,7 @@ class ITUNES_ASYNC(ITUNES): return booklist else: - return BookList(self.log) + return BookList(logger()) def eject(self): ''' @@ -3222,7 +3240,7 @@ class ITUNES_ASYNC(ITUNES): are pending GUI jobs that need to communicate with the device. ''' if DEBUG: - self.log.info("ITUNES_ASYNC:eject()") + logger().info("ITUNES_ASYNC:eject()") self.iTunes = None self.connected = False @@ -3237,7 +3255,7 @@ class ITUNES_ASYNC(ITUNES): particular device doesn't have any of these locations it should return -1. """ if DEBUG: - self.log.info("ITUNES_ASYNC:free_space()") + logger().info("ITUNES_ASYNC:free_space()") free_space = 0 if isosx: s = os.statvfs(os.sep) @@ -3254,7 +3272,7 @@ class ITUNES_ASYNC(ITUNES): @return: (device name, device version, software version on device, mime type) """ if DEBUG: - self.log.info("ITUNES_ASYNC:get_device_information()") + logger().info("ITUNES_ASYNC:get_device_information()") return ('iTunes','hw v1.0','sw v1.0', 'mime type normally goes here') @@ -3277,33 +3295,33 @@ class ITUNES_ASYNC(ITUNES): we need to talk to iTunes to discover if there's a connected iPod ''' if DEBUG: - self.log.info("ITUNES_ASYNC.open(connected_device: %s)" % repr(connected_device)) + logger().info("ITUNES_ASYNC.open(connected_device: %s)" % repr(connected_device)) # Confirm/create thumbs archive if not os.path.exists(self.cache_dir): if DEBUG: - self.log.info(" creating thumb cache '%s'" % self.cache_dir) + logger().info(" creating thumb cache '%s'" % self.cache_dir) os.makedirs(self.cache_dir) if not os.path.exists(self.archive_path): - self.log.info(" creating zip archive") + logger().info(" creating zip archive") zfw = ZipFile(self.archive_path, mode='w') zfw.writestr("iTunes Thumbs Archive",'') zfw.close() else: if DEBUG: - self.log.info(" existing thumb cache at '%s'" % self.archive_path) + logger().info(" existing thumb cache at '%s'" % self.archive_path) # If enabled in config options, create/confirm an iTunes storage folder if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]: self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage') if not os.path.exists(self.iTunes_local_storage): if DEBUG: - self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage) + logger()(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage) os.mkdir(self.iTunes_local_storage) else: if DEBUG: - self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage) + logger()(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage) def sync_booklists(self, booklists, end_session=True): ''' @@ -3314,7 +3332,7 @@ class ITUNES_ASYNC(ITUNES): ''' if DEBUG: - self.log.info("ITUNES_ASYNC.sync_booklists()") + logger().info("ITUNES_ASYNC.sync_booklists()") # Inform user of any problem books if self.problem_titles: @@ -3328,7 +3346,7 @@ class ITUNES_ASYNC(ITUNES): ''' ''' if DEBUG: - self.log.info("ITUNES_ASYNC:unmount_device()") + logger().info("ITUNES_ASYNC:unmount_device()") self.connected = False class BookList(list): diff --git a/src/calibre/devices/bambook/libbambookcore.py b/src/calibre/devices/bambook/libbambookcore.py index e77ac1da7b..a1c6046df0 100644 --- a/src/calibre/devices/bambook/libbambookcore.py +++ b/src/calibre/devices/bambook/libbambookcore.py @@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en' Sanda library wrapper ''' -import ctypes, uuid, hashlib, os, sys +import ctypes, hashlib, os, sys from threading import Event, Lock from calibre.constants import iswindows from calibre import load_library @@ -350,6 +350,7 @@ class Bambook: return None def SendFile(self, fileName, guid = None): + import uuid if self.handle: taskID = job.NewJob() if guid: diff --git a/src/calibre/devices/cybook/t2b.py b/src/calibre/devices/cybook/t2b.py index 7aaeeb63d7..fc0c772bf7 100644 --- a/src/calibre/devices/cybook/t2b.py +++ b/src/calibre/devices/cybook/t2b.py @@ -5,7 +5,6 @@ Write a t2b file to disk. ''' import StringIO -from PIL import Image DEFAULT_T2B_DATA = '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x0f\xff\xff\xff\xf0\xff\x0f\xc3\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xf8\x00\x00\xff\xff\xff\xf0\xff\x0f\xc3\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xe0\xff\xf0\xff\xff\xff\xf0\xff\xff\xc3\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc3\xff\xff\xff\xff\xff\xf0\xff\xff\xc3\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x07\xff\xff\xfc\x00?\xf0\xff\x0f\xc3\x00?\xf0\xc0\xfe\x00?\xff\xff\xff\xff\xff\xff\xff\x0f\xff\xff\xf0<\x0f\xf0\xff\x0f\xc0,\x0f\xf0\x0e\xf0,\x0f\xff\xff\xff\xff\xff\xff\xff\x0f\xff\xff\xff\xff\xc3\xf0\xff\x0f\xc0\xff\x0f\xf0\xff\xf0\xff\xc7\xff\xff\xff\xff\xff\xff\xff\x0f\xff\xff\xff\xff\xc3\xf0\xff\x0f\xc3\xff\xc3\xf0\xff\xc3\xff\xc3\xff\xff\xff\xff\xff\xff\xff\x0f\xff\xff\xff\x00\x03\xf0\xff\x0f\xc3\xff\xc3\xf0\xff\xc3\xff\xc3\xff\xff\xff\xff\xff\xff\xff\x0f\xff\xff\xf0\x1f\xc3\xf0\xff\x0f\xc3\xff\xc3\xf0\xff\xc0\x00\x03\xff\xff\xff\xff\xff\xff\xff\x0b\xff\xff\xf0\xff\xc3\xf0\xff\x0f\xc3\xff\xc3\xf0\xff\xc3\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc3\xff\xff\xf3\xff\xc3\xf0\xff\x0f\xc3\xff\xc3\xf0\xff\xc3\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\xff\xfc\xf0\xff\x03\xf0\xff\x0f\xc0\xff\x0f\xf0\xff\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xf0\x0f\x00\xf08\x03\xf0\xff\x0f\xc0,\x0f\xf0\xff\xf0\x1f\x03\xff\xff\xff\xff\xff\xff\xff\xff\x00\x0f\xfc\x00\xc3\xf0\xff\x0f\xc3\x00?\xf0\xff\xff\x00\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xf0\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xf0\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x03\xfe\x94\xff\xff\xff\xff\xff\xff\xff\xff\xff\xc0\x00\x00\x00\x0f\xff\xff\xff\xff\xff\xff\xfc\x7f\xfe\x94\xff\xff\xff\xff\xff\xff\xff\xff\xfc\x0f\xff\xfe\xa9@\xff\xff\xff\xff\xff\xff\xfc?\xfe\xa4\xff\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xe9P\xff\xff\xff\xff\xff\xff\xfe/\xfe\xa8\xff\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xf9T\xff\xff\xff\xff\xf0@\x00+\xfa\xa8?\xff\xff\xff\xff\xff\xff\xff\xfc\xbf\xff\xff\xf9T\xff\xff\xff\xff\xcb\xe4}*\xaa\xaa?\xff\xff\xff\xff\xff\xff\xff\xfc\xbf\xff\xff\xe9T\xff\xff\xff\xff\xc7\xe4\xfd\x1a\xaa\xaa?\xff\xff\xff\xff\xff\xff\xff\xfc\xaf\xea\xaa\xa6\xa4\xff@\x00\x0f\xc3\xe8\xfe\x1a\xaa\xaa?\xff\xff\xff\xff\xff\xff\xff\xfcj\x95UZ\xa4\x00\x7f\xfe\x90\x03\xe8\xfe\n\xaa\xaa?\xff\xff\xff\xff\xff\xff\xff\xfcj\x95UZ\xa4?\xff\xff\xa5C\xe8\xfe\x06\xaa\xaa?\xff\xff\xff\xff\xff\xff\xff\xfcj\x95UZ\xa4?\xff\xff\xeaC\xe8\xbe\x06\xaa\xaa\x0f\xff\xff\xff\xff\xff\xff\xff\xfcj\x95UZ\xa4/\xff\xff\xea\x82\xe8j\x06\xaa\xaa\x0f\xff\xff\xff\xff\xff\xff\xff\xfcj\x95UZ\xa4/\xff\xff\xaa\x82\xe8*F\xaa\xaa\x8f\xff\xff\xff\xff\xff\xff\xff\xfcj\x95UZ\xa4+\xff\xfe\xaa\x82\xe8*\x86\xaa\xaa\x8f\xff\xff\x80\xff\xff\xff\xff\xfcj\x95UV\xa4\x1a\xfa\xaa\xaa\x82\xe8*\x86\xaa\xaa\x8f\xf0\x00T?\xff\xff\xff\xfcj\x95UV\xa4\x1a\xfa\xaa\xaa\x82\xe8*\x81\xaa\xaa\x8c\x03\xff\x95?\xff\xff\xff\xfcj\x95UV\xa4\x1a\xfa\xaa\xaa\x82\xe8*\x81\xaa\xaa\x80\xbf\xff\x95?\xff\xff\xff\xfcj\x95UV\xa4\x1a\xfa\xaa\xaa\x82\xe8*\x81\xaa\xaa\x9b\xff\xff\x95\x0f\xff\xff\xff\xfcj\x95UV\xa4\x1a\xfa\xaa\xaa\x82\xe8\x1a\x81\xaa\xaa\x9a\xff\xfe\x95\x0f\xff\xff\xff\xfcj\x95UV\xa4\x1a\xfa\xaa\xaa\x82\xe8\n\x81\xaa\xaa\xa6\xbf\xfeUO\xff\xff\xff\xfcj\x95UV\xa4\x1a\xfa\xaa\xaa\x82\xa8\n\x91j\xaa\xa5\xaa\xa9ZO\xff\xff\xff\xfcj\x95UV\xa4\x1a\xfa\xaa\xaa\x82\xa8\n\xa0j\xaa\xa5Z\x95ZO\xff\xff\xff\xfcj\x95UV\xa4*\xfa\xaa\xaa\x82\xa9\n\xa0j\xaa\xa5UUZC\xff\xff\xff\xfcj\x95UV\xa4*\xfa\xaa\xaa\x82\xaa\n\xa0j\xaa\xa4UUZS\xff\xff\xff\xfcZ\x95UV\xa4*\xfa\xaa\xaa\x82\xaa\n\xa0j\xaa\xa4UUZS\xff\xff\xff\xfcZ\x95UU\xa4*\xfa\xaa\xaa\x82\xaa\n\xa0j\xaa\xa8UUVS\xff\xff\xff\xfcZ\x95UU\xa4*\xea\xaa\xaa\x82\xaa\x06\xa0Z\xaa\xa8UUV\x93\xff\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x81\xaa\x02\xa0\x1a\xaa\xa8UUV\x90\xff\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x80\xaa\x02\xa0\x1a\xaa\xa8\x15UU\x94\xff\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x80\xaa"\xa0\x1a\xaa\xa8\x15UU\x94\xff\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x80\xaa2\xa4\x16\xaa\xa8\x15UU\x94\xff\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x80\xaa2\xa8\x16\xa6\xa9\x15UU\x94\xff\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x80\xaa2\xa8\x16\xa6\xa9\x05UUT?\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x84\xaa2\xa8\x16\xaa\xaa\x05UUU?\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x88\xaa2\xa8\x06\xaa\xaa\x05UUU?\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa1\xa8\xc5\xaa\xaa\x05UUU?\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa0\xa8E\xa9\xaa\x05UUU/\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa<\xa8\x05\xa9\xaaAUUU\x0f\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa<\xa8\x05\xa9\xaaAUUUO\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa<\xa9\x05\xaa\xaaAUUUO\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa\x1c\xaa\x01\xaa\xaa\x81UUUO\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa\x0c\xaa\x01\xaa\xaa\x81UUUO\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa\x0c\xaa1j\xaa\x80UUUC\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa\x0cj1jj\x90UUUS\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa\x0c*1jj\x90UUUS\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaaL*1jj\xa0UUUS\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa\x8f* j\xaa\xa0\x15UUS\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa\x8f*@j\xaa\xa0\x15UUP\xff\xff\xfcZ\x95UU\xa4*\xaa\xaa\xaa\x8c\xaa\x8f*\x8cZ\xaa\xa1\x15UUT\xff\xff\xfcZ\x95UU\xa4j\xaa\xaa\xaa\x8c\xaa\x8f*\x8cZ\x9a\xa0\x15UUT\xff\xff\xfcZ\x95UU\xa4j\xaa\xaa\xaa\x8c\xaa\x8f*\x8cZ\x9a\xa0\x15UUT\xff\xff\xfcZ\x95UU\xa4j\xaa\xaa\xaa\x8c\xaa\x8f\x1a\x8cZ\x9a\xa4\x15UUT?\xff\xfcZ\x95UU\x94j\xaa\xaa\xaa\x8cj\x8f\n\x8cVj\xa4\x05UU\xa4?\xff\xfcVUUU\xa4j\xaa\xaa\xaa\x8cj\x8fJ\x8c\x16\xaa\xa8\xc5UZ\xa5?\xff\xfcUUUV\xa4j\xaa\xaa\xaa\x8cj\x8f\xca\x8f\x16\xaa\xa8\xc5V\xaa\xa5?\xff\xfcUj\xaa\xaa\xa4j\xaa\xaa\xaa\x8cj\x8f\xca\x8f\x1a\xaa\xa8\x05Z\xaaU?\xff\xfcV\xaa\xaa\xaa\xa5j\xaa\xaa\xaa\x8e*\x8f\xca\x83\x1a\xaa\xa4\x01eUU?\xff\xfcZ\xaa\xaa\xaa\xa5j\xaa\xaa\xaa\x8f*\x8f\xca\x83\x1a\xa5U\x01U\x00\x00\x0f\xff\xfcUUUUUZ\xaa\xaa\xaaO%\x8f\xc6\x93\x15\x00\x001@\x0f\xff\xff\xff\xfcP\x00\x00\x00\x15\x00\x00\x00\x00\x0f\x00\x07\xc0\x03\x00\xff\xff0\x1f\xff\xff\xff\xff\xfc\x00\xff\xff\xf8\x00?\xff\xff\xff\x0f?\xc7\xc3\xf7\x0f\xff\xff\xf1\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xff\xf4\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff' @@ -18,7 +17,7 @@ def reduce_color(c): return 2 else: return 3 - + def i2b(n): return "".join([str((n >> y) & 1) for y in range(1, -1, -1)]) @@ -27,12 +26,13 @@ def write_t2b(t2bfile, coverdata=None): t2bfile is a file handle ready to write binary data to disk. coverdata is a string representation of a JPEG file. ''' + from PIL import Image if coverdata != None: coverdata = StringIO.StringIO(coverdata) cover = Image.open(coverdata).convert("L") cover.thumbnail((96, 144), Image.ANTIALIAS) t2bcover = Image.new('L', (96, 144), 'white') - + x, y = cover.size t2bcover.paste(cover, ((96-x)/2, (144-y)/2)) diff --git a/src/calibre/devices/folder_device/driver.py b/src/calibre/devices/folder_device/driver.py index 09df8cd6d8..c8ee3a2e77 100644 --- a/src/calibre/devices/folder_device/driver.py +++ b/src/calibre/devices/folder_device/driver.py @@ -97,3 +97,13 @@ class FOLDER_DEVICE(USBMS): @classmethod def settings(self): return FOLDER_DEVICE_FOR_CONFIG._config().parse() + + @classmethod + def config_widget(cls): + return FOLDER_DEVICE_FOR_CONFIG.config_widget() + + @classmethod + def save_settings(cls, config_widget): + return FOLDER_DEVICE_FOR_CONFIG.save_settings(config_widget) + + diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 178c1091f3..a051c84be6 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -9,7 +9,6 @@ Generates and writes an APNX page mapping file. ''' import struct -import uuid from calibre.ebooks.mobi.reader import MobiReader from calibre.ebooks.pdb.header import PdbHeaderReader @@ -51,6 +50,7 @@ class APNXBuilder(object): apnxf.write(apnx) def generate_apnx(self, pages): + import uuid apnx = '' content_vals = { diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index 3c69245cf9..1b10ce3050 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -10,10 +10,8 @@ Device driver for Amazon's Kindle import datetime, os, re, sys, json, hashlib -from calibre.devices.kindle.apnx import APNXBuilder from calibre.devices.kindle.bookmark import Bookmark from calibre.devices.usbms.driver import USBMS -from calibre.ebooks.metadata import MetaInformation from calibre import strftime ''' @@ -152,6 +150,7 @@ class KINDLE(USBMS): path_map, book_ext = resolve_bookmark_paths(storage, path_map) bookmarked_books = {} + for id in path_map: bookmark_ext = path_map[id].rpartition('.')[2] myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext) @@ -236,6 +235,8 @@ class KINDLE(USBMS): def add_annotation_to_library(self, db, db_id, annotation): from calibre.ebooks.BeautifulSoup import Tag + from calibre.ebooks.metadata import MetaInformation + bm = annotation ignore_tags = set(['Catalog', 'Clippings']) @@ -363,6 +364,8 @@ class KINDLE2(KINDLE): ''' Hijacking this function to write the apnx file. ''' + from calibre.devices.kindle.apnx import APNXBuilder + opts = self.settings() if not opts.extra_customization[self.OPT_APNX]: return diff --git a/src/calibre/devices/kobo/bookmark.py b/src/calibre/devices/kobo/bookmark.py index 8e199f77a6..afb392403d 100644 --- a/src/calibre/devices/kobo/bookmark.py +++ b/src/calibre/devices/kobo/bookmark.py @@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en' import os from contextlib import closing -import sqlite3 as sqlite class Bookmark(): # {{{ ''' @@ -32,7 +31,7 @@ class Bookmark(): # {{{ def get_bookmark_data(self): ''' Return the timestamp and last_read_location ''' - + import sqlite3 as sqlite user_notes = {} self.timestamp = os.path.getmtime(self.path) with closing(sqlite.connect(self.db_path)) as connection: diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 0bc578155d..f68ea8feff 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -6,7 +6,6 @@ __copyright__ = '2010, Timothy Legge and Kovid Goyal ]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>', @@ -34,8 +33,13 @@ def substitute_entites(raw): _CHARSET_ALIASES = { "macintosh" : "mac-roman", "x-sjis" : "shift-jis" } +def detect(*args, **kwargs): + from chardet import detect + return detect(*args, **kwargs) + def force_encoding(raw, verbose, assume_utf8=False): from calibre.constants import preferred_encoding + try: chardet = detect(raw[:1024*50]) except: diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py index 9fcfc559aa..221bece092 100755 --- a/src/calibre/ebooks/comic/input.py +++ b/src/calibre/ebooks/comic/input.py @@ -7,11 +7,10 @@ __docformat__ = 'restructuredtext en' Based on ideas from comiclrf created by FangornUK. ''' -import os, shutil, traceback, textwrap, time, codecs +import os, traceback, time from Queue import Empty -from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation -from calibre import extract, CurrentDir, prints, walk +from calibre import extract, prints, walk from calibre.constants import filesystem_encoding from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.ipc.server import Server @@ -273,245 +272,4 @@ def process_pages(pages, opts, update, tdir): return ans, failures -class ComicInput(InputFormatPlugin): - - name = 'Comic Input' - author = 'Kovid Goyal' - description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices' - file_types = set(['cbz', 'cbr', 'cbc']) - is_image_collection = True - core_usage = -1 - - options = set([ - OptionRecommendation(name='colors', recommended_value=256, - help=_('Number of colors for grayscale image conversion. Default: ' - '%default. Values of less than 256 may result in blurred text ' - 'on your device if you are creating your comics in EPUB format.')), - OptionRecommendation(name='dont_normalize', recommended_value=False, - help=_('Disable normalize (improve contrast) color range ' - 'for pictures. Default: False')), - OptionRecommendation(name='keep_aspect_ratio', recommended_value=False, - help=_('Maintain picture aspect ratio. Default is to fill the screen.')), - OptionRecommendation(name='dont_sharpen', recommended_value=False, - help=_('Disable sharpening.')), - OptionRecommendation(name='disable_trim', recommended_value=False, - help=_('Disable trimming of comic pages. For some comics, ' - 'trimming might remove content as well as borders.')), - OptionRecommendation(name='landscape', recommended_value=False, - help=_("Don't split landscape images into two portrait images")), - OptionRecommendation(name='wide', recommended_value=False, - help=_("Keep aspect ratio and scale image using screen height as " - "image width for viewing in landscape mode.")), - OptionRecommendation(name='right2left', recommended_value=False, - help=_('Used for right-to-left publications like manga. ' - 'Causes landscape pages to be split into portrait pages ' - 'from right to left.')), - OptionRecommendation(name='despeckle', recommended_value=False, - help=_('Enable Despeckle. Reduces speckle noise. ' - 'May greatly increase processing time.')), - OptionRecommendation(name='no_sort', recommended_value=False, - help=_("Don't sort the files found in the comic " - "alphabetically by name. Instead use the order they were " - "added to the comic.")), - OptionRecommendation(name='output_format', choices=['png', 'jpg'], - recommended_value='png', help=_('The format that images in the created ebook ' - 'are converted to. You can experiment to see which format gives ' - 'you optimal size and look on your device.')), - OptionRecommendation(name='no_process', recommended_value=False, - help=_("Apply no processing to the image")), - OptionRecommendation(name='dont_grayscale', recommended_value=False, - help=_('Do not convert the image to grayscale (black and white)')), - OptionRecommendation(name='comic_image_size', recommended_value=None, - help=_('Specify the image size as widthxheight pixels. Normally,' - ' an image size is automatically calculated from the output ' - 'profile, this option overrides it.')), - OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False, - help=_('When converting a CBC do not add links to each page to' - ' the TOC. Note this only applies if the TOC has more than one' - ' section')), - ]) - - recommendations = set([ - ('margin_left', 0, OptionRecommendation.HIGH), - ('margin_top', 0, OptionRecommendation.HIGH), - ('margin_right', 0, OptionRecommendation.HIGH), - ('margin_bottom', 0, OptionRecommendation.HIGH), - ('insert_blank_line', False, OptionRecommendation.HIGH), - ('remove_paragraph_spacing', False, OptionRecommendation.HIGH), - ('change_justification', 'left', OptionRecommendation.HIGH), - ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH), - ('chapter', None, OptionRecommendation.HIGH), - ('page_breaks_brefore', None, OptionRecommendation.HIGH), - ('use_auto_toc', False, OptionRecommendation.HIGH), - ('page_breaks_before', None, OptionRecommendation.HIGH), - ('disable_font_rescaling', True, OptionRecommendation.HIGH), - ('linearize_tables', False, OptionRecommendation.HIGH), - ]) - - def get_comics_from_collection(self, stream): - from calibre.libunzip import extract as zipextract - tdir = PersistentTemporaryDirectory('_comic_collection') - zipextract(stream, tdir) - comics = [] - with CurrentDir(tdir): - if not os.path.exists('comics.txt'): - raise ValueError(( - '%s is not a valid comic collection' - ' no comics.txt was found in the file') - %stream.name) - raw = open('comics.txt', 'rb').read() - if raw.startswith(codecs.BOM_UTF16_BE): - raw = raw.decode('utf-16-be')[1:] - elif raw.startswith(codecs.BOM_UTF16_LE): - raw = raw.decode('utf-16-le')[1:] - elif raw.startswith(codecs.BOM_UTF8): - raw = raw.decode('utf-8')[1:] - else: - raw = raw.decode('utf-8') - for line in raw.splitlines(): - line = line.strip() - if not line: - continue - fname, title = line.partition(':')[0], line.partition(':')[-1] - fname = fname.replace('#', '_') - fname = os.path.join(tdir, *fname.split('/')) - if not title: - title = os.path.basename(fname).rpartition('.')[0] - if os.access(fname, os.R_OK): - comics.append([title, fname]) - if not comics: - raise ValueError('%s has no comics'%stream.name) - return comics - - def get_pages(self, comic, tdir2): - tdir = extract_comic(comic) - new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort, - verbose=self.opts.verbose) - thumbnail = None - if not new_pages: - raise ValueError('Could not find any pages in the comic: %s' - %comic) - if self.opts.no_process: - n2 = [] - for page in new_pages: - n2.append(os.path.join(tdir2, os.path.basename(page))) - shutil.copyfile(page, n2[-1]) - new_pages = n2 - else: - new_pages, failures = process_pages(new_pages, self.opts, - self.report_progress, tdir2) - if failures: - self.log.warning('Could not process the following pages ' - '(run with --verbose to see why):') - for f in failures: - self.log.warning('\t', f) - if not new_pages: - raise ValueError('Could not find any valid pages in comic: %s' - % comic) - thumbnail = os.path.join(tdir2, - 'thumbnail.'+self.opts.output_format.lower()) - if not os.access(thumbnail, os.R_OK): - thumbnail = None - return new_pages - - def get_images(self): - return self._images - - def convert(self, stream, opts, file_ext, log, accelerators): - from calibre.ebooks.metadata import MetaInformation - from calibre.ebooks.metadata.opf2 import OPFCreator - from calibre.ebooks.metadata.toc import TOC - - self.opts, self.log= opts, log - if file_ext == 'cbc': - comics_ = self.get_comics_from_collection(stream) - else: - comics_ = [['Comic', os.path.abspath(stream.name)]] - stream.close() - comics = [] - for i, x in enumerate(comics_): - title, fname = x - cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.' - cdir = os.path.abspath(cdir) - if not os.path.exists(cdir): - os.makedirs(cdir) - pages = self.get_pages(fname, cdir) - if not pages: continue - wrappers = self.create_wrappers(pages) - comics.append((title, pages, wrappers)) - - if not comics: - raise ValueError('No comic pages found in %s'%stream.name) - - mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0], - [_('Unknown')]) - opf = OPFCreator(os.path.abspath('.'), mi) - entries = [] - - def href(x): - if len(comics) == 1: return os.path.basename(x) - return '/'.join(x.split(os.sep)[-2:]) - - for comic in comics: - pages, wrappers = comic[1:] - entries += [(w, None) for w in map(href, wrappers)] + \ - [(x, None) for x in map(href, pages)] - opf.create_manifest(entries) - spine = [] - for comic in comics: - spine.extend(map(href, comic[2])) - self._images = [] - for comic in comics: - self._images.extend(comic[1]) - opf.create_spine(spine) - toc = TOC() - if len(comics) == 1: - wrappers = comics[0][2] - for i, x in enumerate(wrappers): - toc.add_item(href(x), None, _('Page')+' %d'%(i+1), - play_order=i) - else: - po = 0 - for comic in comics: - po += 1 - wrappers = comic[2] - stoc = toc.add_item(href(wrappers[0]), - None, comic[0], play_order=po) - if not opts.dont_add_comic_pages_to_toc: - for i, x in enumerate(wrappers): - stoc.add_item(href(x), None, - _('Page')+' %d'%(i+1), play_order=po) - po += 1 - opf.set_toc(toc) - m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb') - opf.render(m, n, 'toc.ncx') - return os.path.abspath('metadata.opf') - - def create_wrappers(self, pages): - from calibre.ebooks.oeb.base import XHTML_NS - wrappers = [] - WRAPPER = textwrap.dedent('''\ - - - Page #%d - - - -
- comic page #%d -
- - - ''') - dir = os.path.dirname(pages[0]) - for i, page in enumerate(pages): - wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1) - page = os.path.join(dir, 'page_%d.xhtml'%(i+1)) - open(page, 'wb').write(wrapper) - wrappers.append(page) - return wrappers diff --git a/src/calibre/ebooks/conversion/plugins/__init__.py b/src/calibre/ebooks/conversion/plugins/__init__.py new file mode 100644 index 0000000000..dd9615356c --- /dev/null +++ b/src/calibre/ebooks/conversion/plugins/__init__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + + diff --git a/src/calibre/ebooks/azw4/input.py b/src/calibre/ebooks/conversion/plugins/azw4_input.py similarity index 84% rename from src/calibre/ebooks/azw4/input.py rename to src/calibre/ebooks/conversion/plugins/azw4_input.py index 1ac7657342..6d2b2a917e 100644 --- a/src/calibre/ebooks/azw4/input.py +++ b/src/calibre/ebooks/conversion/plugins/azw4_input.py @@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en' import os from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.pdb.header import PdbHeaderReader -from calibre.ebooks.azw4.reader import Reader class AZW4Input(InputFormatPlugin): @@ -19,6 +17,9 @@ class AZW4Input(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.pdb.header import PdbHeaderReader + from calibre.ebooks.azw4.reader import Reader + header = PdbHeaderReader(stream) reader = Reader(header, stream, log, options) opf = reader.extract_content(os.getcwd()) diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/conversion/plugins/chm_input.py similarity index 98% rename from src/calibre/ebooks/chm/input.py rename to src/calibre/ebooks/conversion/plugins/chm_input.py index f36685bd91..a674735f1d 100644 --- a/src/calibre/ebooks/chm/input.py +++ b/src/calibre/ebooks/conversion/plugins/chm_input.py @@ -3,9 +3,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ,' \ ' and Alex Bramley .' -import os, uuid - -from lxml import html +import os from calibre.customize.conversion import InputFormatPlugin from calibre.ptempfile import TemporaryDirectory @@ -77,7 +75,7 @@ class CHMInput(InputFormatPlugin): def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi): # use HTMLInput plugin to generate book - from calibre.ebooks.html.input import HTMLInput + from calibre.customize.builtins import HTMLInput opts.breadth_first = True htmlinput = HTMLInput(None) oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi) @@ -85,6 +83,8 @@ class CHMInput(InputFormatPlugin): def _create_oebbook(self, hhcpath, basedir, opts, log, mi): + import uuid + from lxml import html from calibre.ebooks.conversion.plumber import create_oebbook from calibre.ebooks.oeb.base import DirContainer oeb = create_oebbook(log, None, opts, @@ -142,6 +142,7 @@ class CHMInput(InputFormatPlugin): return oeb def _create_html_root(self, hhcpath, log): + from lxml import html hhcdata = self._read_file(hhcpath) hhcroot = html.fromstring(hhcdata) chapters = self._process_nodes(hhcroot) diff --git a/src/calibre/ebooks/conversion/plugins/comic_input.py b/src/calibre/ebooks/conversion/plugins/comic_input.py new file mode 100644 index 0000000000..77ae7d8086 --- /dev/null +++ b/src/calibre/ebooks/conversion/plugins/comic_input.py @@ -0,0 +1,259 @@ +from __future__ import with_statement +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +''' +Based on ideas from comiclrf created by FangornUK. +''' + +import shutil, textwrap, codecs, os + +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation +from calibre import CurrentDir +from calibre.ptempfile import PersistentTemporaryDirectory + +class ComicInput(InputFormatPlugin): + + name = 'Comic Input' + author = 'Kovid Goyal' + description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices' + file_types = set(['cbz', 'cbr', 'cbc']) + is_image_collection = True + core_usage = -1 + + options = set([ + OptionRecommendation(name='colors', recommended_value=256, + help=_('Number of colors for grayscale image conversion. Default: ' + '%default. Values of less than 256 may result in blurred text ' + 'on your device if you are creating your comics in EPUB format.')), + OptionRecommendation(name='dont_normalize', recommended_value=False, + help=_('Disable normalize (improve contrast) color range ' + 'for pictures. Default: False')), + OptionRecommendation(name='keep_aspect_ratio', recommended_value=False, + help=_('Maintain picture aspect ratio. Default is to fill the screen.')), + OptionRecommendation(name='dont_sharpen', recommended_value=False, + help=_('Disable sharpening.')), + OptionRecommendation(name='disable_trim', recommended_value=False, + help=_('Disable trimming of comic pages. For some comics, ' + 'trimming might remove content as well as borders.')), + OptionRecommendation(name='landscape', recommended_value=False, + help=_("Don't split landscape images into two portrait images")), + OptionRecommendation(name='wide', recommended_value=False, + help=_("Keep aspect ratio and scale image using screen height as " + "image width for viewing in landscape mode.")), + OptionRecommendation(name='right2left', recommended_value=False, + help=_('Used for right-to-left publications like manga. ' + 'Causes landscape pages to be split into portrait pages ' + 'from right to left.')), + OptionRecommendation(name='despeckle', recommended_value=False, + help=_('Enable Despeckle. Reduces speckle noise. ' + 'May greatly increase processing time.')), + OptionRecommendation(name='no_sort', recommended_value=False, + help=_("Don't sort the files found in the comic " + "alphabetically by name. Instead use the order they were " + "added to the comic.")), + OptionRecommendation(name='output_format', choices=['png', 'jpg'], + recommended_value='png', help=_('The format that images in the created ebook ' + 'are converted to. You can experiment to see which format gives ' + 'you optimal size and look on your device.')), + OptionRecommendation(name='no_process', recommended_value=False, + help=_("Apply no processing to the image")), + OptionRecommendation(name='dont_grayscale', recommended_value=False, + help=_('Do not convert the image to grayscale (black and white)')), + OptionRecommendation(name='comic_image_size', recommended_value=None, + help=_('Specify the image size as widthxheight pixels. Normally,' + ' an image size is automatically calculated from the output ' + 'profile, this option overrides it.')), + OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False, + help=_('When converting a CBC do not add links to each page to' + ' the TOC. Note this only applies if the TOC has more than one' + ' section')), + ]) + + recommendations = set([ + ('margin_left', 0, OptionRecommendation.HIGH), + ('margin_top', 0, OptionRecommendation.HIGH), + ('margin_right', 0, OptionRecommendation.HIGH), + ('margin_bottom', 0, OptionRecommendation.HIGH), + ('insert_blank_line', False, OptionRecommendation.HIGH), + ('remove_paragraph_spacing', False, OptionRecommendation.HIGH), + ('change_justification', 'left', OptionRecommendation.HIGH), + ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH), + ('chapter', None, OptionRecommendation.HIGH), + ('page_breaks_brefore', None, OptionRecommendation.HIGH), + ('use_auto_toc', False, OptionRecommendation.HIGH), + ('page_breaks_before', None, OptionRecommendation.HIGH), + ('disable_font_rescaling', True, OptionRecommendation.HIGH), + ('linearize_tables', False, OptionRecommendation.HIGH), + ]) + + def get_comics_from_collection(self, stream): + from calibre.libunzip import extract as zipextract + tdir = PersistentTemporaryDirectory('_comic_collection') + zipextract(stream, tdir) + comics = [] + with CurrentDir(tdir): + if not os.path.exists('comics.txt'): + raise ValueError(( + '%s is not a valid comic collection' + ' no comics.txt was found in the file') + %stream.name) + raw = open('comics.txt', 'rb').read() + if raw.startswith(codecs.BOM_UTF16_BE): + raw = raw.decode('utf-16-be')[1:] + elif raw.startswith(codecs.BOM_UTF16_LE): + raw = raw.decode('utf-16-le')[1:] + elif raw.startswith(codecs.BOM_UTF8): + raw = raw.decode('utf-8')[1:] + else: + raw = raw.decode('utf-8') + for line in raw.splitlines(): + line = line.strip() + if not line: + continue + fname, title = line.partition(':')[0], line.partition(':')[-1] + fname = fname.replace('#', '_') + fname = os.path.join(tdir, *fname.split('/')) + if not title: + title = os.path.basename(fname).rpartition('.')[0] + if os.access(fname, os.R_OK): + comics.append([title, fname]) + if not comics: + raise ValueError('%s has no comics'%stream.name) + return comics + + def get_pages(self, comic, tdir2): + from calibre.ebooks.comic.input import (extract_comic, process_pages, + find_pages) + tdir = extract_comic(comic) + new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort, + verbose=self.opts.verbose) + thumbnail = None + if not new_pages: + raise ValueError('Could not find any pages in the comic: %s' + %comic) + if self.opts.no_process: + n2 = [] + for page in new_pages: + n2.append(os.path.join(tdir2, os.path.basename(page))) + shutil.copyfile(page, n2[-1]) + new_pages = n2 + else: + new_pages, failures = process_pages(new_pages, self.opts, + self.report_progress, tdir2) + if failures: + self.log.warning('Could not process the following pages ' + '(run with --verbose to see why):') + for f in failures: + self.log.warning('\t', f) + if not new_pages: + raise ValueError('Could not find any valid pages in comic: %s' + % comic) + thumbnail = os.path.join(tdir2, + 'thumbnail.'+self.opts.output_format.lower()) + if not os.access(thumbnail, os.R_OK): + thumbnail = None + return new_pages + + def get_images(self): + return self._images + + def convert(self, stream, opts, file_ext, log, accelerators): + from calibre.ebooks.metadata import MetaInformation + from calibre.ebooks.metadata.opf2 import OPFCreator + from calibre.ebooks.metadata.toc import TOC + + self.opts, self.log= opts, log + if file_ext == 'cbc': + comics_ = self.get_comics_from_collection(stream) + else: + comics_ = [['Comic', os.path.abspath(stream.name)]] + stream.close() + comics = [] + for i, x in enumerate(comics_): + title, fname = x + cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.' + cdir = os.path.abspath(cdir) + if not os.path.exists(cdir): + os.makedirs(cdir) + pages = self.get_pages(fname, cdir) + if not pages: continue + wrappers = self.create_wrappers(pages) + comics.append((title, pages, wrappers)) + + if not comics: + raise ValueError('No comic pages found in %s'%stream.name) + + mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0], + [_('Unknown')]) + opf = OPFCreator(os.path.abspath('.'), mi) + entries = [] + + def href(x): + if len(comics) == 1: return os.path.basename(x) + return '/'.join(x.split(os.sep)[-2:]) + + for comic in comics: + pages, wrappers = comic[1:] + entries += [(w, None) for w in map(href, wrappers)] + \ + [(x, None) for x in map(href, pages)] + opf.create_manifest(entries) + spine = [] + for comic in comics: + spine.extend(map(href, comic[2])) + self._images = [] + for comic in comics: + self._images.extend(comic[1]) + opf.create_spine(spine) + toc = TOC() + if len(comics) == 1: + wrappers = comics[0][2] + for i, x in enumerate(wrappers): + toc.add_item(href(x), None, _('Page')+' %d'%(i+1), + play_order=i) + else: + po = 0 + for comic in comics: + po += 1 + wrappers = comic[2] + stoc = toc.add_item(href(wrappers[0]), + None, comic[0], play_order=po) + if not opts.dont_add_comic_pages_to_toc: + for i, x in enumerate(wrappers): + stoc.add_item(href(x), None, + _('Page')+' %d'%(i+1), play_order=po) + po += 1 + opf.set_toc(toc) + m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb') + opf.render(m, n, 'toc.ncx') + return os.path.abspath('metadata.opf') + + def create_wrappers(self, pages): + from calibre.ebooks.oeb.base import XHTML_NS + wrappers = [] + WRAPPER = textwrap.dedent('''\ + + + Page #%d + + + +
+ comic page #%d +
+ + + ''') + dir = os.path.dirname(pages[0]) + for i, page in enumerate(pages): + wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1) + page = os.path.join(dir, 'page_%d.xhtml'%(i+1)) + open(page, 'wb').write(wrapper) + wrappers.append(page) + return wrappers + diff --git a/src/calibre/ebooks/djvu/input.py b/src/calibre/ebooks/conversion/plugins/djvu_input.py similarity index 98% rename from src/calibre/ebooks/djvu/input.py rename to src/calibre/ebooks/conversion/plugins/djvu_input.py index 70dbf97f5d..936ef1a702 100644 --- a/src/calibre/ebooks/djvu/input.py +++ b/src/calibre/ebooks/conversion/plugins/djvu_input.py @@ -12,7 +12,6 @@ from subprocess import Popen, PIPE from cStringIO import StringIO from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation -from calibre.ebooks.txt.processor import convert_basic class DJVUInput(InputFormatPlugin): @@ -28,6 +27,8 @@ class DJVUInput(InputFormatPlugin): ]) def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.txt.processor import convert_basic + stdout = StringIO() ppdjvu = True # using djvutxt is MUCH faster, should make it an option diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/conversion/plugins/epub_input.py similarity index 98% rename from src/calibre/ebooks/epub/input.py rename to src/calibre/ebooks/conversion/plugins/epub_input.py index c2cfedd7d4..47356dbd1f 100644 --- a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/conversion/plugins/epub_input.py @@ -3,11 +3,9 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, uuid +import os from itertools import cycle -from lxml import etree - from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation class EPUBInput(InputFormatPlugin): @@ -30,6 +28,8 @@ class EPUBInput(InputFormatPlugin): f.write(raw[1024:]) def process_encryption(self, encfile, opf, log): + from lxml import etree + import uuid key = None for item in opf.identifier_iter(): scheme = None @@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin): return False def rationalize_cover(self, opf, log): + from lxml import etree guide_cover, guide_elem = None, None for guide_elem in opf.iterguide(): if guide_elem.get('type', '').lower() == 'cover': @@ -110,6 +111,7 @@ class EPUBInput(InputFormatPlugin): renderer) def find_opf(self): + from lxml import etree def attr(n, attr): for k, v in n.attrib.items(): if k.endswith(attr): diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/conversion/plugins/epub_output.py similarity index 99% rename from src/calibre/ebooks/epub/output.py rename to src/calibre/ebooks/conversion/plugins/epub_output.py index 2bdfb0d934..44249e49a2 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/conversion/plugins/epub_output.py @@ -8,14 +8,12 @@ __docformat__ = 'restructuredtext en' import os, shutil, re -from calibre.customize.conversion import OutputFormatPlugin +from calibre.customize.conversion import (OutputFormatPlugin, + OptionRecommendation) from calibre.ptempfile import TemporaryDirectory from calibre import CurrentDir -from calibre.customize.conversion import OptionRecommendation from calibre.constants import filesystem_encoding -from lxml import etree - block_level_tags = ( 'address', 'body', @@ -289,6 +287,7 @@ class EPUBOutput(OutputFormatPlugin): # }}} def condense_ncx(self, ncx_path): + from lxml import etree if not self.opts.pretty_print: tree = etree.parse(ncx_path) for tag in tree.getroot().iter(tag=etree.Element): diff --git a/src/calibre/ebooks/fb2/input.py b/src/calibre/ebooks/conversion/plugins/fb2_input.py similarity index 99% rename from src/calibre/ebooks/fb2/input.py rename to src/calibre/ebooks/conversion/plugins/fb2_input.py index 147e940eb4..747f8f19d8 100644 --- a/src/calibre/ebooks/fb2/input.py +++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py @@ -6,7 +6,6 @@ Convert .fb2 files to .lrf """ import os, re from base64 import b64decode -from lxml import etree from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre import guess_type @@ -38,6 +37,7 @@ class FB2Input(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + from lxml import etree from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/conversion/plugins/fb2_output.py similarity index 99% rename from src/calibre/ebooks/fb2/output.py rename to src/calibre/ebooks/conversion/plugins/fb2_output.py index 2042902724..d7db2a0a33 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/conversion/plugins/fb2_output.py @@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en' import os from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation -from calibre.ebooks.fb2.fb2ml import FB2MLizer class FB2Output(OutputFormatPlugin): @@ -162,6 +161,7 @@ class FB2Output(OutputFormatPlugin): def convert(self, oeb_book, output_path, input_plugin, opts, log): from calibre.ebooks.oeb.transforms.jacket import linearize_jacket from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable + from calibre.ebooks.fb2.fb2ml import FB2MLizer try: rasterizer = SVGRasterizer() diff --git a/src/calibre/ebooks/conversion/plugins/html_input.py b/src/calibre/ebooks/conversion/plugins/html_input.py new file mode 100644 index 0000000000..cfd2ebf8cf --- /dev/null +++ b/src/calibre/ebooks/conversion/plugins/html_input.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import re, tempfile, os +from functools import partial +from itertools import izip +from urllib import quote + +from calibre.constants import islinux, isbsd +from calibre.customize.conversion import (InputFormatPlugin, + OptionRecommendation) +from calibre.utils.localization import get_lang +from calibre.utils.filenames import ascii_filename + + +class HTMLInput(InputFormatPlugin): + + name = 'HTML Input' + author = 'Kovid Goyal' + description = 'Convert HTML and OPF files to an OEB' + file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml']) + + options = set([ + OptionRecommendation(name='breadth_first', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Traverse links in HTML files breadth first. Normally, ' + 'they are traversed depth first.' + ) + ), + + OptionRecommendation(name='max_levels', + recommended_value=5, level=OptionRecommendation.LOW, + help=_('Maximum levels of recursion when following links in ' + 'HTML files. Must be non-negative. 0 implies that no ' + 'links in the root HTML file are followed. Default is ' + '%default.' + ) + ), + + OptionRecommendation(name='dont_package', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Normally this input plugin re-arranges all the input ' + 'files into a standard folder hierarchy. Only use this option ' + 'if you know what you are doing as it can result in various ' + 'nasty side effects in the rest of the conversion pipeline.' + ) + ), + + ]) + + def convert(self, stream, opts, file_ext, log, + accelerators): + self._is_case_sensitive = None + basedir = os.getcwd() + self.opts = opts + + fname = None + if hasattr(stream, 'name'): + basedir = os.path.dirname(stream.name) + fname = os.path.basename(stream.name) + + if file_ext != 'opf': + if opts.dont_package: + raise ValueError('The --dont-package option is not supported for an HTML input file') + from calibre.ebooks.metadata.html import get_metadata + mi = get_metadata(stream) + if fname: + from calibre.ebooks.metadata.meta import metadata_from_filename + fmi = metadata_from_filename(fname) + fmi.smart_update(mi) + mi = fmi + oeb = self.create_oebbook(stream.name, basedir, opts, log, mi) + return oeb + + from calibre.ebooks.conversion.plumber import create_oebbook + return create_oebbook(log, stream.name, opts, + encoding=opts.input_encoding) + + def is_case_sensitive(self, path): + if getattr(self, '_is_case_sensitive', None) is not None: + return self._is_case_sensitive + if not path or not os.path.exists(path): + return islinux or isbsd + self._is_case_sensitive = not (os.path.exists(path.lower()) \ + and os.path.exists(path.upper())) + return self._is_case_sensitive + + def create_oebbook(self, htmlpath, basedir, opts, log, mi): + import uuid + from calibre.ebooks.conversion.plumber import create_oebbook + from calibre.ebooks.oeb.base import (DirContainer, + rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, + xpath) + from calibre import guess_type + from calibre.ebooks.oeb.transforms.metadata import \ + meta_info_to_oeb_metadata + from calibre.ebooks.html.input import get_filelist + import cssutils, logging + cssutils.log.setLevel(logging.WARN) + self.OEB_STYLES = OEB_STYLES + oeb = create_oebbook(log, None, opts, self, + encoding=opts.input_encoding, populate=False) + self.oeb = oeb + + metadata = oeb.metadata + meta_info_to_oeb_metadata(mi, metadata, log) + if not metadata.language: + oeb.logger.warn(u'Language not specified') + metadata.add('language', get_lang().replace('_', '-')) + if not metadata.creator: + oeb.logger.warn('Creator not specified') + metadata.add('creator', self.oeb.translate(__('Unknown'))) + if not metadata.title: + oeb.logger.warn('Title not specified') + metadata.add('title', self.oeb.translate(__('Unknown'))) + bookid = str(uuid.uuid4()) + metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') + for ident in metadata.identifier: + if 'id' in ident.attrib: + self.oeb.uid = metadata.identifier[0] + break + + filelist = get_filelist(htmlpath, basedir, opts, log) + filelist = [f for f in filelist if not f.is_binary] + htmlfile_map = {} + for f in filelist: + path = f.path + oeb.container = DirContainer(os.path.dirname(path), log, + ignore_opf=True) + bname = os.path.basename(path) + id, href = oeb.manifest.generate(id='html', + href=ascii_filename(bname)) + htmlfile_map[path] = href + item = oeb.manifest.add(id, href, 'text/html') + item.html_input_href = bname + oeb.spine.add(item, True) + + self.added_resources = {} + self.log = log + self.log('Normalizing filename cases') + for path, href in htmlfile_map.items(): + if not self.is_case_sensitive(path): + path = path.lower() + self.added_resources[path] = href + self.urlnormalize, self.DirContainer = urlnormalize, DirContainer + self.urldefrag = urldefrag + self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME + + self.log('Rewriting HTML links') + for f in filelist: + path = f.path + dpath = os.path.dirname(path) + oeb.container = DirContainer(dpath, log, ignore_opf=True) + item = oeb.manifest.hrefs[htmlfile_map[path]] + rewrite_links(item.data, partial(self.resource_adder, base=dpath)) + + for item in oeb.manifest.values(): + if item.media_type in self.OEB_STYLES: + dpath = None + for path, href in self.added_resources.items(): + if href == item.href: + dpath = os.path.dirname(path) + break + cssutils.replaceUrls(item.data, + partial(self.resource_adder, base=dpath)) + + toc = self.oeb.toc + self.oeb.auto_generated_toc = True + titles = [] + headers = [] + for item in self.oeb.spine: + if not item.linear: continue + html = item.data + title = ''.join(xpath(html, '/h:html/h:head/h:title/text()')) + title = re.sub(r'\s+', ' ', title.strip()) + if title: + titles.append(title) + headers.append('(unlabled)') + for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'): + expr = '/h:html/h:body//h:%s[position()=1]/text()' + header = ''.join(xpath(html, expr % tag)) + header = re.sub(r'\s+', ' ', header.strip()) + if header: + headers[-1] = header + break + use = titles + if len(titles) > len(set(titles)): + use = headers + for title, item in izip(use, self.oeb.spine): + if not item.linear: continue + toc.add(title, item.href) + + oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True) + return oeb + + def link_to_local_path(self, link_, base=None): + from calibre.ebooks.html.input import Link + if not isinstance(link_, unicode): + try: + link_ = link_.decode('utf-8', 'error') + except: + self.log.warn('Failed to decode link %r. Ignoring'%link_) + return None, None + try: + l = Link(link_, base if base else os.getcwdu()) + except: + self.log.exception('Failed to process link: %r'%link_) + return None, None + if l.path is None: + # Not a local resource + return None, None + link = l.path.replace('/', os.sep).strip() + frag = l.fragment + if not link: + return None, None + return link, frag + + def resource_adder(self, link_, base=None): + link, frag = self.link_to_local_path(link_, base=base) + if link is None: + return link_ + try: + if base and not os.path.isabs(link): + link = os.path.join(base, link) + link = os.path.abspath(link) + except: + return link_ + if not os.access(link, os.R_OK): + return link_ + if os.path.isdir(link): + self.log.warn(link_, 'is a link to a directory. Ignoring.') + return link_ + if not self.is_case_sensitive(tempfile.gettempdir()): + link = link.lower() + if link not in self.added_resources: + bhref = os.path.basename(link) + id, href = self.oeb.manifest.generate(id='added', + href=bhref) + guessed = self.guess_type(href)[0] + media_type = guessed or self.BINARY_MIME + if media_type == 'text/plain': + self.log.warn('Ignoring link to text file %r'%link_) + return None + + self.oeb.log.debug('Added', link) + self.oeb.container = self.DirContainer(os.path.dirname(link), + self.oeb.log, ignore_opf=True) + # Load into memory + item = self.oeb.manifest.add(id, href, media_type) + # bhref refers to an already existing file. The read() method of + # DirContainer will call unquote on it before trying to read the + # file, therefore we quote it here. + if isinstance(bhref, unicode): + bhref = bhref.encode('utf-8') + item.html_input_href = quote(bhref).decode('utf-8') + if guessed in self.OEB_STYLES: + item.override_css_fetch = partial( + self.css_import_handler, os.path.dirname(link)) + item.data + self.added_resources[link] = href + + nlink = self.added_resources[link] + if frag: + nlink = '#'.join((nlink, frag)) + return nlink + + def css_import_handler(self, base, href): + link, frag = self.link_to_local_path(href, base=base) + if link is None or not os.access(link, os.R_OK) or os.path.isdir(link): + return (None, None) + try: + raw = open(link, 'rb').read().decode('utf-8', 'replace') + raw = self.oeb.css_preprocessor(raw, add_namespace=True) + except: + self.log.exception('Failed to read CSS file: %r'%link) + return (None, None) + return (None, raw) diff --git a/src/calibre/ebooks/html/output.py b/src/calibre/ebooks/conversion/plugins/html_output.py similarity index 96% rename from src/calibre/ebooks/html/output.py rename to src/calibre/ebooks/conversion/plugins/html_output.py index fe7b4cf274..3821ba41a4 100644 --- a/src/calibre/ebooks/html/output.py +++ b/src/calibre/ebooks/conversion/plugins/html_output.py @@ -4,22 +4,11 @@ __copyright__ = '2010, Fabian Grassl ' __docformat__ = 'restructuredtext en' import os, re, shutil - -from calibre.utils import zipfile - from os.path import dirname, abspath, relpath, exists, basename -from lxml import etree -from templite import Templite - from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation from calibre import CurrentDir from calibre.ptempfile import PersistentTemporaryDirectory -from calibre.utils.zipfile import ZipFile - -from urllib import unquote - -from calibre.ebooks.html.meta import EasyMeta class HTMLOutput(OutputFormatPlugin): @@ -50,6 +39,9 @@ class HTMLOutput(OutputFormatPlugin): ''' Generate table of contents ''' + from lxml import etree + from urllib import unquote + from calibre.ebooks.oeb.base import element with CurrentDir(output_dir): def build_node(current_node, parent=None): @@ -72,11 +64,18 @@ class HTMLOutput(OutputFormatPlugin): return wrap def generate_html_toc(self, oeb_book, ref_url, output_dir): + from lxml import etree + root = self.generate_toc(oeb_book, ref_url, output_dir) return etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=False) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from lxml import etree + from calibre.utils import zipfile + from templite import Templite + from urllib import unquote + from calibre.ebooks.html.meta import EasyMeta # read template files if opts.template_html_index is not None: @@ -192,7 +191,7 @@ class HTMLOutput(OutputFormatPlugin): f.write(t) item.unload_data_from_memory(memory=path) - zfile = ZipFile(output_path, "w") + zfile = zipfile.ZipFile(output_path, "w") zfile.add_dir(output_dir, basename(output_dir)) zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED) diff --git a/src/calibre/ebooks/htmlz/input.py b/src/calibre/ebooks/conversion/plugins/htmlz_input.py similarity index 96% rename from src/calibre/ebooks/htmlz/input.py rename to src/calibre/ebooks/conversion/plugins/htmlz_input.py index f0f45f72fe..e9fbb1d7c2 100644 --- a/src/calibre/ebooks/htmlz/input.py +++ b/src/calibre/ebooks/conversion/plugins/htmlz_input.py @@ -10,9 +10,6 @@ import os from calibre import guess_type from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.chardet import xml_to_unicode -from calibre.ebooks.metadata.opf2 import OPF -from calibre.utils.zipfile import ZipFile class HTMLZInput(InputFormatPlugin): @@ -23,6 +20,10 @@ class HTMLZInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.chardet import xml_to_unicode + from calibre.ebooks.metadata.opf2 import OPF + from calibre.utils.zipfile import ZipFile + self.log = log html = u'' top_levels = [] diff --git a/src/calibre/ebooks/htmlz/output.py b/src/calibre/ebooks/conversion/plugins/htmlz_output.py similarity index 96% rename from src/calibre/ebooks/htmlz/output.py rename to src/calibre/ebooks/conversion/plugins/htmlz_output.py index a1ef57af2c..f35dbc4dad 100644 --- a/src/calibre/ebooks/htmlz/output.py +++ b/src/calibre/ebooks/conversion/plugins/htmlz_output.py @@ -9,13 +9,10 @@ __docformat__ = 'restructuredtext en' import os from cStringIO import StringIO -from lxml import etree from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation -from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf from calibre.ptempfile import TemporaryDirectory -from calibre.utils.zipfile import ZipFile class HTMLZOutput(OutputFormatPlugin): @@ -43,7 +40,10 @@ class HTMLZOutput(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from lxml import etree from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME + from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf + from calibre.utils.zipfile import ZipFile # HTML if opts.htmlz_css_type == 'inline': @@ -81,7 +81,7 @@ class HTMLZOutput(OutputFormatPlugin): fname = os.path.join(tdir, 'images', images[item.href]) with open(fname, 'wb') as img: img.write(data) - + # Cover cover_path = None try: diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/conversion/plugins/lit_input.py similarity index 100% rename from src/calibre/ebooks/lit/input.py rename to src/calibre/ebooks/conversion/plugins/lit_input.py diff --git a/src/calibre/ebooks/lit/output.py b/src/calibre/ebooks/conversion/plugins/lit_output.py similarity index 100% rename from src/calibre/ebooks/lit/output.py rename to src/calibre/ebooks/conversion/plugins/lit_output.py diff --git a/src/calibre/ebooks/conversion/plugins/lrf_input.py b/src/calibre/ebooks/conversion/plugins/lrf_input.py new file mode 100644 index 0000000000..63af39e1e0 --- /dev/null +++ b/src/calibre/ebooks/conversion/plugins/lrf_input.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os, sys +from calibre.customize.conversion import InputFormatPlugin + +class LRFInput(InputFormatPlugin): + + name = 'LRF Input' + author = 'Kovid Goyal' + description = 'Convert LRF files to HTML' + file_types = set(['lrf']) + + def convert(self, stream, options, file_ext, log, + accelerators): + from lxml import etree + from calibre.ebooks.lrf.input import (MediaType, Styles, TextBlock, + Canvas, ImageBlock, RuledLine) + self.log = log + self.log('Generating XML') + from calibre.ebooks.lrf.lrfparser import LRFDocument + d = LRFDocument(stream) + d.parse() + xml = d.to_xml(write_files=True) + if options.verbose > 2: + open('lrs.xml', 'wb').write(xml.encode('utf-8')) + parser = etree.XMLParser(no_network=True, huge_tree=True) + try: + doc = etree.fromstring(xml, parser=parser) + except: + self.log.warn('Failed to parse XML. Trying to recover') + parser = etree.XMLParser(no_network=True, huge_tree=True, + recover=True) + doc = etree.fromstring(xml, parser=parser) + + + char_button_map = {} + for x in doc.xpath('//CharButton[@refobj]'): + ro = x.get('refobj') + jump_button = doc.xpath('//*[@objid="%s"]'%ro) + if jump_button: + jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]') + if jump_to: + char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'), + jump_to[0].get('refobj')) + plot_map = {} + for x in doc.xpath('//Plot[@refobj]'): + ro = x.get('refobj') + image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro) + if image: + imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'% + image[0].get('refstream')) + if imgstr: + plot_map[ro] = imgstr[0].get('file') + + self.log('Converting XML to HTML...') + styledoc = etree.fromstring(P('templates/lrf.xsl', data=True)) + media_type = MediaType() + styles = Styles() + text_block = TextBlock(styles, char_button_map, plot_map, log) + canvas = Canvas(doc, styles, text_block, log) + image_block = ImageBlock(canvas) + ruled_line = RuledLine() + extensions = { + ('calibre', 'media-type') : media_type, + ('calibre', 'text-block') : text_block, + ('calibre', 'ruled-line') : ruled_line, + ('calibre', 'styles') : styles, + ('calibre', 'canvas') : canvas, + ('calibre', 'image-block'): image_block, + } + transform = etree.XSLT(styledoc, extensions=extensions) + try: + result = transform(doc) + except RuntimeError: + sys.setrecursionlimit(5000) + result = transform(doc) + + with open('content.opf', 'wb') as f: + f.write(result) + styles.write() + return os.path.abspath('content.opf') diff --git a/src/calibre/ebooks/lrf/output.py b/src/calibre/ebooks/conversion/plugins/lrf_output.py similarity index 100% rename from src/calibre/ebooks/lrf/output.py rename to src/calibre/ebooks/conversion/plugins/lrf_output.py diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/conversion/plugins/mobi_input.py similarity index 100% rename from src/calibre/ebooks/mobi/input.py rename to src/calibre/ebooks/conversion/plugins/mobi_input.py diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py similarity index 100% rename from src/calibre/ebooks/mobi/output.py rename to src/calibre/ebooks/conversion/plugins/mobi_output.py diff --git a/src/calibre/ebooks/conversion/plugins/odt_input.py b/src/calibre/ebooks/conversion/plugins/odt_input.py new file mode 100644 index 0000000000..5e92ea5163 --- /dev/null +++ b/src/calibre/ebooks/conversion/plugins/odt_input.py @@ -0,0 +1,25 @@ +from __future__ import with_statement +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +''' +Convert an ODT file into a Open Ebook +''' + +from calibre.customize.conversion import InputFormatPlugin + +class ODTInput(InputFormatPlugin): + + name = 'ODT Input' + author = 'Kovid Goyal' + description = 'Convert ODT (OpenOffice) files to HTML' + file_types = set(['odt']) + + + def convert(self, stream, options, file_ext, log, + accelerators): + from calibre.ebooks.odt.input import Extract + return Extract()(stream, '.', log) + + diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/conversion/plugins/oeb_output.py similarity index 96% rename from src/calibre/ebooks/oeb/output.py rename to src/calibre/ebooks/conversion/plugins/oeb_output.py index 38ac2495fd..b69e095d0f 100644 --- a/src/calibre/ebooks/oeb/output.py +++ b/src/calibre/ebooks/conversion/plugins/oeb_output.py @@ -5,13 +5,10 @@ __docformat__ = 'restructuredtext en' import os, re -from lxml import etree -from calibre.customize.conversion import OutputFormatPlugin +from calibre.customize.conversion import (OutputFormatPlugin, + OptionRecommendation) from calibre import CurrentDir -from calibre.customize.conversion import OptionRecommendation - -from urllib import unquote class OEBOutput(OutputFormatPlugin): @@ -23,6 +20,9 @@ class OEBOutput(OutputFormatPlugin): def convert(self, oeb_book, output_path, input_plugin, opts, log): + from urllib import unquote + from lxml import etree + self.log, self.opts = log, opts if not os.path.exists(output_path): os.makedirs(output_path) diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/conversion/plugins/pdb_input.py similarity index 87% rename from src/calibre/ebooks/pdb/input.py rename to src/calibre/ebooks/conversion/plugins/pdb_input.py index cd861216af..69984ab268 100644 --- a/src/calibre/ebooks/pdb/input.py +++ b/src/calibre/ebooks/conversion/plugins/pdb_input.py @@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en' import os from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.pdb.header import PdbHeaderReader -from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader class PDBInput(InputFormatPlugin): @@ -19,6 +17,9 @@ class PDBInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.pdb.header import PdbHeaderReader + from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader + header = PdbHeaderReader(stream) Reader = get_reader(header.ident) diff --git a/src/calibre/ebooks/pdb/output.py b/src/calibre/ebooks/conversion/plugins/pdb_output.py similarity index 91% rename from src/calibre/ebooks/pdb/output.py rename to src/calibre/ebooks/conversion/plugins/pdb_output.py index 7bca4e5c5d..b80f9958ef 100644 --- a/src/calibre/ebooks/pdb/output.py +++ b/src/calibre/ebooks/conversion/plugins/pdb_output.py @@ -8,7 +8,7 @@ import os from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation -from calibre.ebooks.pdb import PDBError, get_writer, FORMAT_WRITERS +from calibre.ebooks.pdb import PDBError, get_writer, ALL_FORMAT_WRITERS class PDBOutput(OutputFormatPlugin): @@ -19,9 +19,9 @@ class PDBOutput(OutputFormatPlugin): options = set([ OptionRecommendation(name='format', recommended_value='doc', level=OptionRecommendation.LOW, - short_switch='f', choices=FORMAT_WRITERS.keys(), + short_switch='f', choices=list(ALL_FORMAT_WRITERS), help=(_('Format to use inside the pdb container. Choices are:')+\ - ' %s' % FORMAT_WRITERS.keys())), + ' %s' % list(ALL_FORMAT_WRITERS))), OptionRecommendation(name='pdb_output_encoding', recommended_value='cp1252', level=OptionRecommendation.LOW, help=_('Specify the character encoding of the output document. ' \ diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/conversion/plugins/pdf_input.py similarity index 92% rename from src/calibre/ebooks/pdf/input.py rename to src/calibre/ebooks/conversion/plugins/pdf_input.py index 51f44ba502..be0150834b 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/conversion/plugins/pdf_input.py @@ -7,10 +7,6 @@ __docformat__ = 'restructuredtext en' import os from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation -from calibre.ebooks.pdf.pdftohtml import pdftohtml -from calibre.ebooks.metadata.opf2 import OPFCreator -from calibre.constants import plugins -pdfreflow, pdfreflow_err = plugins['pdfreflow'] class PDFInput(InputFormatPlugin): @@ -31,6 +27,9 @@ class PDFInput(InputFormatPlugin): ]) def convert_new(self, stream, accelerators): + from calibre.constants import plugins + pdfreflow, pdfreflow_err = plugins['pdfreflow'] + from calibre.ebooks.pdf.reflow import PDFDocument from calibre.utils.cleantext import clean_ascii_chars if pdfreflow_err: @@ -43,6 +42,9 @@ class PDFInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.metadata.opf2 import OPFCreator + from calibre.ebooks.pdf.pdftohtml import pdftohtml + log.debug('Converting file to html...') # The main html file will be named index.html self.opts, self.log = options, log diff --git a/src/calibre/ebooks/pdf/output.py b/src/calibre/ebooks/conversion/plugins/pdf_output.py similarity index 86% rename from src/calibre/ebooks/pdf/output.py rename to src/calibre/ebooks/conversion/plugins/pdf_output.py index 14dd27368c..4422265976 100644 --- a/src/calibre/ebooks/pdf/output.py +++ b/src/calibre/ebooks/conversion/plugins/pdf_output.py @@ -13,10 +13,50 @@ import os from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation -from calibre.ebooks.metadata.opf2 import OPF from calibre.ptempfile import TemporaryDirectory -from calibre.ebooks.pdf.pageoptions import UNITS, PAPER_SIZES, \ - ORIENTATIONS + +UNITS = [ + 'millimeter', + 'point', + 'inch' , + 'pica' , + 'didot', + 'cicero', + 'devicepixel', + ] + +PAPER_SIZES = ['b2', + 'a9', + 'executive', + 'tabloid', + 'b4', + 'b5', + 'b6', + 'b7', + 'b0', + 'b1', + 'letter', + 'b3', + 'a7', + 'a8', + 'b8', + 'b9', + 'a3', + 'a1', + 'folio', + 'c5e', + 'dle', + 'a0', + 'ledger', + 'legal', + 'a6', + 'a2', + 'b10', + 'a5', + 'comm10e', + 'a4'] + +ORIENTATIONS = ['portrait', 'landscape'] class PDFOutput(OutputFormatPlugin): @@ -26,23 +66,23 @@ class PDFOutput(OutputFormatPlugin): options = set([ OptionRecommendation(name='unit', recommended_value='inch', - level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(), + level=OptionRecommendation.LOW, short_switch='u', choices=UNITS, help=_('The unit of measure. Default is inch. Choices ' 'are %s ' - 'Note: This does not override the unit for margins!') % UNITS.keys()), + 'Note: This does not override the unit for margins!') % UNITS), OptionRecommendation(name='paper_size', recommended_value='letter', - level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(), + level=OptionRecommendation.LOW, choices=PAPER_SIZES, help=_('The size of the paper. This size will be overridden when a ' 'non default output profile is used. Default is letter. Choices ' - 'are %s') % PAPER_SIZES.keys()), + 'are %s') % PAPER_SIZES), OptionRecommendation(name='custom_size', recommended_value=None, help=_('Custom size of the document. Use the form widthxheight ' 'EG. `123x321` to specify the width and height. ' 'This overrides any specified paper-size.')), OptionRecommendation(name='orientation', recommended_value='portrait', - level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(), + level=OptionRecommendation.LOW, choices=ORIENTATIONS, help=_('The orientation of the page. Default is portrait. Choices ' - 'are %s') % ORIENTATIONS.keys()), + 'are %s') % ORIENTATIONS), OptionRecommendation(name='preserve_cover_aspect_ratio', recommended_value=False, help=_('Preserve the aspect ratio of the cover, instead' @@ -105,6 +145,8 @@ class PDFOutput(OutputFormatPlugin): def convert_text(self, oeb_book): from calibre.ebooks.pdf.writer import PDFWriter + from calibre.ebooks.metadata.opf2 import OPF + self.log.debug('Serializing oeb input to disk for processing...') self.get_cover_data() diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/conversion/plugins/pml_input.py similarity index 96% rename from src/calibre/ebooks/pml/input.py rename to src/calibre/ebooks/conversion/plugins/pml_input.py index 4d59668b12..1351a5c492 100644 --- a/src/calibre/ebooks/pml/input.py +++ b/src/calibre/ebooks/conversion/plugins/pml_input.py @@ -11,9 +11,6 @@ import shutil from calibre.customize.conversion import InputFormatPlugin from calibre.ptempfile import TemporaryDirectory from calibre.utils.zipfile import ZipFile -from calibre.ebooks.pml.pmlconverter import PML_HTMLizer -from calibre.ebooks.metadata.toc import TOC -from calibre.ebooks.metadata.opf2 import OPFCreator class PMLInput(InputFormatPlugin): @@ -24,6 +21,8 @@ class PMLInput(InputFormatPlugin): file_types = set(['pml', 'pmlz']) def process_pml(self, pml_path, html_path, close_all=False): + from calibre.ebooks.pml.pmlconverter import PML_HTMLizer + pclose = False hclose = False @@ -85,6 +84,9 @@ class PMLInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.metadata.toc import TOC + from calibre.ebooks.metadata.opf2 import OPFCreator + self.options = options self.log = log pages, images = [], [] diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/conversion/plugins/pml_output.py similarity index 88% rename from src/calibre/ebooks/pml/output.py rename to src/calibre/ebooks/conversion/plugins/pml_output.py index 63d8a8b220..b406537a98 100644 --- a/src/calibre/ebooks/pml/output.py +++ b/src/calibre/ebooks/conversion/plugins/pml_output.py @@ -4,21 +4,11 @@ __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -import os +import os, cStringIO -try: - from PIL import Image - Image -except ImportError: - import Image - -import cStringIO - -from calibre.customize.conversion import OutputFormatPlugin -from calibre.customize.conversion import OptionRecommendation +from calibre.customize.conversion import (OutputFormatPlugin, + OptionRecommendation) from calibre.ptempfile import TemporaryDirectory -from calibre.utils.zipfile import ZipFile -from calibre.ebooks.pml.pmlml import PMLMLizer class PMLOutput(OutputFormatPlugin): @@ -43,6 +33,9 @@ class PMLOutput(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from calibre.ebooks.pml.pmlml import PMLMLizer + from calibre.utils.zipfile import ZipFile + with TemporaryDirectory('_pmlz_output') as tdir: pmlmlizer = PMLMLizer(log) pml = unicode(pmlmlizer.extract_content(oeb_book, opts)) @@ -59,6 +52,13 @@ class PMLOutput(OutputFormatPlugin): pmlz.add_dir(tdir) def write_images(self, manifest, image_hrefs, out_dir, opts): + try: + from PIL import Image + Image + except ImportError: + import Image + + from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES for item in manifest: if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys(): diff --git a/src/calibre/ebooks/rb/input.py b/src/calibre/ebooks/conversion/plugins/rb_input.py similarity index 91% rename from src/calibre/ebooks/rb/input.py rename to src/calibre/ebooks/conversion/plugins/rb_input.py index 8b05c1d42e..6a6ca3205a 100644 --- a/src/calibre/ebooks/rb/input.py +++ b/src/calibre/ebooks/conversion/plugins/rb_input.py @@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en' import os -from calibre.ebooks.rb.reader import Reader from calibre.customize.conversion import InputFormatPlugin class RBInput(InputFormatPlugin): @@ -18,6 +17,8 @@ class RBInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.rb.reader import Reader + reader = Reader(stream, log, options.input_encoding) opf = reader.extract_content(os.getcwd()) diff --git a/src/calibre/ebooks/rb/output.py b/src/calibre/ebooks/conversion/plugins/rb_output.py similarity index 95% rename from src/calibre/ebooks/rb/output.py rename to src/calibre/ebooks/conversion/plugins/rb_output.py index a16e408b0f..992843719c 100644 --- a/src/calibre/ebooks/rb/output.py +++ b/src/calibre/ebooks/conversion/plugins/rb_output.py @@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en' import os from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation -from calibre.ebooks.rb.writer import RBWriter class RBOutput(OutputFormatPlugin): @@ -22,6 +21,8 @@ class RBOutput(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from calibre.ebooks.rb.writer import RBWriter + close = False if not hasattr(output_path, 'write'): close = True diff --git a/src/calibre/web/feeds/input.py b/src/calibre/ebooks/conversion/plugins/recipe_input.py similarity index 100% rename from src/calibre/web/feeds/input.py rename to src/calibre/ebooks/conversion/plugins/recipe_input.py diff --git a/src/calibre/ebooks/conversion/plugins/rtf_input.py b/src/calibre/ebooks/conversion/plugins/rtf_input.py new file mode 100644 index 0000000000..91c285c10c --- /dev/null +++ b/src/calibre/ebooks/conversion/plugins/rtf_input.py @@ -0,0 +1,298 @@ +from __future__ import with_statement +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +import os, glob, re, textwrap + +from calibre.customize.conversion import InputFormatPlugin + +border_style_map = { + 'single' : 'solid', + 'double-thickness-border' : 'double', + 'shadowed-border': 'outset', + 'double-border': 'double', + 'dotted-border': 'dotted', + 'dashed': 'dashed', + 'hairline': 'solid', + 'inset': 'inset', + 'dash-small': 'dashed', + 'dot-dash': 'dotted', + 'dot-dot-dash': 'dotted', + 'outset': 'outset', + 'tripple': 'double', + 'triple': 'double', + 'thick-thin-small': 'solid', + 'thin-thick-small': 'solid', + 'thin-thick-thin-small': 'solid', + 'thick-thin-medium': 'solid', + 'thin-thick-medium': 'solid', + 'thin-thick-thin-medium': 'solid', + 'thick-thin-large': 'solid', + 'thin-thick-thin-large': 'solid', + 'wavy': 'ridge', + 'double-wavy': 'ridge', + 'striped': 'ridge', + 'emboss': 'inset', + 'engrave': 'inset', + 'frame': 'ridge', +} + + +class RTFInput(InputFormatPlugin): + + name = 'RTF Input' + author = 'Kovid Goyal' + description = 'Convert RTF files to HTML' + file_types = set(['rtf']) + + def generate_xml(self, stream): + from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf + ofile = 'dataxml.xml' + run_lev, debug_dir, indent_out = 1, None, 0 + if getattr(self.opts, 'debug_pipeline', None) is not None: + try: + os.mkdir('rtfdebug') + debug_dir = 'rtfdebug' + run_lev = 4 + indent_out = 1 + self.log('Running RTFParser in debug mode') + except: + self.log.warn('Impossible to run RTFParser in debug mode') + parser = ParseRtf( + in_file = stream, + out_file = ofile, + # Convert symbol fonts to unicode equivalents. Default + # is 1 + convert_symbol = 1, + + # Convert Zapf fonts to unicode equivalents. Default + # is 1. + convert_zapf = 1, + + # Convert Wingding fonts to unicode equivalents. + # Default is 1. + convert_wingdings = 1, + + # Convert RTF caps to real caps. + # Default is 1. + convert_caps = 1, + + # Indent resulting XML. + # Default is 0 (no indent). + indent = indent_out, + + # Form lists from RTF. Default is 1. + form_lists = 1, + + # Convert headings to sections. Default is 0. + headings_to_sections = 1, + + # Group paragraphs with the same style name. Default is 1. + group_styles = 1, + + # Group borders. Default is 1. + group_borders = 1, + + # Write or do not write paragraphs. Default is 0. + empty_paragraphs = 1, + + #debug + deb_dir = debug_dir, + run_level = run_lev, + ) + parser.parse_rtf() + with open(ofile, 'rb') as f: + return f.read() + + def extract_images(self, picts): + import imghdr + self.log('Extracting images...') + + with open(picts, 'rb') as f: + raw = f.read() + picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw)) + hex = re.compile(r'[^a-fA-F0-9]') + encs = [hex.sub('', pict) for pict in picts] + + count = 0 + imap = {} + for enc in encs: + if len(enc) % 2 == 1: + enc = enc[:-1] + data = enc.decode('hex') + fmt = imghdr.what(None, data) + if fmt is None: + fmt = 'wmf' + count += 1 + name = '%04d.%s' % (count, fmt) + with open(name, 'wb') as f: + f.write(data) + imap[count] = name + # with open(name+'.hex', 'wb') as f: + # f.write(enc) + return self.convert_images(imap) + + def convert_images(self, imap): + self.default_img = None + for count, val in imap.iteritems(): + try: + imap[count] = self.convert_image(val) + except: + self.log.exception('Failed to convert', val) + return imap + + def convert_image(self, name): + if not name.endswith('.wmf'): + return name + try: + return self.rasterize_wmf(name) + except: + self.log.exception('Failed to convert WMF image %r'%name) + return self.replace_wmf(name) + + def replace_wmf(self, name): + from calibre.ebooks import calibre_cover + if self.default_img is None: + self.default_img = calibre_cover('Conversion of WMF images is not supported', + 'Use Microsoft Word or OpenOffice to save this RTF file' + ' as HTML and convert that in calibre.', title_size=36, + author_size=20) + name = name.replace('.wmf', '.jpg') + with open(name, 'wb') as f: + f.write(self.default_img) + return name + + def rasterize_wmf(self, name): + from calibre.utils.wmf.parse import wmf_unwrap + with open(name, 'rb') as f: + data = f.read() + data = wmf_unwrap(data) + name = name.replace('.wmf', '.png') + with open(name, 'wb') as f: + f.write(data) + return name + + + def write_inline_css(self, ic, border_styles): + font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in + enumerate(ic.font_sizes)] + color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in + enumerate(ic.colors)] + css = textwrap.dedent(''' + span.none { + text-decoration: none; font-weight: normal; + font-style: normal; font-variant: normal + } + + span.italics { font-style: italic } + + span.bold { font-weight: bold } + + span.small-caps { font-variant: small-caps } + + span.underlined { text-decoration: underline } + + span.strike-through { text-decoration: line-through } + + ''') + css += '\n'+'\n'.join(font_size_classes) + css += '\n' +'\n'.join(color_classes) + + for cls, val in border_styles.iteritems(): + css += '\n\n.%s {\n%s\n}'%(cls, val) + + with open('styles.css', 'ab') as f: + f.write(css) + + def convert_borders(self, doc): + border_styles = [] + style_map = {} + for elem in doc.xpath(r'//*[local-name()="cell"]'): + style = ['border-style: hidden', 'border-width: 1px', + 'border-color: black'] + for x in ('bottom', 'top', 'left', 'right'): + bs = elem.get('border-cell-%s-style'%x, None) + if bs: + cbs = border_style_map.get(bs, 'solid') + style.append('border-%s-style: %s'%(x, cbs)) + bw = elem.get('border-cell-%s-line-width'%x, None) + if bw: + style.append('border-%s-width: %spt'%(x, bw)) + bc = elem.get('border-cell-%s-color'%x, None) + if bc: + style.append('border-%s-color: %s'%(x, bc)) + style = ';\n'.join(style) + if style not in border_styles: + border_styles.append(style) + idx = border_styles.index(style) + cls = 'border_style%d'%idx + style_map[cls] = style + elem.set('class', cls) + return style_map + + def convert(self, stream, options, file_ext, log, + accelerators): + from lxml import etree + from calibre.ebooks.metadata.meta import get_metadata + from calibre.ebooks.metadata.opf2 import OPFCreator + from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException + from calibre.ebooks.rtf.input import InlineClass + self.opts = options + self.log = log + self.log('Converting RTF to XML...') + try: + xml = self.generate_xml(stream.name) + except RtfInvalidCodeException as e: + raise ValueError(_('This RTF file has a feature calibre does not ' + 'support. Convert it to HTML first and then try it.\n%s')%e) + + d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf')) + if d: + imap = {} + try: + imap = self.extract_images(d[0]) + except: + self.log.exception('Failed to extract images...') + + self.log('Parsing XML...') + parser = etree.XMLParser(recover=True, no_network=True) + doc = etree.fromstring(xml, parser=parser) + border_styles = self.convert_borders(doc) + for pict in doc.xpath('//rtf:pict[@num]', + namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}): + num = int(pict.get('num')) + name = imap.get(num, None) + if name is not None: + pict.set('num', name) + + self.log('Converting XML to HTML...') + inline_class = InlineClass(self.log) + styledoc = etree.fromstring(P('templates/rtf.xsl', data=True)) + extensions = { ('calibre', 'inline-class') : inline_class } + transform = etree.XSLT(styledoc, extensions=extensions) + result = transform(doc) + html = 'index.xhtml' + with open(html, 'wb') as f: + res = transform.tostring(result) + # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] + #clean multiple \n + res = re.sub('\n+', '\n', res) + # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines + # res = re.sub('\s*', '', res) + # res = re.sub('(?<=\n)\n{2}', + # u'

\u00a0

\n'.encode('utf-8'), res) + f.write(res) + self.write_inline_css(inline_class, border_styles) + stream.seek(0) + mi = get_metadata(stream, 'rtf') + if not mi.title: + mi.title = _('Unknown') + if not mi.authors: + mi.authors = [_('Unknown')] + opf = OPFCreator(os.getcwd(), mi) + opf.create_manifest([('index.xhtml', None)]) + opf.create_spine(['index.xhtml']) + opf.render(open('metadata.opf', 'wb')) + return os.path.abspath('metadata.opf') + + diff --git a/src/calibre/ebooks/rtf/output.py b/src/calibre/ebooks/conversion/plugins/rtf_output.py similarity index 94% rename from src/calibre/ebooks/rtf/output.py rename to src/calibre/ebooks/conversion/plugins/rtf_output.py index 5738b7e6f4..ae9e1ea566 100644 --- a/src/calibre/ebooks/rtf/output.py +++ b/src/calibre/ebooks/conversion/plugins/rtf_output.py @@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en' import os -from calibre.ebooks.rtf.rtfml import RTFMLizer from calibre.customize.conversion import OutputFormatPlugin class RTFOutput(OutputFormatPlugin): @@ -16,6 +15,8 @@ class RTFOutput(OutputFormatPlugin): file_type = 'rtf' def convert(self, oeb_book, output_path, input_plugin, opts, log): + from calibre.ebooks.rtf.rtfml import RTFMLizer + rtfmlitzer = RTFMLizer(log) content = rtfmlitzer.extract_content(oeb_book, opts) diff --git a/src/calibre/ebooks/snb/input.py b/src/calibre/ebooks/conversion/plugins/snb_input.py similarity index 97% rename from src/calibre/ebooks/snb/input.py rename to src/calibre/ebooks/conversion/plugins/snb_input.py index 13b1ca45f9..ae3ab0033c 100755 --- a/src/calibre/ebooks/snb/input.py +++ b/src/calibre/ebooks/conversion/plugins/snb_input.py @@ -4,13 +4,11 @@ __license__ = 'GPL 3' __copyright__ = '2010, Li Fanxi ' __docformat__ = 'restructuredtext en' -import os, uuid +import os from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.snb.snbfile import SNBFile from calibre.ptempfile import TemporaryDirectory from calibre.utils.filenames import ascii_filename -from lxml import etree HTML_TEMPLATE = u'%s\n%s\n' @@ -29,7 +27,12 @@ class SNBInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + import uuid + from lxml import etree + from calibre.ebooks.oeb.base import DirContainer + from calibre.ebooks.snb.snbfile import SNBFile + log.debug("Parsing SNB file...") snbFile = SNBFile() try: diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/conversion/plugins/snb_output.py similarity index 98% rename from src/calibre/ebooks/snb/output.py rename to src/calibre/ebooks/conversion/plugins/snb_output.py index 07a0460c57..e9b8af0db6 100644 --- a/src/calibre/ebooks/snb/output.py +++ b/src/calibre/ebooks/conversion/plugins/snb_output.py @@ -6,12 +6,9 @@ __docformat__ = 'restructuredtext en' import os, string -from lxml import etree from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation from calibre.ptempfile import TemporaryDirectory from calibre.constants import __appname__, __version__ -from calibre.ebooks.snb.snbfile import SNBFile -from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName class SNBOutput(OutputFormatPlugin): @@ -49,6 +46,11 @@ class SNBOutput(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from lxml import etree + from calibre.ebooks.snb.snbfile import SNBFile + from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName + + self.opts = opts from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable try: diff --git a/src/calibre/ebooks/tcr/input.py b/src/calibre/ebooks/conversion/plugins/tcr_input.py similarity index 87% rename from src/calibre/ebooks/tcr/input.py rename to src/calibre/ebooks/conversion/plugins/tcr_input.py index 4d15fd0923..5ee34285bd 100644 --- a/src/calibre/ebooks/tcr/input.py +++ b/src/calibre/ebooks/conversion/plugins/tcr_input.py @@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en' from cStringIO import StringIO from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.compression.tcr import decompress class TCRInput(InputFormatPlugin): @@ -17,6 +16,8 @@ class TCRInput(InputFormatPlugin): file_types = set(['tcr']) def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.compression.tcr import decompress + log.info('Decompressing text...') raw_txt = decompress(stream) @@ -28,7 +29,7 @@ class TCRInput(InputFormatPlugin): txt_plugin = plugin_for_input_format('txt') for opt in txt_plugin.options: if not hasattr(self.options, opt.option.name): - setattr(self.options, opt.option.name, opt.recommended_value) + setattr(options, opt.option.name, opt.recommended_value) stream.seek(0) return txt_plugin.convert(stream, options, diff --git a/src/calibre/ebooks/tcr/output.py b/src/calibre/ebooks/conversion/plugins/tcr_output.py similarity index 93% rename from src/calibre/ebooks/tcr/output.py rename to src/calibre/ebooks/conversion/plugins/tcr_output.py index 97c9cae26c..f4dbcce57b 100644 --- a/src/calibre/ebooks/tcr/output.py +++ b/src/calibre/ebooks/conversion/plugins/tcr_output.py @@ -8,8 +8,6 @@ import os from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation -from calibre.ebooks.txt.txtml import TXTMLizer -from calibre.ebooks.compression.tcr import compress class TCROutput(OutputFormatPlugin): @@ -25,6 +23,9 @@ class TCROutput(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from calibre.ebooks.txt.txtml import TXTMLizer + from calibre.ebooks.compression.tcr import compress + close = False if not hasattr(output_path, 'write'): close = True diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/conversion/plugins/txt_input.py similarity index 94% rename from src/calibre/ebooks/txt/input.py rename to src/calibre/ebooks/conversion/plugins/txt_input.py index 49c8a2129d..e916b30c29 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/conversion/plugins/txt_input.py @@ -8,14 +8,6 @@ import os from calibre import _ent_pat, walk, xml_entity_to_unicode from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation -from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator -from calibre.ebooks.chardet import detect -from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ - separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ - preserve_spaces, detect_paragraph_type, detect_formatting_type, \ - normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \ - separate_hard_scene_breaks -from calibre.utils.zipfile import ZipFile class TXTInput(InputFormatPlugin): @@ -61,6 +53,17 @@ class TXTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator + from calibre.ebooks.chardet import detect + from calibre.utils.zipfile import ZipFile + from calibre.ebooks.txt.processor import (convert_basic, + convert_markdown, separate_paragraphs_single_line, + separate_paragraphs_print_formatted, preserve_spaces, + detect_paragraph_type, detect_formatting_type, + normalize_line_endings, convert_textile, remove_indents, + block_to_single_line, separate_hard_scene_breaks) + + self.log = log txt = '' log.debug('Reading text from file...') diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/conversion/plugins/txt_output.py similarity index 93% rename from src/calibre/ebooks/txt/output.py rename to src/calibre/ebooks/conversion/plugins/txt_output.py index d9c42eb1dc..6cd4c3f801 100644 --- a/src/calibre/ebooks/txt/output.py +++ b/src/calibre/ebooks/conversion/plugins/txt_output.py @@ -7,15 +7,12 @@ __docformat__ = 'restructuredtext en' import os import shutil -from lxml import etree from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation -from calibre.ebooks.txt.txtml import TXTMLizer -from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines from calibre.ptempfile import TemporaryDirectory, TemporaryFile -from calibre.utils.cleantext import clean_ascii_chars -from calibre.utils.zipfile import ZipFile + +NEWLINE_TYPES = ['system', 'unix', 'old_mac', 'windows'] class TXTOutput(OutputFormatPlugin): @@ -26,11 +23,11 @@ class TXTOutput(OutputFormatPlugin): options = set([ OptionRecommendation(name='newline', recommended_value='system', level=OptionRecommendation.LOW, - short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(), + short_switch='n', choices=NEWLINE_TYPES, help=_('Type of newline to use. Options are %s. Default is \'system\'. ' 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. ' 'For Mac OS X use \'unix\'. \'system\' will default to the newline ' - 'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())), + 'type used by this OS.') % sorted(NEWLINE_TYPES)), OptionRecommendation(name='txt_output_encoding', recommended_value='utf-8', level=OptionRecommendation.LOW, help=_('Specify the character encoding of the output document. ' \ @@ -76,6 +73,11 @@ class TXTOutput(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from calibre.ebooks.txt.txtml import TXTMLizer + from calibre.utils.cleantext import clean_ascii_chars + from calibre.ebooks.txt.newlines import specified_newlines, TxtNewlines + + if opts.txt_output_formatting.lower() == 'markdown': from calibre.ebooks.txt.markdownml import MarkdownMLizer self.writer = MarkdownMLizer(log) @@ -116,6 +118,9 @@ class TXTZOutput(TXTOutput): def convert(self, oeb_book, output_path, input_plugin, opts, log): from calibre.ebooks.oeb.base import OEB_IMAGES + from calibre.utils.zipfile import ZipFile + from lxml import etree + with TemporaryDirectory('_txtz_output') as tdir: # TXT txt_name = 'index.txt' diff --git a/src/calibre/ebooks/epub/fix/epubcheck.py b/src/calibre/ebooks/epub/fix/epubcheck.py index 9e812e1cf4..0029868c23 100644 --- a/src/calibre/ebooks/epub/fix/epubcheck.py +++ b/src/calibre/ebooks/epub/fix/epubcheck.py @@ -6,7 +6,6 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub -from calibre.utils.date import parse_date, strptime class Epubcheck(ePubFixer): @@ -35,6 +34,8 @@ class Epubcheck(ePubFixer): return 'epubcheck' def fix_pubdates(self): + from calibre.utils.date import parse_date, strptime + dirtied = False opf = self.container.opf for dcdate in opf.xpath('//dc:date', diff --git a/src/calibre/ebooks/html/__init__.py b/src/calibre/ebooks/html/__init__.py index d026256ee8..00afa6d6b6 100644 --- a/src/calibre/ebooks/html/__init__.py +++ b/src/calibre/ebooks/html/__init__.py @@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en' import re -from lxml.etree import tostring as _tostring def tostring(root, strip_comments=False, pretty_print=False): ''' Serialize processed XHTML. ''' + from lxml.etree import tostring as _tostring + root.set('xmlns', 'http://www.w3.org/1999/xhtml') root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink') for x in root.iter(): diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index d303dd66a5..6cacb34edc 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -11,19 +11,13 @@ __docformat__ = 'restructuredtext en' Input plugin for HTML or OPF ebooks. ''' -import os, re, sys, uuid, tempfile, errno as gerrno +import os, re, sys, errno as gerrno from urlparse import urlparse, urlunparse -from urllib import unquote, quote -from functools import partial -from itertools import izip +from urllib import unquote -from calibre.customize.conversion import InputFormatPlugin from calibre.ebooks.chardet import detect_xml_encoding -from calibre.customize.conversion import OptionRecommendation -from calibre.constants import islinux, isbsd, iswindows +from calibre.constants import iswindows from calibre import unicode_path, as_unicode -from calibre.utils.localization import get_lang -from calibre.utils.filenames import ascii_filename class Link(object): ''' @@ -241,262 +235,4 @@ def get_filelist(htmlfile, dir, opts, log): return filelist -class HTMLInput(InputFormatPlugin): - name = 'HTML Input' - author = 'Kovid Goyal' - description = 'Convert HTML and OPF files to an OEB' - file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml']) - - options = set([ - OptionRecommendation(name='breadth_first', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Traverse links in HTML files breadth first. Normally, ' - 'they are traversed depth first.' - ) - ), - - OptionRecommendation(name='max_levels', - recommended_value=5, level=OptionRecommendation.LOW, - help=_('Maximum levels of recursion when following links in ' - 'HTML files. Must be non-negative. 0 implies that no ' - 'links in the root HTML file are followed. Default is ' - '%default.' - ) - ), - - OptionRecommendation(name='dont_package', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Normally this input plugin re-arranges all the input ' - 'files into a standard folder hierarchy. Only use this option ' - 'if you know what you are doing as it can result in various ' - 'nasty side effects in the rest of the conversion pipeline.' - ) - ), - - ]) - - def convert(self, stream, opts, file_ext, log, - accelerators): - self._is_case_sensitive = None - basedir = os.getcwd() - self.opts = opts - - fname = None - if hasattr(stream, 'name'): - basedir = os.path.dirname(stream.name) - fname = os.path.basename(stream.name) - - if file_ext != 'opf': - if opts.dont_package: - raise ValueError('The --dont-package option is not supported for an HTML input file') - from calibre.ebooks.metadata.html import get_metadata - mi = get_metadata(stream) - if fname: - from calibre.ebooks.metadata.meta import metadata_from_filename - fmi = metadata_from_filename(fname) - fmi.smart_update(mi) - mi = fmi - oeb = self.create_oebbook(stream.name, basedir, opts, log, mi) - return oeb - - from calibre.ebooks.conversion.plumber import create_oebbook - return create_oebbook(log, stream.name, opts, - encoding=opts.input_encoding) - - def is_case_sensitive(self, path): - if getattr(self, '_is_case_sensitive', None) is not None: - return self._is_case_sensitive - if not path or not os.path.exists(path): - return islinux or isbsd - self._is_case_sensitive = not (os.path.exists(path.lower()) \ - and os.path.exists(path.upper())) - return self._is_case_sensitive - - def create_oebbook(self, htmlpath, basedir, opts, log, mi): - from calibre.ebooks.conversion.plumber import create_oebbook - from calibre.ebooks.oeb.base import (DirContainer, - rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, - xpath) - from calibre import guess_type - from calibre.ebooks.oeb.transforms.metadata import \ - meta_info_to_oeb_metadata - import cssutils, logging - cssutils.log.setLevel(logging.WARN) - self.OEB_STYLES = OEB_STYLES - oeb = create_oebbook(log, None, opts, self, - encoding=opts.input_encoding, populate=False) - self.oeb = oeb - - metadata = oeb.metadata - meta_info_to_oeb_metadata(mi, metadata, log) - if not metadata.language: - oeb.logger.warn(u'Language not specified') - metadata.add('language', get_lang().replace('_', '-')) - if not metadata.creator: - oeb.logger.warn('Creator not specified') - metadata.add('creator', self.oeb.translate(__('Unknown'))) - if not metadata.title: - oeb.logger.warn('Title not specified') - metadata.add('title', self.oeb.translate(__('Unknown'))) - bookid = str(uuid.uuid4()) - metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') - for ident in metadata.identifier: - if 'id' in ident.attrib: - self.oeb.uid = metadata.identifier[0] - break - - filelist = get_filelist(htmlpath, basedir, opts, log) - filelist = [f for f in filelist if not f.is_binary] - htmlfile_map = {} - for f in filelist: - path = f.path - oeb.container = DirContainer(os.path.dirname(path), log, - ignore_opf=True) - bname = os.path.basename(path) - id, href = oeb.manifest.generate(id='html', - href=ascii_filename(bname)) - htmlfile_map[path] = href - item = oeb.manifest.add(id, href, 'text/html') - item.html_input_href = bname - oeb.spine.add(item, True) - - self.added_resources = {} - self.log = log - self.log('Normalizing filename cases') - for path, href in htmlfile_map.items(): - if not self.is_case_sensitive(path): - path = path.lower() - self.added_resources[path] = href - self.urlnormalize, self.DirContainer = urlnormalize, DirContainer - self.urldefrag = urldefrag - self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME - - self.log('Rewriting HTML links') - for f in filelist: - path = f.path - dpath = os.path.dirname(path) - oeb.container = DirContainer(dpath, log, ignore_opf=True) - item = oeb.manifest.hrefs[htmlfile_map[path]] - rewrite_links(item.data, partial(self.resource_adder, base=dpath)) - - for item in oeb.manifest.values(): - if item.media_type in self.OEB_STYLES: - dpath = None - for path, href in self.added_resources.items(): - if href == item.href: - dpath = os.path.dirname(path) - break - cssutils.replaceUrls(item.data, - partial(self.resource_adder, base=dpath)) - - toc = self.oeb.toc - self.oeb.auto_generated_toc = True - titles = [] - headers = [] - for item in self.oeb.spine: - if not item.linear: continue - html = item.data - title = ''.join(xpath(html, '/h:html/h:head/h:title/text()')) - title = re.sub(r'\s+', ' ', title.strip()) - if title: - titles.append(title) - headers.append('(unlabled)') - for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'): - expr = '/h:html/h:body//h:%s[position()=1]/text()' - header = ''.join(xpath(html, expr % tag)) - header = re.sub(r'\s+', ' ', header.strip()) - if header: - headers[-1] = header - break - use = titles - if len(titles) > len(set(titles)): - use = headers - for title, item in izip(use, self.oeb.spine): - if not item.linear: continue - toc.add(title, item.href) - - oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True) - return oeb - - def link_to_local_path(self, link_, base=None): - if not isinstance(link_, unicode): - try: - link_ = link_.decode('utf-8', 'error') - except: - self.log.warn('Failed to decode link %r. Ignoring'%link_) - return None, None - try: - l = Link(link_, base if base else os.getcwdu()) - except: - self.log.exception('Failed to process link: %r'%link_) - return None, None - if l.path is None: - # Not a local resource - return None, None - link = l.path.replace('/', os.sep).strip() - frag = l.fragment - if not link: - return None, None - return link, frag - - def resource_adder(self, link_, base=None): - link, frag = self.link_to_local_path(link_, base=base) - if link is None: - return link_ - try: - if base and not os.path.isabs(link): - link = os.path.join(base, link) - link = os.path.abspath(link) - except: - return link_ - if not os.access(link, os.R_OK): - return link_ - if os.path.isdir(link): - self.log.warn(link_, 'is a link to a directory. Ignoring.') - return link_ - if not self.is_case_sensitive(tempfile.gettempdir()): - link = link.lower() - if link not in self.added_resources: - bhref = os.path.basename(link) - id, href = self.oeb.manifest.generate(id='added', - href=bhref) - guessed = self.guess_type(href)[0] - media_type = guessed or self.BINARY_MIME - if media_type == 'text/plain': - self.log.warn('Ignoring link to text file %r'%link_) - return None - - self.oeb.log.debug('Added', link) - self.oeb.container = self.DirContainer(os.path.dirname(link), - self.oeb.log, ignore_opf=True) - # Load into memory - item = self.oeb.manifest.add(id, href, media_type) - # bhref refers to an already existing file. The read() method of - # DirContainer will call unquote on it before trying to read the - # file, therefore we quote it here. - if isinstance(bhref, unicode): - bhref = bhref.encode('utf-8') - item.html_input_href = quote(bhref).decode('utf-8') - if guessed in self.OEB_STYLES: - item.override_css_fetch = partial( - self.css_import_handler, os.path.dirname(link)) - item.data - self.added_resources[link] = href - - nlink = self.added_resources[link] - if frag: - nlink = '#'.join((nlink, frag)) - return nlink - - def css_import_handler(self, base, href): - link, frag = self.link_to_local_path(href, base=base) - if link is None or not os.access(link, os.R_OK) or os.path.isdir(link): - return (None, None) - try: - raw = open(link, 'rb').read().decode('utf-8', 'replace') - raw = self.oeb.css_preprocessor(raw, add_namespace=True) - except: - self.log.exception('Failed to read CSS file: %r'%link) - return (None, None) - return (None, raw) diff --git a/src/calibre/ebooks/lrf/__init__.py b/src/calibre/ebooks/lrf/__init__.py index e4a18a1f91..b12c0d6b34 100644 --- a/src/calibre/ebooks/lrf/__init__.py +++ b/src/calibre/ebooks/lrf/__init__.py @@ -4,7 +4,6 @@ __copyright__ = '2008, Kovid Goyal ' This package contains logic to read and write LRF files. The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}. """ -from uuid import uuid4 from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, \ @@ -60,6 +59,7 @@ def find_custom_fonts(options, logger): def Book(options, logger, font_delta=0, header=None, profile=PRS500_PROFILE, **settings): + from uuid import uuid4 ps = {} ps['topmargin'] = options.top_margin ps['evensidemargin'] = options.left_margin diff --git a/src/calibre/ebooks/lrf/input.py b/src/calibre/ebooks/lrf/input.py index 9777a8a998..e9bf42c6bd 100644 --- a/src/calibre/ebooks/lrf/input.py +++ b/src/calibre/ebooks/lrf/input.py @@ -6,12 +6,11 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, textwrap, sys, operator +import textwrap, operator from copy import deepcopy, copy from lxml import etree -from calibre.customize.conversion import InputFormatPlugin from calibre import guess_type class Canvas(etree.XSLTExtension): @@ -406,76 +405,4 @@ class Styles(etree.XSLTExtension): -class LRFInput(InputFormatPlugin): - name = 'LRF Input' - author = 'Kovid Goyal' - description = 'Convert LRF files to HTML' - file_types = set(['lrf']) - - def convert(self, stream, options, file_ext, log, - accelerators): - self.log = log - self.log('Generating XML') - from calibre.ebooks.lrf.lrfparser import LRFDocument - d = LRFDocument(stream) - d.parse() - xml = d.to_xml(write_files=True) - if options.verbose > 2: - open('lrs.xml', 'wb').write(xml.encode('utf-8')) - parser = etree.XMLParser(no_network=True, huge_tree=True) - try: - doc = etree.fromstring(xml, parser=parser) - except: - self.log.warn('Failed to parse XML. Trying to recover') - parser = etree.XMLParser(no_network=True, huge_tree=True, - recover=True) - doc = etree.fromstring(xml, parser=parser) - - - char_button_map = {} - for x in doc.xpath('//CharButton[@refobj]'): - ro = x.get('refobj') - jump_button = doc.xpath('//*[@objid="%s"]'%ro) - if jump_button: - jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]') - if jump_to: - char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'), - jump_to[0].get('refobj')) - plot_map = {} - for x in doc.xpath('//Plot[@refobj]'): - ro = x.get('refobj') - image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro) - if image: - imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'% - image[0].get('refstream')) - if imgstr: - plot_map[ro] = imgstr[0].get('file') - - self.log('Converting XML to HTML...') - styledoc = etree.fromstring(P('templates/lrf.xsl', data=True)) - media_type = MediaType() - styles = Styles() - text_block = TextBlock(styles, char_button_map, plot_map, log) - canvas = Canvas(doc, styles, text_block, log) - image_block = ImageBlock(canvas) - ruled_line = RuledLine() - extensions = { - ('calibre', 'media-type') : media_type, - ('calibre', 'text-block') : text_block, - ('calibre', 'ruled-line') : ruled_line, - ('calibre', 'styles') : styles, - ('calibre', 'canvas') : canvas, - ('calibre', 'image-block'): image_block, - } - transform = etree.XSLT(styledoc, extensions=extensions) - try: - result = transform(doc) - except RuntimeError: - sys.setrecursionlimit(5000) - result = transform(doc) - - with open('content.opf', 'wb') as f: - f.write(result) - styles.write() - return os.path.abspath('content.opf') diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 286bcee9d0..0312a7db6a 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -710,7 +710,7 @@ class Metadata(object): fmt('Title sort', self.title_sort) if self.authors: fmt('Author(s)', authors_to_string(self.authors) + \ - ((' [' + self.author_sort + ']') + ((' [' + self.author_sort + ']') if self.author_sort and self.author_sort != _('Unknown') else '')) if self.publisher: fmt('Publisher', self.publisher) diff --git a/src/calibre/ebooks/metadata/book/json_codec.py b/src/calibre/ebooks/metadata/book/json_codec.py index a14e18569a..c0c3900a5d 100644 --- a/src/calibre/ebooks/metadata/book/json_codec.py +++ b/src/calibre/ebooks/metadata/book/json_codec.py @@ -12,7 +12,6 @@ from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS from calibre.constants import filesystem_encoding, preferred_encoding from calibre.library.field_metadata import FieldMetadata from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE, local_tz -from calibre.utils.magick import Image from calibre import isbytestring # Translate datetimes to and from strings. The string form is the datetime in @@ -37,6 +36,8 @@ def encode_thumbnail(thumbnail): ''' Encode the image part of a thumbnail, then return the 3 part tuple ''' + from calibre.utils.magick import Image + if thumbnail is None: return None if not isinstance(thumbnail, (tuple, list)): diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 30fe53f1a2..477b805ba0 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -129,9 +129,57 @@ class OCFDirReader(OCFReader): def open(self, path, *args, **kwargs): return open(os.path.join(self.root, path), *args, **kwargs) -def get_cover(opf, opf_path, stream, reader=None): +def render_cover(opf, opf_path, zf, reader=None): from calibre.ebooks import render_html_svg_workaround from calibre.utils.logging import default_log + + cpage = opf.first_spine_item() + if not cpage: + return + if reader is not None and reader.encryption_meta.is_encrypted(cpage): + return + + with TemporaryDirectory('_epub_meta') as tdir: + with CurrentDir(tdir): + zf.extractall() + opf_path = opf_path.replace('/', os.sep) + cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage) + if not os.path.exists(cpage): + return + + if isosx: + # On OS X trying to render a HTML cover which uses embedded + # fonts more than once in the same process causes a crash in Qt + # so be safe and remove the fonts as well as any @font-face + # rules + for f in walk('.'): + if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'): + os.remove(f) + ffpat = re.compile(br'@font-face.*?{.*?}', + re.DOTALL|re.IGNORECASE) + with open(cpage, 'r+b') as f: + raw = f.read() + f.truncate(0) + raw = ffpat.sub(b'', raw) + f.write(raw) + from calibre.ebooks.chardet import xml_to_unicode + raw = xml_to_unicode(raw, + strip_encoding_pats=True, resolve_entities=True)[0] + from lxml import html + for link in html.fromstring(raw).xpath('//link'): + href = link.get('href', '') + if href: + path = os.path.join(os.path.dirname(cpage), href) + if os.path.exists(path): + with open(path, 'r+b') as f: + raw = f.read() + f.truncate(0) + raw = ffpat.sub(b'', raw) + f.write(raw) + + return render_html_svg_workaround(cpage, default_log) + +def get_cover(opf, opf_path, stream, reader=None): raster_cover = opf.raster_cover stream.seek(0) zf = ZipFile(stream) @@ -152,27 +200,7 @@ def get_cover(opf, opf_path, stream, reader=None): zf.close() return data - cpage = opf.first_spine_item() - if not cpage: - return - if reader is not None and reader.encryption_meta.is_encrypted(cpage): - return - - with TemporaryDirectory('_epub_meta') as tdir: - with CurrentDir(tdir): - zf.extractall() - if isosx: - # On OS X trying to render an HTML cover which uses embedded - # fonts more than once in the same process causes a crash in Qt - # so be safe and remove the fonts. - for f in walk('.'): - if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'): - os.remove(f) - opf_path = opf_path.replace('/', os.sep) - cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage) - if not os.path.exists(cpage): - return - return render_html_svg_workaround(cpage, default_log) + return render_cover(opf, opf_path, zf, reader=reader) def get_metadata(stream, extract_cover=True): """ Return metadata as a :class:`Metadata` object """ diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index 26b0b6c59b..73ba7e77f4 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -222,6 +222,11 @@ def forked_read_metadata(path, tdir): from calibre.ebooks.metadata.opf2 import metadata_to_opf with open(path, 'rb') as f: fmt = os.path.splitext(path)[1][1:].lower() + f.seek(0, 2) + sz = f.tell() + with open(os.path.join(tdir, 'size.txt'), 'wb') as s: + s.write(str(sz).encode('ascii')) + f.seek(0) mi = get_metadata(f, fmt) if mi.cover_data and mi.cover_data[1]: with open(os.path.join(tdir, 'cover.jpg'), 'wb') as f: diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 9b8ae12b10..8d37e95dc4 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -1019,6 +1019,11 @@ class OPF(object): # {{{ mt = item.get('media-type', '') if 'xml' not in mt: return item.get('href', None) + for item in self.itermanifest(): + if item.get('href', None) == cover_id: + mt = item.get('media-type', '') + if mt.startswith('image/'): + return item.get('href', None) @dynamic_property def cover(self): diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index cae31abe09..3d08b96c5f 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -12,19 +12,14 @@ from urllib import urlencode from threading import Thread from Queue import Queue, Empty -from lxml.html import tostring from calibre import as_unicode from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase, fixauthors) -from calibre.utils.cleantext import clean_ascii_chars -from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.metadata.book.base import Metadata -from calibre.library.comments import sanitize_comments_html from calibre.utils.date import parse_date from calibre.utils.localization import canonicalize_lang -from calibre.utils.soupparser import fromstring class Worker(Thread): # Get details {{{ @@ -43,6 +38,8 @@ class Worker(Thread): # Get details {{{ self.browser = browser.clone_browser() self.cover_url = self.amazon_id = self.isbn = None self.domain = domain + from lxml.html import tostring + self.tostring = tostring months = { 'de': { @@ -176,6 +173,10 @@ class Worker(Thread): # Get details {{{ self.log.exception('get_details failed for url: %r'%self.url) def get_details(self): + from calibre.utils.cleantext import clean_ascii_chars + from calibre.utils.soupparser import fromstring + from calibre.ebooks.chardet import xml_to_unicode + try: raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip() except Exception as e: @@ -210,7 +211,7 @@ class Worker(Thread): # Get details {{{ errmsg = root.xpath('//*[@id="errorMessage"]') if errmsg: msg = 'Failed to parse amazon details page: %r'%self.url - msg += tostring(errmsg, method='text', encoding=unicode).strip() + msg += self.tostring(errmsg, method='text', encoding=unicode).strip() self.log.error(msg) return @@ -322,10 +323,10 @@ class Worker(Thread): # Get details {{{ tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0] actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]') if actual_title: - title = tostring(actual_title[0], encoding=unicode, + title = self.tostring(actual_title[0], encoding=unicode, method='text').strip() else: - title = tostring(tdiv, encoding=unicode, method='text').strip() + title = self.tostring(tdiv, encoding=unicode, method='text').strip() return re.sub(r'[(\[].*[)\]]', '', title).strip() def parse_authors(self, root): @@ -337,7 +338,7 @@ class Worker(Thread): # Get details {{{ ''') for x in aname: x.tail = '' - authors = [tostring(x, encoding=unicode, method='text').strip() for x + authors = [self.tostring(x, encoding=unicode, method='text').strip() for x in aname] authors = [a for a in authors if a] return authors @@ -356,6 +357,8 @@ class Worker(Thread): # Get details {{{ return float(m.group(1))/float(m.group(3)) * 5 def parse_comments(self, root): + from calibre.library.comments import sanitize_comments_html + desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]') if desc: desc = desc[0] @@ -365,7 +368,7 @@ class Worker(Thread): # Get details {{{ for a in desc.xpath('descendant::a[@href]'): del a.attrib['href'] a.tag = 'span' - desc = tostring(desc, method='html', encoding=unicode).strip() + desc = self.tostring(desc, method='html', encoding=unicode).strip() # Encoding bug in Amazon data U+fffd (replacement char) # in some examples it is present in place of ' @@ -602,6 +605,11 @@ class Amazon(Source): Note this method will retry without identifiers automatically if no match is found with identifiers. ''' + from lxml.html import tostring + from calibre.utils.cleantext import clean_ascii_chars + from calibre.utils.soupparser import fromstring + from calibre.ebooks.chardet import xml_to_unicode + query, domain = self.create_query(log, title=title, authors=authors, identifiers=identifiers) if query is None: diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 9ae8902671..4c334f4e46 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -12,7 +12,6 @@ from future_builtins import map from calibre import browser, random_user_agent from calibre.customize import Plugin -from calibre.utils.logging import ThreadSafeLog, FileStream from calibre.utils.config import JSONConfig from calibre.utils.titlecase import titlecase from calibre.utils.icu import capitalize, lower, upper @@ -34,6 +33,7 @@ msprefs.defaults['fewer_tags'] = True msprefs.defaults['cover_priorities'] = {'Google':2} def create_log(ostream=None): + from calibre.utils.logging import ThreadSafeLog, FileStream log = ThreadSafeLog(level=ThreadSafeLog.DEBUG) log.outputs = [FileStream(ostream)] return log diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py index 06e874e8ca..6857d62d4d 100644 --- a/src/calibre/ebooks/metadata/sources/douban.py +++ b/src/calibre/ebooks/metadata/sources/douban.py @@ -12,14 +12,10 @@ from urllib import urlencode from functools import partial from Queue import Queue, Empty -from lxml import etree from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.book.base import Metadata -from calibre.ebooks.chardet import xml_to_unicode -from calibre.utils.date import parse_date, utcnow -from calibre.utils.cleantext import clean_ascii_chars from calibre import as_unicode NAMESPACES = { @@ -28,22 +24,6 @@ NAMESPACES = { 'db': 'http://www.douban.com/xmlns/', 'gd': 'http://schemas.google.com/g/2005' } -XPath = partial(etree.XPath, namespaces=NAMESPACES) -total_results = XPath('//openSearch:totalResults') -start_index = XPath('//openSearch:startIndex') -items_per_page = XPath('//openSearch:itemsPerPage') -entry = XPath('//atom:entry') -entry_id = XPath('descendant::atom:id') -title = XPath('descendant::atom:title') -description = XPath('descendant::atom:summary') -publisher = XPath("descendant::db:attribute[@name='publisher']") -isbn = XPath("descendant::db:attribute[@name='isbn13']") -date = XPath("descendant::db:attribute[@name='pubdate']") -creator = XPath("descendant::db:attribute[@name='author']") -booktag = XPath("descendant::db:tag/attribute::name") -rating = XPath("descendant::gd:rating/attribute::average") -cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href") - def get_details(browser, url, timeout): # {{{ try: if Douban.DOUBAN_API_KEY and Douban.DOUBAN_API_KEY != '': @@ -61,6 +41,25 @@ def get_details(browser, url, timeout): # {{{ # }}} def to_metadata(browser, log, entry_, timeout): # {{{ + from lxml import etree + from calibre.ebooks.chardet import xml_to_unicode + from calibre.utils.date import parse_date, utcnow + from calibre.utils.cleantext import clean_ascii_chars + + XPath = partial(etree.XPath, namespaces=NAMESPACES) + entry = XPath('//atom:entry') + entry_id = XPath('descendant::atom:id') + title = XPath('descendant::atom:title') + description = XPath('descendant::atom:summary') + publisher = XPath("descendant::db:attribute[@name='publisher']") + isbn = XPath("descendant::db:attribute[@name='isbn13']") + date = XPath("descendant::db:attribute[@name='pubdate']") + creator = XPath("descendant::db:attribute[@name='author']") + booktag = XPath("descendant::db:tag/attribute::name") + rating = XPath("descendant::gd:rating/attribute::average") + cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href") + + def get_text(extra, x): try: ans = x(extra) @@ -275,6 +274,7 @@ class Douban(Source): def get_all_details(self, br, log, entries, abort, # {{{ result_queue, timeout): + from lxml import etree for relevance, i in enumerate(entries): try: ans = to_metadata(br, log, i, timeout) @@ -298,6 +298,13 @@ class Douban(Source): def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ identifiers={}, timeout=30): + from lxml import etree + from calibre.ebooks.chardet import xml_to_unicode + from calibre.utils.cleantext import clean_ascii_chars + + XPath = partial(etree.XPath, namespaces=NAMESPACES) + entry = XPath('//atom:entry') + query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) if not query: diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index f9c43d86cc..3962afcb5e 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -12,8 +12,6 @@ from urllib import urlencode from functools import partial from Queue import Queue, Empty -from lxml import etree - from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.book.base import Metadata @@ -29,23 +27,6 @@ NAMESPACES = { 'dc' : 'http://purl.org/dc/terms', 'gd' : 'http://schemas.google.com/g/2005' } -XPath = partial(etree.XPath, namespaces=NAMESPACES) - -total_results = XPath('//openSearch:totalResults') -start_index = XPath('//openSearch:startIndex') -items_per_page = XPath('//openSearch:itemsPerPage') -entry = XPath('//atom:entry') -entry_id = XPath('descendant::atom:id') -creator = XPath('descendant::dc:creator') -identifier = XPath('descendant::dc:identifier') -title = XPath('descendant::dc:title') -date = XPath('descendant::dc:date') -publisher = XPath('descendant::dc:publisher') -subject = XPath('descendant::dc:subject') -description = XPath('descendant::dc:description') -language = XPath('descendant::dc:language') -rating = XPath('descendant::gd:rating[@average]') - def get_details(browser, url, timeout): # {{{ try: raw = browser.open_novisit(url, timeout=timeout).read() @@ -61,6 +42,24 @@ def get_details(browser, url, timeout): # {{{ # }}} def to_metadata(browser, log, entry_, timeout): # {{{ + from lxml import etree + XPath = partial(etree.XPath, namespaces=NAMESPACES) + + # total_results = XPath('//openSearch:totalResults') + # start_index = XPath('//openSearch:startIndex') + # items_per_page = XPath('//openSearch:itemsPerPage') + entry = XPath('//atom:entry') + entry_id = XPath('descendant::atom:id') + creator = XPath('descendant::dc:creator') + identifier = XPath('descendant::dc:identifier') + title = XPath('descendant::dc:title') + date = XPath('descendant::dc:date') + publisher = XPath('descendant::dc:publisher') + subject = XPath('descendant::dc:subject') + description = XPath('descendant::dc:description') + language = XPath('descendant::dc:language') + rating = XPath('descendant::gd:rating[@average]') + def get_text(extra, x): try: @@ -266,6 +265,7 @@ class GoogleBooks(Source): def get_all_details(self, br, log, entries, abort, # {{{ result_queue, timeout): + from lxml import etree for relevance, i in enumerate(entries): try: ans = to_metadata(br, log, i, timeout) @@ -289,6 +289,10 @@ class GoogleBooks(Source): def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ identifiers={}, timeout=30): + from lxml import etree + XPath = partial(etree.XPath, namespaces=NAMESPACES) + entry = XPath('//atom:entry') + query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) if not query: diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py index 1da7f906bb..7e15ad275e 100644 --- a/src/calibre/ebooks/metadata/sources/isbndb.py +++ b/src/calibre/ebooks/metadata/sources/isbndb.py @@ -9,12 +9,9 @@ __docformat__ = 'restructuredtext en' from urllib import quote -from lxml import etree from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import Source, Option -from calibre.ebooks.chardet import xml_to_unicode -from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.icu import lower from calibre.ebooks.metadata.book.base import Metadata @@ -122,6 +119,7 @@ class ISBNDB(Source): result_queue.put(result) def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers): + from lxml import etree def tostring(x): if x is None: @@ -198,6 +196,10 @@ class ISBNDB(Source): def make_query(self, q, abort, title=None, authors=None, identifiers={}, max_pages=10, timeout=30): + from lxml import etree + from calibre.ebooks.chardet import xml_to_unicode + from calibre.utils.cleantext import clean_ascii_chars + page_num = 1 parser = etree.XMLParser(recover=True, no_network=True) br = self.browser diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py index 1164567ff5..bb1bbb9d42 100755 --- a/src/calibre/ebooks/metadata/sources/overdrive.py +++ b/src/calibre/ebooks/metadata/sources/overdrive.py @@ -9,18 +9,14 @@ __docformat__ = 'restructuredtext en' ''' Fetch metadata using Overdrive Content Reserve ''' -import re, random, mechanize, copy, json +import re, random, copy, json from threading import RLock from Queue import Queue, Empty -from lxml import html from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import Source, Option from calibre.ebooks.metadata.book.base import Metadata -from calibre.ebooks.chardet import xml_to_unicode -from calibre.library.comments import sanitize_comments_html -from calibre.utils.soupparser import fromstring ovrdrv_data_cache = {} cache_lock = RLock() @@ -80,6 +76,7 @@ class OverDrive(Source): def download_cover(self, log, result_queue, abort, # {{{ title=None, authors=None, identifiers={}, timeout=30): + import mechanize cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running identify') @@ -170,6 +167,7 @@ class OverDrive(Source): this page attempts to set a cookie that Mechanize doesn't like copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar ''' + import mechanize goodcookies = br._ua_handlers['_cookies'].cookiejar clean_cj = mechanize.CookieJar() cookies_to_copy = [] @@ -187,6 +185,7 @@ class OverDrive(Source): br.set_cookiejar(clean_cj) def overdrive_search(self, br, log, q, title, author): + import mechanize # re-initialize the cookiejar to so that it's clean clean_cj = mechanize.CookieJar() br.set_cookiejar(clean_cj) @@ -303,6 +302,7 @@ class OverDrive(Source): return '' def overdrive_get_record(self, br, log, q, ovrdrv_id): + import mechanize search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}' results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc' @@ -393,6 +393,11 @@ class OverDrive(Source): def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log): + from lxml import html + from calibre.ebooks.chardet import xml_to_unicode + from calibre.utils.soupparser import fromstring + from calibre.library.comments import sanitize_comments_html + try: raw = br.open_novisit(metadata_url).read() except Exception, e: diff --git a/src/calibre/ebooks/metadata/sources/ozon.py b/src/calibre/ebooks/metadata/sources/ozon.py index de45e0b8db..d40e43d582 100644 --- a/src/calibre/ebooks/metadata/sources/ozon.py +++ b/src/calibre/ebooks/metadata/sources/ozon.py @@ -6,15 +6,11 @@ __copyright__ = '2011, Roman Mukhin ' __docformat__ = 'restructuredtext en' import re -import urllib2 import datetime from urllib import quote_plus from Queue import Queue, Empty -from lxml import etree, html + from calibre import as_unicode - -from calibre.ebooks.chardet import xml_to_unicode - from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.book.base import Metadata @@ -43,6 +39,7 @@ class Ozon(Source): isbnRegex = re.compile(isbnPattern) def get_book_url(self, identifiers): # {{{ + import urllib2 ozon_id = identifiers.get('ozon', None) res = None if ozon_id: @@ -81,6 +78,9 @@ class Ozon(Source): def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30): # {{{ + from lxml import etree + from calibre.ebooks.chardet import xml_to_unicode + if not self.is_configured(): return query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) @@ -283,6 +283,9 @@ class Ozon(Source): # }}} def get_book_details(self, log, metadata, timeout): # {{{ + from lxml import html, etree + from calibre.ebooks.chardet import xml_to_unicode + url = self.get_book_url(metadata.get_identifiers())[2] raw = self.browser.open_novisit(url, timeout=timeout).read() diff --git a/src/calibre/ebooks/odt/input.py b/src/calibre/ebooks/odt/input.py index 214a40c29b..430d95b31f 100644 --- a/src/calibre/ebooks/odt/input.py +++ b/src/calibre/ebooks/odt/input.py @@ -12,7 +12,6 @@ from lxml import etree from odf.odf2xhtml import ODF2XHTML from calibre import CurrentDir, walk -from calibre.customize.conversion import InputFormatPlugin class Extract(ODF2XHTML): @@ -178,16 +177,4 @@ class Extract(ODF2XHTML): return os.path.abspath('metadata.opf') -class ODTInput(InputFormatPlugin): - - name = 'ODT Input' - author = 'Kovid Goyal' - description = 'Convert ODT (OpenOffice) files to HTML' - file_types = set(['odt']) - - - def convert(self, stream, options, file_ext, log, - accelerators): - return Extract()(stream, '.', log) - diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py index c8089297db..428cbe82ab 100644 --- a/src/calibre/ebooks/pdb/__init__.py +++ b/src/calibre/ebooks/pdb/__init__.py @@ -7,31 +7,38 @@ __docformat__ = 'restructuredtext en' class PDBError(Exception): pass +FORMAT_READERS = None -from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader -from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader -from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader -from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader -from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader +def _import_readers(): + global FORMAT_READERS + from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader + from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader + from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader + from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader + from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader -FORMAT_READERS = { - 'PNPdPPrs': ereader_reader, - 'PNRdPPrs': ereader_reader, - 'zTXTGPlm': ztxt_reader, - 'TEXtREAd': palmdoc_reader, - '.pdfADBE': pdf_reader, - 'DataPlkr': plucker_reader, -} + FORMAT_READERS = { + 'PNPdPPrs': ereader_reader, + 'PNRdPPrs': ereader_reader, + 'zTXTGPlm': ztxt_reader, + 'TEXtREAd': palmdoc_reader, + '.pdfADBE': pdf_reader, + 'DataPlkr': plucker_reader, + } -from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer -from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer -from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer +ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'} +FORMAT_WRITERS = None +def _import_writers(): + global FORMAT_WRITERS + from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer + from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer + from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer -FORMAT_WRITERS = { - 'doc': palmdoc_writer, - 'ztxt': ztxt_writer, - 'ereader': ereader_writer, -} + FORMAT_WRITERS = { + 'doc': palmdoc_writer, + 'ztxt': ztxt_writer, + 'ereader': ereader_writer, + } IDENTITY_TO_NAME = { 'PNPdPPrs': 'eReader', @@ -69,11 +76,17 @@ def get_reader(identity): ''' Returns None if no reader is found for the identity. ''' + global FORMAT_READERS + if FORMAT_READERS is None: + _import_readers() return FORMAT_READERS.get(identity, None) def get_writer(extension): ''' Returns None if no writer is found for extension. ''' + global FORMAT_WRITERS + if FORMAT_WRITERS is None: + _import_writers() return FORMAT_WRITERS.get(extension, None) diff --git a/src/calibre/ebooks/rb/reader.py b/src/calibre/ebooks/rb/reader.py index f97c3d78c5..e68cef41d3 100644 --- a/src/calibre/ebooks/rb/reader.py +++ b/src/calibre/ebooks/rb/reader.py @@ -65,7 +65,7 @@ class Reader(object): name = urlunquote(self.stream.read(32).strip('\x00')) size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32() toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags)) - + return toc def get_text(self, toc_item, output_dir): @@ -89,7 +89,7 @@ class Reader(object): output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace') with open(os.path.join(output_dir, toc_item.name), 'wb') as html: - html.write(output.encode('utf-8')) + html.write(output.replace('', '<TITLE> ').encode('utf-8')) def get_image(self, toc_item, output_dir): if toc_item.flags != 0: @@ -105,7 +105,7 @@ class Reader(object): self.log.debug('Extracting content from file...') html = [] images = [] - + for item in self.toc: if item.name.lower().endswith('html'): self.log.debug('HTML item %s found...' % item.name) diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index 5858824434..8e1a5ac775 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -2,42 +2,9 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' -import os, glob, re, textwrap from lxml import etree -from calibre.customize.conversion import InputFormatPlugin - -border_style_map = { - 'single' : 'solid', - 'double-thickness-border' : 'double', - 'shadowed-border': 'outset', - 'double-border': 'double', - 'dotted-border': 'dotted', - 'dashed': 'dashed', - 'hairline': 'solid', - 'inset': 'inset', - 'dash-small': 'dashed', - 'dot-dash': 'dotted', - 'dot-dot-dash': 'dotted', - 'outset': 'outset', - 'tripple': 'double', - 'triple': 'double', - 'thick-thin-small': 'solid', - 'thin-thick-small': 'solid', - 'thin-thick-thin-small': 'solid', - 'thick-thin-medium': 'solid', - 'thin-thick-medium': 'solid', - 'thin-thick-thin-medium': 'solid', - 'thick-thin-large': 'solid', - 'thin-thick-thin-large': 'solid', - 'wavy': 'ridge', - 'double-wavy': 'ridge', - 'striped': 'ridge', - 'emboss': 'inset', - 'engrave': 'inset', - 'frame': 'ridge', -} class InlineClass(etree.XSLTExtension): @@ -71,261 +38,3 @@ class InlineClass(etree.XSLTExtension): output_parent.text = ' '.join(classes) -class RTFInput(InputFormatPlugin): - - name = 'RTF Input' - author = 'Kovid Goyal' - description = 'Convert RTF files to HTML' - file_types = set(['rtf']) - - def generate_xml(self, stream): - from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf - ofile = 'dataxml.xml' - run_lev, debug_dir, indent_out = 1, None, 0 - if getattr(self.opts, 'debug_pipeline', None) is not None: - try: - os.mkdir('rtfdebug') - debug_dir = 'rtfdebug' - run_lev = 4 - indent_out = 1 - self.log('Running RTFParser in debug mode') - except: - self.log.warn('Impossible to run RTFParser in debug mode') - parser = ParseRtf( - in_file = stream, - out_file = ofile, - # Convert symbol fonts to unicode equivalents. Default - # is 1 - convert_symbol = 1, - - # Convert Zapf fonts to unicode equivalents. Default - # is 1. - convert_zapf = 1, - - # Convert Wingding fonts to unicode equivalents. - # Default is 1. - convert_wingdings = 1, - - # Convert RTF caps to real caps. - # Default is 1. - convert_caps = 1, - - # Indent resulting XML. - # Default is 0 (no indent). - indent = indent_out, - - # Form lists from RTF. Default is 1. - form_lists = 1, - - # Convert headings to sections. Default is 0. - headings_to_sections = 1, - - # Group paragraphs with the same style name. Default is 1. - group_styles = 1, - - # Group borders. Default is 1. - group_borders = 1, - - # Write or do not write paragraphs. Default is 0. - empty_paragraphs = 1, - - #debug - deb_dir = debug_dir, - run_level = run_lev, - ) - parser.parse_rtf() - with open(ofile, 'rb') as f: - return f.read() - - def extract_images(self, picts): - import imghdr - self.log('Extracting images...') - - with open(picts, 'rb') as f: - raw = f.read() - picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw)) - hex = re.compile(r'[^a-fA-F0-9]') - encs = [hex.sub('', pict) for pict in picts] - - count = 0 - imap = {} - for enc in encs: - if len(enc) % 2 == 1: - enc = enc[:-1] - data = enc.decode('hex') - fmt = imghdr.what(None, data) - if fmt is None: - fmt = 'wmf' - count += 1 - name = '%04d.%s' % (count, fmt) - with open(name, 'wb') as f: - f.write(data) - imap[count] = name - # with open(name+'.hex', 'wb') as f: - # f.write(enc) - return self.convert_images(imap) - - def convert_images(self, imap): - self.default_img = None - for count, val in imap.iteritems(): - try: - imap[count] = self.convert_image(val) - except: - self.log.exception('Failed to convert', val) - return imap - - def convert_image(self, name): - if not name.endswith('.wmf'): - return name - try: - return self.rasterize_wmf(name) - except: - self.log.exception('Failed to convert WMF image %r'%name) - return self.replace_wmf(name) - - def replace_wmf(self, name): - from calibre.ebooks import calibre_cover - if self.default_img is None: - self.default_img = calibre_cover('Conversion of WMF images is not supported', - 'Use Microsoft Word or OpenOffice to save this RTF file' - ' as HTML and convert that in calibre.', title_size=36, - author_size=20) - name = name.replace('.wmf', '.jpg') - with open(name, 'wb') as f: - f.write(self.default_img) - return name - - def rasterize_wmf(self, name): - from calibre.utils.wmf.parse import wmf_unwrap - with open(name, 'rb') as f: - data = f.read() - data = wmf_unwrap(data) - name = name.replace('.wmf', '.png') - with open(name, 'wb') as f: - f.write(data) - return name - - - def write_inline_css(self, ic, border_styles): - font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in - enumerate(ic.font_sizes)] - color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in - enumerate(ic.colors)] - css = textwrap.dedent(''' - span.none { - text-decoration: none; font-weight: normal; - font-style: normal; font-variant: normal - } - - span.italics { font-style: italic } - - span.bold { font-weight: bold } - - span.small-caps { font-variant: small-caps } - - span.underlined { text-decoration: underline } - - span.strike-through { text-decoration: line-through } - - ''') - css += '\n'+'\n'.join(font_size_classes) - css += '\n' +'\n'.join(color_classes) - - for cls, val in border_styles.iteritems(): - css += '\n\n.%s {\n%s\n}'%(cls, val) - - with open('styles.css', 'ab') as f: - f.write(css) - - def convert_borders(self, doc): - border_styles = [] - style_map = {} - for elem in doc.xpath(r'//*[local-name()="cell"]'): - style = ['border-style: hidden', 'border-width: 1px', - 'border-color: black'] - for x in ('bottom', 'top', 'left', 'right'): - bs = elem.get('border-cell-%s-style'%x, None) - if bs: - cbs = border_style_map.get(bs, 'solid') - style.append('border-%s-style: %s'%(x, cbs)) - bw = elem.get('border-cell-%s-line-width'%x, None) - if bw: - style.append('border-%s-width: %spt'%(x, bw)) - bc = elem.get('border-cell-%s-color'%x, None) - if bc: - style.append('border-%s-color: %s'%(x, bc)) - style = ';\n'.join(style) - if style not in border_styles: - border_styles.append(style) - idx = border_styles.index(style) - cls = 'border_style%d'%idx - style_map[cls] = style - elem.set('class', cls) - return style_map - - def convert(self, stream, options, file_ext, log, - accelerators): - from calibre.ebooks.metadata.meta import get_metadata - from calibre.ebooks.metadata.opf2 import OPFCreator - from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException - self.opts = options - self.log = log - self.log('Converting RTF to XML...') - try: - xml = self.generate_xml(stream.name) - except RtfInvalidCodeException as e: - raise ValueError(_('This RTF file has a feature calibre does not ' - 'support. Convert it to HTML first and then try it.\n%s')%e) - - d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf')) - if d: - imap = {} - try: - imap = self.extract_images(d[0]) - except: - self.log.exception('Failed to extract images...') - - self.log('Parsing XML...') - parser = etree.XMLParser(recover=True, no_network=True) - doc = etree.fromstring(xml, parser=parser) - border_styles = self.convert_borders(doc) - for pict in doc.xpath('//rtf:pict[@num]', - namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}): - num = int(pict.get('num')) - name = imap.get(num, None) - if name is not None: - pict.set('num', name) - - self.log('Converting XML to HTML...') - inline_class = InlineClass(self.log) - styledoc = etree.fromstring(P('templates/rtf.xsl', data=True)) - extensions = { ('calibre', 'inline-class') : inline_class } - transform = etree.XSLT(styledoc, extensions=extensions) - result = transform(doc) - html = 'index.xhtml' - with open(html, 'wb') as f: - res = transform.tostring(result) - # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] - #clean multiple \n - res = re.sub('\n+', '\n', res) - # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines - # res = re.sub('\s*<body>', '<body>', res) - # res = re.sub('(?<=\n)\n{2}', - # u'<p>\u00a0</p>\n'.encode('utf-8'), res) - f.write(res) - self.write_inline_css(inline_class, border_styles) - stream.seek(0) - mi = get_metadata(stream, 'rtf') - if not mi.title: - mi.title = _('Unknown') - if not mi.authors: - mi.authors = [_('Unknown')] - opf = OPFCreator(os.getcwd(), mi) - opf.create_manifest([('index.xhtml', None)]) - opf.create_spine(['index.xhtml']) - opf.render(open('metadata.opf', 'wb')) - return os.path.abspath('metadata.opf') - -#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug" -# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug") -# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug" diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 4cff648fa5..0880eca4ca 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -16,7 +16,7 @@ from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.conversion.preprocess import DocAnalysis from calibre.utils.cleantext import clean_ascii_chars -HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s\n%s\n' +HTML_TEMPLATE = u'%s \n%s\n' def clean_txt(txt): ''' @@ -28,7 +28,7 @@ def clean_txt(txt): # Strip whitespace from the end of the line. Also replace # all line breaks with \n. txt = '\n'.join([line.rstrip() for line in txt.splitlines()]) - + # Replace whitespace at the beginning of the line with   txt = re.sub('(?m)(?<=^)([ ]{2,}|\t+)(?=.)', ' ' * 4, txt) @@ -75,7 +75,7 @@ def convert_basic(txt, title='', epub_split_size_kb=0): ''' Converts plain text to html by putting all paragraphs in

tags. It condense and retains blank lines when necessary. - + Requires paragraphs to be in single line format. ''' txt = clean_txt(txt) @@ -215,7 +215,7 @@ def detect_paragraph_type(txt): def detect_formatting_type(txt): ''' Tries to determine the formatting of the document. - + markdown: Markdown formatting is used. textile: Textile formatting is used. heuristic: When none of the above formatting types are diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 35cc249acb..b3e128af82 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -102,6 +102,7 @@ gprefs.defaults['cb_fullscreen'] = False gprefs.defaults['worker_max_time'] = 0 gprefs.defaults['show_files_after_save'] = True gprefs.defaults['auto_add_path'] = None +gprefs.defaults['auto_add_check_for_duplicates'] = False # }}} NONE = QVariant() #: Null value to return from the data function of item models diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 7cdac3b845..972ea57cb9 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -382,7 +382,8 @@ class Adder(QObject): # {{{ if not duplicates: return self.duplicates_processed() self.pd.hide() - files = [x[0].title for x in duplicates] + files = [_('%s by %s')%(x[0].title, x[0].format_field('authors')[1]) + for x in duplicates] if question_dialog(self._parent, _('Duplicates found!'), _('Books with the same title as the following already ' 'exist in the database. Add them anyway?'), diff --git a/src/calibre/gui2/auto_add.py b/src/calibre/gui2/auto_add.py index 71d2b8ecd0..6860f386d6 100644 --- a/src/calibre/gui2/auto_add.py +++ b/src/calibre/gui2/auto_add.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, tempfile, shutil +import os, tempfile, shutil, time from threading import Thread, Event from PyQt4.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer) @@ -15,6 +15,7 @@ from PyQt4.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer) from calibre import prints from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ebooks import BOOK_EXTENSIONS +from calibre.gui2 import question_dialog, gprefs class Worker(Thread): @@ -41,25 +42,58 @@ class Worker(Thread): traceback.print_exc() def auto_add(self): - from calibre.utils.ipc.simple_worker import fork_job + from calibre.utils.ipc.simple_worker import fork_job, WorkerError from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.meta import metadata_from_filename - files = [x for x in os.listdir(self.path) if x not in self.staging - and os.path.isfile(os.path.join(self.path, x)) and - os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) and - os.path.splitext(x)[1][1:].lower() in self.be] + files = [x for x in os.listdir(self.path) if + # Must not be in the process of being added to the db + x not in self.staging + # Firefox creates 0 byte placeholder files when downloading + and os.stat(os.path.join(self.path, x)).st_size > 0 + # Must be a file + and os.path.isfile(os.path.join(self.path, x)) + # Must have read and write permissions + and os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) + # Must be a known ebook file type + and os.path.splitext(x)[1][1:].lower() in self.be + ] data = {} + # Give any in progress copies time to complete + time.sleep(2) + for fname in files: f = os.path.join(self.path, fname) + + # Try opening the file for reading, if the OS prevents us, then at + # least on windows, it means the file is open in another + # application for writing. We will get notified by + # QFileSystemWatcher when writing is completed, so ignore for now. + try: + open(f, 'rb').close() + except: + continue tdir = tempfile.mkdtemp(dir=self.tdir) try: fork_job('calibre.ebooks.metadata.meta', 'forked_read_metadata', (f, tdir), no_output=True) + except WorkerError as e: + prints('Failed to read metadata from:', fname) + prints(e.orig_tb) except: import traceback traceback.print_exc() + # Ensure that the pre-metadata file size is present. If it isn't, + # write 0 so that the file is rescanned + szpath = os.path.join(tdir, 'size.txt') + try: + with open(szpath, 'rb') as f: + int(f.read()) + except: + with open(szpath, 'wb') as f: + f.write(b'0') + opfpath = os.path.join(tdir, 'metadata.opf') try: if os.stat(opfpath).st_size < 30: @@ -125,25 +159,71 @@ class AutoAdder(QObject): m = gui.library_view.model() count = 0 + needs_rescan = False + duplicates = [] + for fname, tdir in data.iteritems(): paths = [os.path.join(self.worker.path, fname)] + sz = os.path.join(tdir, 'size.txt') + try: + with open(sz, 'rb') as f: + sz = int(f.read()) + if sz != os.stat(paths[0]).st_size: + raise Exception('Looks like the file was written to after' + ' we tried to read metadata') + except: + needs_rescan = True + try: + self.worker.staging.remove(fname) + except KeyError: + pass + + continue + mi = os.path.join(tdir, 'metadata.opf') if not os.access(mi, os.R_OK): continue mi = [OPF(open(mi, 'rb'), tdir, populate_spine=False).to_book_metadata()] - m.add_books(paths, [os.path.splitext(fname)[1][1:].upper()], mi, - add_duplicates=True) + dups, num = m.add_books(paths, + [os.path.splitext(fname)[1][1:].upper()], mi, + add_duplicates=not gprefs['auto_add_check_for_duplicates']) + if dups: + path = dups[0][0] + with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()), + 'wb') as dest, open(path, 'rb') as src: + shutil.copyfileobj(src, dest) + dups[0][0] = dest.name + duplicates.append(dups) + + try: + os.remove(paths[0]) + self.worker.staging.remove(fname) + except: + pass + count += num + + if duplicates: + paths, formats, metadata = [], [], [] + for p, f, mis in duplicates: + paths.extend(p) + formats.extend(f) + metadata.extend(mis) + files = [_('%s by %s')%(mi.title, mi.format_field('authors')[1]) + for mi in metadata] + if question_dialog(self.parent(), _('Duplicates found!'), + _('Books with the same title as the following already ' + 'exist in the database. Add them anyway?'), + '\n'.join(files)): + dups, num = m.add_books(paths, formats, metadata, + add_duplicates=True) + count += num + + for tdir in data.itervalues(): try: - os.remove(os.path.join(self.worker.path, fname)) - try: - self.worker.staging.remove(fname) - except KeyError: - pass shutil.rmtree(tdir) except: pass - count += 1 if count > 0: m.books_added(count) @@ -153,4 +233,7 @@ class AutoAdder(QObject): if hasattr(gui, 'db_images'): gui.db_images.reset() + if needs_rescan: + QTimer.singleShot(2000, self.dir_changed) + diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index 6ad1aaf0c4..628f846aea 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -38,14 +38,24 @@ def render_html(mi, css, vertical, widget, all_fields=False): # {{{ ans = unicode(col.name()) return ans - f = QFontInfo(QApplication.font(widget)).pixelSize() + fi = QFontInfo(QApplication.font(widget)) + f = fi.pixelSize()+1 + fam = unicode(fi.family()).strip().replace('"', '') + if not fam: + fam = 'sans-serif' + c = color_to_string(QApplication.palette().color(QPalette.Normal, QPalette.WindowText)) templ = u'''\