0.8.38+ (kg load optimizations)

This commit is contained in:
GRiker 2012-02-06 04:03:24 -07:00
commit dbd588f94e
111 changed files with 7682 additions and 6921 deletions

View File

@ -3,10 +3,17 @@ import re
from calibre.ptempfile import PersistentTemporaryFile
class ForeignAffairsRecipe(BasicNewsRecipe):
''' there are three modifications:
1) fetch issue cover
2) toggle ignore premium articles
3) extract proper section names, ie. "Comments", "Essay"
by Chen Wei weichen302@gmx.com, 2012-02-05'''
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'en'
version = 1
version = 1.01
title = u'Foreign Affairs (Subcription or (free) Registration)'
publisher = u'Council on Foreign Relations'
@ -17,6 +24,9 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
remove_javascript = True
INDEX = 'http://www.foreignaffairs.com'
FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
INCLUDE_PREMIUM = False
remove_tags = []
remove_tags.append(dict(name = 'base'))
@ -37,6 +47,12 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
temp_files = []
articles_are_obfuscated = True
def get_cover_url(self):
    '''Return the URL of the current issue's cover image, or None.

    Fetches self.FRONTPAGE, looks up the <img> inside the
    'inthemag-issuebuy-cover' div and prefixes its src with self.INDEX.
    None is returned when the div, the image or its src is missing, so a
    markup change costs the cover instead of raising.
    '''
    soup = self.index_to_soup(self.FRONTPAGE)
    div = soup.find('div', attrs={'class':'inthemag-issuebuy-cover'})
    img = div.find('img') if div is not None else None
    if img is None or not img.get('src'):
        return None
    return self.INDEX + img['src']
def get_obfuscated_article(self, url):
br = self.get_browser()
br.open(url)
@ -50,57 +66,47 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
return self.temp_files[-1].name
def parse_index(self):
soup = self.index_to_soup('http://www.foreignaffairs.com/magazine')
articles = []
answer = []
content = soup.find('div', attrs = {'class': 'center-wrapper'})
if content:
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
tag = div.find('div', attrs = {'class': 'views-field-title'})
if tag:
a = tag.find('a')
if a:
title = self.tag_to_string(a)
url = self.INDEX + a['href']
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
tag = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
# If they ever fix their markup, this will break :-(
summary = self.tag_to_string(tag.findNextSibling('p'))
description = author + '<br/>' + summary
articles.append({'title': title, 'date': None, 'url': url, 'description': description})
else:
continue
else:
continue
answer.append(('Magazine', articles))
ul = content.find('ul')
if ul:
soup = self.index_to_soup(self.FRONTPAGE)
sec_start = soup.findAll('div', attrs={'class':'panel-separator'})
for sec in sec_start:
content = sec.nextSibling
if content:
section = self.tag_to_string(content.find('h2'))
articles = []
for li in ul.findAll('li'):
tag = li.find('div', attrs = {'class': 'views-field-title'})
if tag:
a = tag.find('a')
if a:
title = self.tag_to_string(a)
url = self.INDEX + a['href']
description = ''
tag = li.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
if tag:
description = self.tag_to_string(tag)
articles.append({'title': title, 'date': None, 'url': url, 'description': description})
else:
continue
tags = []
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
tags.append(div)
ul = content.find('ul')
for li in content.findAll('li'):
tags.append(li)
for div in tags:
title = url = description = author = None
if self.INCLUDE_PREMIUM:
found_premium = False
else:
continue
answer.append(('Letters to the Editor', articles))
found_premium = div.findAll('span', attrs={'class':
'premium-icon'})
if not found_premium:
tag = div.find('div', attrs={'class': 'views-field-title'})
if tag:
a = tag.find('a')
if a:
title = self.tag_to_string(a)
url = self.INDEX + a['href']
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
description = self.tag_to_string(tag_summary)
articles.append({'title':title, 'date':None, 'url':url,
'description':description, 'author':author})
if articles:
answer.append((section, articles))
return answer
def preprocess_html(self, soup):

110
recipes/ilmanifesto.recipe Normal file
View File

@ -0,0 +1,110 @@
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/'
class IlManifesto(BasicNewsRecipe):
    '''Recipe for the latest available HTML edition of il manifesto.

    The URL of the latest edition is discovered once from the
    'area-abbonati/in-edicola/' page (see _set_manifesto_index) and then
    reused for both the cover and the section/article index.
    '''

    title = 'Il Manifesto'
    __author__ = 'Giacomo Lacava'
    description = 'quotidiano comunista - ultima edizione html disponibile'
    publication_type = 'newspaper'
    publisher = 'il manifesto coop. editrice a r.l.'
    language = 'it'

    oldest_article = 2
    max_articles_per_feed = 100
    delay = 1
    no_stylesheets = True
    simultaneous_downloads = 5
    timeout = 30
    auto_cleanup = True
    remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})]
    remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'})
    remove_tags_after = dict(id='myPrintArea')

    # Both are filled in lazily by _set_manifesto_index()
    manifesto_index = None
    manifesto_datestr = None

    def _text(self, node):
        '''Return the text of node, or '' when node is None.'''
        if node is None:
            return ''
        # .string can be None (e.g. when the element holds nested markup);
        # fall back to recursive text extraction in that case.
        return node.string or self.tag_to_string(node)

    def _set_manifesto_index(self):
        '''Find the latest edition and cache its URL and date string.

        Does nothing when the index has already been resolved.
        '''
        if self.manifesto_index is not None:
            return
        startUrl = MANIFESTO_BASEURL + 'area-abbonati/in-edicola/'
        startSoup = self.index_to_soup(startUrl)
        # The edition link is read from the second 'accordion_inedicola' div
        lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href']
        self.manifesto_index = MANIFESTO_BASEURL + lastEdition
        # The date string is the last path component, or the one before it
        # when the href ends with '/'
        urlsplit = lastEdition.split('/')
        self.manifesto_datestr = urlsplit[-1]
        if urlsplit[-1] == '':
            self.manifesto_datestr = urlsplit[-2]

    def get_cover_url(self):
        '''Return the front page image URL built from the edition date string.'''
        self._set_manifesto_index()
        url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr
        return url

    def parse_index(self):
        '''Return [(section name, [article dict, ...]), ...] for the edition.

        Every link inside the edition's 'accordion_inedicola' div is treated
        as a section; links without an <h2> heading and section pages without
        a 'column1' div are skipped instead of aborting the whole download.
        '''
        self._set_manifesto_index()
        soup = self.index_to_soup(self.manifesto_index)
        feedLinks = soup.find('div',id='accordion_inedicola').findAll('a')
        result = []
        for feed in feedLinks:
            heading = feed.find('h2')
            if heading is None:
                continue
            feedName = self._text(heading)
            feedUrl = MANIFESTO_BASEURL + feed['href']
            feedSoup = self.index_to_soup(feedUrl)
            indexRoot = feedSoup.find('div',attrs={'class':'column1'})
            if indexRoot is None:
                continue
            articles = []
            for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}):
                artLink = div.find('a')
                if artLink is None:
                    continue # empty div
                articles.append({
                    'title': self._text(artLink),
                    'url': MANIFESTO_BASEURL + artLink['href'],
                    'date': strftime('%d %B %Y'),
                    'description': self._text(div.find('div',attrs={'class':'text_12'})),
                    'content': '',
                    'author': self._text(div.find('div',attrs={'class':'firma'})),
                })
            result.append((feedName,articles))
        return result

    def extract_readable_article(self, html, url):
        '''Rebuild the article as a minimal HTML page.

        Title, subtitle, author, summary and body are looked up in html and
        interpolated into a fixed template; any part that cannot be found is
        rendered as an empty string.
        '''
        def present(node):
            # Tags are interpolated as markup; a missing one must not show
            # up as the literal text 'None'.
            return '' if node is None else node

        bs = BeautifulSoup(html)
        col1 = bs.find('div',attrs={'class':'column1'})
        content = author = subtitle = ''
        if col1 is not None:
            content = present(col1.find('div',attrs={'class':'bodytext'}))
            author = present(col1.find('span',attrs={'class':'firma'}))
            subtitle = present(col1.findPrevious('div',attrs={'class':'occhiello_rosso'}))
        title = self._text(bs.find(id='titolo_articolo'))
        summary = present(bs.find('div',attrs={'class':'sommario'}))

        template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>"
        return template % dict(title=title,subtitle=subtitle,author=author,summary=summary,content=content)

View File

@ -1,58 +1,53 @@
#!/usr/bin/env python
##
## Title: Microwave Journal RSS recipe
## Title: Microwave Journal
## Contact: Kiavash (use Mobile Read)
##
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
## Copyright: Kiavash
##
## Written: Jan 2012
## Last Edited: Jan 2012
## Last Edited: Feb 2012
##
# Feb 2012: New Recipe compatible with the MWJournal 2.0 website
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
__copyright__ = 'Kiavash'
__author__ = 'Kaivash'
'''
Microwave Journal Monthly Magazine
You need to sign up (free) and get username/password.
microwavejournal.com
'''
import re # Import the regular expressions module.
from calibre.ptempfile import TemporaryFile # we need this for saving to a temp file
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image
class MWJournal(BasicNewsRecipe):
# Title to use for the ebook.
title = u'Microwave Journal'
__author__ = 'Kiavash'
language = 'en'
#A brief description for the ebook.
description = u'Microwave Journal web site ebook created using rss feeds.'
# Set publisher and publication type.
publisher = 'Horizon House'
title = u'Microwave Journal'
description = u'Microwave Journal Monthly Magazine'
publisher = 'Horizon House'
publication_type = 'magazine'
INDEX = 'http://www.microwavejournal.com/publications/'
oldest_article = 31 # monthly published magazine. Some months are 31 days!
max_articles_per_feed = 100
remove_empty_feeds = True
auto_cleanup = True
# Disable stylesheets and javascript from site.
no_stylesheets = True
remove_javascript = True
asciiize = True # Converts all none ascii characters to their ascii equivalents
needs_subscription = True # oh yeah... we need to login btw.
# Timeout, in seconds, for fetching files from the server. The default of 120 seconds seems somewhat excessive.
language = 'en'
timeout = 30
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
Convert_Grayscale = False # Convert images to gray scale or not
keep_only_tags = [dict(name='div', attrs={'class':'record'})]
no_stylesheets = True
remove_javascript = True
remove_tags = [
dict(name='font', attrs={'class':'footer'}), # remove fonts
]
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
.introduction, .first { font-weight: bold; } \
.cross-head { font-weight: bold; font-size: 125%; } \
@ -72,72 +67,75 @@ class MWJournal(BasicNewsRecipe):
h3 { font-size: 125%; font-weight: bold; } \
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
remove_tags = [
dict(name='div', attrs={'class':'boxadzonearea350'}), # Removes banner ads
dict(name='font', attrs={'class':'footer'}), # remove fonts if you do like your fonts more! Comment out to use website's fonts
dict(name='div', attrs={'class':'newsarticlead'})
]
# Remove various tag attributes to improve the look of the ebook pages.
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
# Remove the line breaks as well as href links. Books don't have links generally speaking
# Remove the line breaks, href links and float left/right and picture width/height.
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
(re.compile(r'<a.*?>'), lambda h1: ''),
(re.compile(r'</a>'), lambda h2: '')
(re.compile(r'</a>'), lambda h2: ''),
(re.compile(r'float:.*?'), lambda h3: ''),
(re.compile(r'width:.*?px'), lambda h4: ''),
(re.compile(r'height:.*?px'), lambda h5: '')
]
# Select the feeds that you are interested.
feeds = [
(u'Current Issue', u'http://www.mwjournal.com/rss/Rss.asp?type=99'),
(u'Industry News', u'http://www.mwjournal.com/rss/Rss.asp?type=1'),
(u'Resources', u'http://www.mwjournal.com/rss/Rss.asp?type=3'),
(u'Buyer\'s Guide', u'http://www.mwjournal.com/rss/Rss.asp?type=5'),
(u'Events', u'http://www.mwjournal.com/rss/Rss.asp?type=2'),
(u'All Updates', u'http://www.mwjournal.com/rss/Rss.asp?type=0'),
]
# No magazine is complete without a cover. Let's get it then!
# The function is adapted from the Economist recipe
def get_cover_url(self):
    '''Return the URL of the current issue's cover, or None if not found.

    The cover file name changes every month, so the image is located on
    the journal page by a fixed fragment of its path rather than by its
    full name. The fragment selects the file prefixed with '3_'.
    '''
    journal_page = 'http://www.mwjournal.com/Journal/'
    soup = self.index_to_soup(journal_page)

    def is_cover_src(x):
        return x and '/IssueImg/3_MWJ_CurrIss_CoverImg' in x

    cover_item = soup.find('img', attrs={'src': is_cover_src})
    if not cover_item:
        return None
    # The src is site-relative; strip stray whitespace before prefixing the host
    return 'http://www.mwjournal.com' + cover_item['src'].strip()
def print_version(self, url):
    '''Return the printer-friendly URL for the article at url.

    URLs containing one of the .asp article markers below are first
    resolved through self.browser (so the rewrite is applied to the URL
    the site finally serves) and then switched to the matching print page.
    Any other URL gets '/articles/' replaced by '/articles/print/'.
    '''
    # (marker found in the article URL, replacement giving the print page)
    asp_rewrites = (
        ('/Journal/article.asp?HH_ID=', '/Journal/Print.asp?Id='),
        ('/News/article.asp?HH_ID=', '/Journal/Print.asp?Id='),
        ('/Resources/TechLib.asp?HH_ID=', '/Resources/PrintRessource.asp?Id='),
    )
    for marker, printable in asp_rewrites:
        if marker in url:
            return self.browser.open_novisit(url).geturl().replace(marker, printable)
    return url.replace('/articles/', '/articles/print/')
def get_browser(self):
    '''Return a browser, logged in when a username and password are set.

    Login takes two steps: the form on omeda.com is submitted first, and
    its response contains a second form that must be submitted as well.
    That response is written to a temporary .htm file and reopened locally
    so the second form can be selected and submitted.

    Raises Exception when the final page does not contain 'Welcome '.
    '''
    br = BasicNewsRecipe.get_browser()
    if self.username is None or self.password is None:
        return br

    # First step: the main login form
    br.open('http://www.omeda.com/cgi-win/mwjreg.cgi?m=login')
    br.select_form('login')
    br['EMAIL_ADDRESS'] = self.username
    br['PASSWORD'] = self.password
    second_form_html = br.submit().read()

    # Second step: reload the returned page from disk so its form can be used
    with TemporaryFile(suffix='.htm') as fname:
        with open(fname, 'wb') as f:
            f.write(second_form_html)
        br.open_local_file(fname)
    br.select_form(nr=0)
    landing_page = br.submit().read()
    if 'Welcome ' not in landing_page:
        raise Exception('Failed to login, are you sure your username and password are correct?')
    return br
def parse_index(self):
    '''Build the list of sections and articles for the current issue.

    Fetches self.INDEX, stores the issue name in self.timefmt and, when a
    cover image is present, its URL in self.cover_url. Then every
    'records' section is walked and its 'record' posts are collected.

    Returns a list of (section_title, articles) tuples; sections that
    yield no articles are left out. Posts without a link are skipped and
    posts without an abstract get a description of None.
    '''
    soup = self.index_to_soup(self.INDEX)
    ts = soup.find('div', attrs={'class':'box1 article publications-show'})
    ds = self.tag_to_string(ts.find('h2'))
    self.log('Found Current Issue:', ds)
    self.timefmt = ' [%s]'%ds

    cover = ts.find('img', src=True)
    if cover is not None:
        self.cover_url = 'http://www.microwavejournal.com' + cover['src']
        self.log('Found Cover image:', self.cover_url)

    feeds = []
    seen_titles = set()  # titles already collected, used to drop duplicate articles

    sections = soup.find('div', attrs={'class':'box2 publication'})
    for section in sections.findAll('div', attrs={'class':'records'}):
        section_title = self.tag_to_string(section.find('h3'))
        self.log('Found section:', section_title)
        articles = []
        for post in section.findAll('div', attrs={'class':'record'}):
            title = self.tag_to_string(post.find('h2'))
            if 'The MWJ Puzzler' in title:  # Let's get rid of the useless Puzzler!
                continue
            if title in seen_titles:
                continue
            a = post.find('a', href=True)
            if a is None:
                # Nothing to download for a record without a link; the title
                # is not marked as seen so a linked duplicate is still used.
                continue
            seen_titles.add(title)
            url = a['href']
            if url.startswith('/'):
                url = 'http://www.microwavejournal.com'+url
            abstract = post.find('div', attrs={'class':'abstract'})
            p = abstract.find('p') if abstract is not None else None
            desc = None
            self.log('\tFound article:', title, 'at', url)
            if p is not None:
                desc = self.tag_to_string(p)
                self.log('\t\t', desc)
            articles.append({'title':title, 'url':url, 'description':desc,
                'date':self.timefmt})
        if articles:
            feeds.append((section_title, articles))
    return feeds
def postprocess_html(self, soup, first):
    '''Convert the article's images to grayscale when enabled.

    Does nothing unless self.Convert_Grayscale is true. Each <img> that
    has a src is opened from that path, switched to "GrayscaleType" and
    saved back to the same path. Returns soup.
    '''
    if self.Convert_Grayscale:
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            # The former `if img < 0` guard compared the Image object itself
            # with an int, which cannot detect a failed load, so it was dropped.
            img.type = "GrayscaleType"
            img.save(iurl)
    return soup

View File

@ -1,30 +1,36 @@
"""
readitlaterlist.com
"""
__license__ = 'GPL v3'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
'''
'''
readitlaterlist.com
2012, tBunnyMan <Wag That Tail At Me dot com>
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Readitlater(BasicNewsRecipe):
title = 'Read It Later'
__author__ = 'Darko Miletic, Przemyslaw Kryger'
description = '''Personalized news feeds. Go to readitlaterlist.com to
setup up your news. Fill in your account
username, and optionally you can add password.'''
publisher = 'readitlater.com'
title = 'ReadItLater'
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
description = '''Personalized news feeds. Go to readitlaterlist.com to setup \
up your news. This version displays pages of articles from \
oldest to newest, with max & minimum counts, and marks articles \
read after downloading.'''
publisher = 'readitlaterlist.com'
category = 'news, custom'
oldest_article = 7
max_articles_per_feed = 100
max_articles_per_feed = 50
minimum_articles = 1
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
INDEX = u'http://readitlaterlist.com'
LOGIN = INDEX + u'/l'
readList = []
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -33,41 +39,46 @@ class Readitlater(BasicNewsRecipe):
br.select_form(nr=0)
br['feed_id'] = self.username
if self.password is not None:
br['password'] = self.password
br['password'] = self.password
br.submit()
return br
def get_feeds(self):
self.report_progress(0, ('Fetching list of feeds...'))
self.report_progress(0, ('Fetching list of pages...'))
lfeeds = []
i = 1
feedurl = self.INDEX + u'/unread/1'
while True:
title = u'Unread articles, page ' + str(i)
lfeeds.append((title, feedurl))
self.report_progress(0, ('Got ') + str(i) + (' feeds'))
lfeeds.insert(0, (title, feedurl))
self.report_progress(0, ('Got ') + str(i) + (' pages'))
i += 1
soup = self.index_to_soup(feedurl)
ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
ritem = soup.find('a', attrs={'id':'next', 'class':'active'})
if ritem is None:
break
feedurl = self.INDEX + ritem['href']
if self.test:
return lfeeds[:2]
return lfeeds
def parse_index(self):
totalfeeds = []
articlesToGrab = self.max_articles_per_feed
lfeeds = self.get_feeds()
for feedobj in lfeeds:
if articlesToGrab < 1:
break
feedtitle, feedurl = feedobj
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul',attrs={'id':'list'})
for item in ritem.findAll('li'):
ritem = soup.find('ul', attrs={'id':'list'})
for item in reversed(ritem.findAll('li')):
if articlesToGrab < 1:
break
else:
articlesToGrab -= 1
description = ''
atag = item.find('a',attrs={'class':'text'})
atag = item.find('a', attrs={'class':'text'})
if atag and atag.has_key('href'):
url = self.INDEX + atag['href']
title = self.tag_to_string(item.div)
@ -78,6 +89,20 @@ class Readitlater(BasicNewsRecipe):
,'url' :url
,'description':description
})
readLink = item.find('a', attrs={'class':'check'})['href']
self.readList.append(readLink)
totalfeeds.append((feedtitle, articles))
if len(self.readList) < self.minimum_articles:
raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
return totalfeeds
def mark_as_read(self, markList):
    '''Open every link in markList, relative to self.INDEX.

    markList holds the hrefs collected from each article's 'check' link;
    each one is requested in order with a browser from self.get_browser().
    The responses are not inspected.
    '''
    br = self.get_browser()
    for link in markList:
        br.open(self.INDEX + link)
def cleanup(self):
    '''Pass every link collected in self.readList to mark_as_read().'''
    collected_links = self.readList
    self.mark_as_read(collected_links)

View File

@ -5,13 +5,14 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, glob, functools, re
from calibre import guess_type
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase
from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase)
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.html.to_zip import HTML2ZIP
plugins = []
# To archive plugins {{{
class PML2PMLZ(FileTypePlugin):
@ -86,6 +87,8 @@ class TXT2TXTZ(FileTypePlugin):
return list(set(images))
def run(self, path_to_ebook):
from calibre.ebooks.metadata.opf2 import metadata_to_opf
with open(path_to_ebook, 'rb') as ebf:
txt = ebf.read()
base_dir = os.path.dirname(path_to_ebook)
@ -117,6 +120,7 @@ class TXT2TXTZ(FileTypePlugin):
# No images so just import the TXT file.
return path_to_ebook
plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract,]
# }}}
# Metadata reader plugins {{{
@ -399,6 +403,10 @@ class ZipMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype):
    '''Read metadata from stream using calibre.ebooks.metadata.zip.

    ftype is accepted but not used. The import is done inside the method,
    so it only happens when the method is called.
    '''
    from calibre.ebooks.metadata.zip import get_metadata as read_zip_metadata
    return read_zip_metadata(stream)
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
# }}}
# Metadata writer plugins {{{
@ -499,107 +507,51 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.extz import set_metadata
set_metadata(stream, mi)
# }}}
from calibre.ebooks.comic.input import ComicInput
from calibre.ebooks.djvu.input import DJVUInput
from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.htmlz.input import HTMLZInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.pdb.input import PDBInput
from calibre.ebooks.azw4.input import AZW4Input
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.pml.input import PMLInput
from calibre.ebooks.rb.input import RBInput
from calibre.web.feeds.input import RecipeInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput
from calibre.ebooks.chm.input import CHMInput
from calibre.ebooks.snb.input import SNBInput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output
from calibre.ebooks.lit.output import LITOutput
from calibre.ebooks.lrf.output import LRFOutput
from calibre.ebooks.mobi.output import MOBIOutput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.pdb.output import PDBOutput
from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pml.output import PMLOutput
from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.rtf.output import RTFOutput
from calibre.ebooks.tcr.output import TCROutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.txt.output import TXTZOutput
from calibre.ebooks.html.output import HTMLOutput
from calibre.ebooks.htmlz.output import HTMLZOutput
from calibre.ebooks.snb.output import SNBOutput
from calibre.customize.profiles import input_profiles, output_profiles
from calibre.devices.apple.driver import ITUNES
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK
from calibre.devices.cybook.driver import CYBOOK, ORIZON
from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
POCKETBOOK701, POCKETBOOK360P, PI2)
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
KINDLE_FIRE)
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
from calibre.devices.prs505.driver import PRS505
from calibre.devices.prst1.driver import PRST1
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60, WEBOS
from calibre.devices.nokia.driver import N770, N810, E71X, E52
from calibre.devices.eslick.driver import ESLICK, EBK52
from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
LIBREAIR, ODYSSEY)
from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G)
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, ]
# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
from calibre.ebooks.metadata.sources.ozon import Ozon
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataWriter')]
# }}}
# Conversion plugins {{{
from calibre.ebooks.conversion.plugins.comic_input import ComicInput
from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput
from calibre.ebooks.conversion.plugins.epub_input import EPUBInput
from calibre.ebooks.conversion.plugins.fb2_input import FB2Input
from calibre.ebooks.conversion.plugins.html_input import HTMLInput
from calibre.ebooks.conversion.plugins.htmlz_input import HTMLZInput
from calibre.ebooks.conversion.plugins.lit_input import LITInput
from calibre.ebooks.conversion.plugins.mobi_input import MOBIInput
from calibre.ebooks.conversion.plugins.odt_input import ODTInput
from calibre.ebooks.conversion.plugins.pdb_input import PDBInput
from calibre.ebooks.conversion.plugins.azw4_input import AZW4Input
from calibre.ebooks.conversion.plugins.pdf_input import PDFInput
from calibre.ebooks.conversion.plugins.pml_input import PMLInput
from calibre.ebooks.conversion.plugins.rb_input import RBInput
from calibre.ebooks.conversion.plugins.recipe_input import RecipeInput
from calibre.ebooks.conversion.plugins.rtf_input import RTFInput
from calibre.ebooks.conversion.plugins.tcr_input import TCRInput
from calibre.ebooks.conversion.plugins.txt_input import TXTInput
from calibre.ebooks.conversion.plugins.lrf_input import LRFInput
from calibre.ebooks.conversion.plugins.chm_input import CHMInput
from calibre.ebooks.conversion.plugins.snb_input import SNBInput
from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput
from calibre.ebooks.conversion.plugins.fb2_output import FB2Output
from calibre.ebooks.conversion.plugins.lit_output import LITOutput
from calibre.ebooks.conversion.plugins.lrf_output import LRFOutput
from calibre.ebooks.conversion.plugins.mobi_output import MOBIOutput
from calibre.ebooks.conversion.plugins.oeb_output import OEBOutput
from calibre.ebooks.conversion.plugins.pdb_output import PDBOutput
from calibre.ebooks.conversion.plugins.pdf_output import PDFOutput
from calibre.ebooks.conversion.plugins.pml_output import PMLOutput
from calibre.ebooks.conversion.plugins.rb_output import RBOutput
from calibre.ebooks.conversion.plugins.rtf_output import RTFOutput
from calibre.ebooks.conversion.plugins.tcr_output import TCROutput
from calibre.ebooks.conversion.plugins.txt_output import TXTOutput, TXTZOutput
from calibre.ebooks.conversion.plugins.html_output import HTMLOutput
from calibre.ebooks.conversion.plugins.htmlz_output import HTMLZOutput
from calibre.ebooks.conversion.plugins.snb_output import SNBOutput
plugins += [
ComicInput,
DJVUInput,
@ -642,6 +594,66 @@ plugins += [
HTMLZOutput,
SNBOutput,
]
# }}}
# Catalog plugins {{{
from calibre.library.catalogs.csv_xml import CSV_XML
from calibre.library.catalogs.bibtex import BIBTEX
from calibre.library.catalogs.epub_mobi import EPUB_MOBI
plugins += [CSV_XML, BIBTEX, EPUB_MOBI]
# }}}
# EPUB Fix plugins {{{
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins += [Unmanifested, Epubcheck]
# }}}
# Profiles {{{
from calibre.customize.profiles import input_profiles, output_profiles
plugins += input_profiles + output_profiles
# }}}
# Device driver plugins {{{
from calibre.devices.apple.driver import ITUNES
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK
from calibre.devices.cybook.driver import CYBOOK, ORIZON
from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
POCKETBOOK701, POCKETBOOK360P, PI2)
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
KINDLE_FIRE)
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
from calibre.devices.prs505.driver import PRS505
from calibre.devices.prst1.driver import PRST1
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60, WEBOS
from calibre.devices.nokia.driver import N770, N810, E71X, E52
from calibre.devices.eslick.driver import ESLICK, EBK52
from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
LIBREAIR, ODYSSEY)
from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G)
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
# Order here matters. The first matched device is the one used.
plugins += [
HANLINV3,
@ -716,11 +728,20 @@ plugins += [
BOEYE_BDX,
USER_DEFINED,
]
# Register every class currently bound in this namespace whose name ends in
# 'MetadataReader', then every one whose name ends in 'MetadataWriter'.
# locals().values() is copied into a list before it is iterated.
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataWriter')]
# Register the input and output profiles as plugins as well.
plugins += input_profiles + output_profiles
# }}}
# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
from calibre.ebooks.metadata.sources.ozon import Ozon
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
# }}}
# Interface Actions {{{
@ -1623,3 +1644,34 @@ plugins += [
]
# }}}
# Developer self-test, run only when this module is executed as a script:
# times the import of calibre.customize.builtins and reports any of a fixed
# list of modules that ended up in sys.modules as a result of that import.
if __name__ == '__main__':
# Test load speed
import subprocess, textwrap
try:
# The measurement runs in a separate 'python' process, so sys.modules there
# only contains what the child script itself caused to be imported.
subprocess.check_call(['python', '-c', textwrap.dedent(
'''
from __future__ import print_function
import time, sys, init_calibre
st = time.time()
import calibre.customize.builtins
t = time.time() - st
ret = 0
for x in ('lxml', 'calibre.ebooks.BeautifulSoup', 'uuid',
'calibre.utils.terminfo', 'calibre.utils.magick', 'PIL', 'Image',
'sqlite3', 'mechanize', 'httplib', 'xml'):
if x in sys.modules:
ret = 1
print (x, 'has been loaded by a plugin')
if ret:
print ('\\nA good way to track down what is loading something is to run'
' python -c "import init_calibre; import calibre.customize.builtins"')
print()
print ('Time taken to import all plugins: %.2f'%t)
sys.exit(ret)
''')])
except subprocess.CalledProcessError:
# check_call raises when the child exits non-zero (the script exits with
# ret, which is 1 if any listed module was found); exit with status 1 too.
raise SystemExit(1)

View File

@ -5,7 +5,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from itertools import izip
from xml.sax.saxutils import escape
from calibre.customize import Plugin as _Plugin
@ -268,6 +267,7 @@ class OutputProfile(Plugin):
@classmethod
def tags_to_string(cls, tags):
    '''
    Return the tags joined with ', ' as a single XML-escaped string.

    :param tags: Iterable of tag strings.
    '''
    # escape is imported here, inside the method, not at module level
    from xml.sax.saxutils import escape as xml_escape
    joined = ', '.join(tags)
    return xml_escape(joined)
class iPadOutput(OutputProfile):

View File

@ -447,11 +447,14 @@ def plugin_for_catalog_format(fmt):
# }}}
def device_plugins(): # {{{
def device_plugins(include_disabled=False): # {{{
for plugin in _initialized_plugins:
if isinstance(plugin, DevicePlugin):
if not is_disabled(plugin):
if include_disabled or not is_disabled(plugin):
if platform in plugin.supported_platforms:
if getattr(plugin, 'plugin_needs_delayed_initialization',
False):
plugin.do_delayed_plugin_initialization()
yield plugin
# }}}
@ -496,7 +499,7 @@ def initialize_plugin(plugin, path_to_zip_file):
def has_external_plugins():
return bool(config['plugins'])
def initialize_plugins():
def initialize_plugins(perf=False):
global _initialized_plugins
_initialized_plugins = []
conflicts = [name for name in config['plugins'] if name in
@ -504,6 +507,11 @@ def initialize_plugins():
for p in conflicts:
remove_plugin(p)
external_plugins = config['plugins']
ostdout, ostderr = sys.stdout, sys.stderr
if perf:
from collections import defaultdict
import time
times = defaultdict(lambda:0)
for zfp in list(external_plugins) + builtin_plugins:
try:
if not isinstance(zfp, type):
@ -516,12 +524,22 @@ def initialize_plugins():
plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
except PluginNotFound:
continue
if perf:
st = time.time()
plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
if perf:
times[plugin.name] = time.time() - st
_initialized_plugins.append(plugin)
except:
print 'Failed to initialize plugin:', repr(zfp)
if DEBUG:
traceback.print_exc()
# Prevent a custom plugin from overriding stdout/stderr as this breaks
# ipython
sys.stdout, sys.stderr = ostdout, ostderr
if perf:
for x in sorted(times, key=lambda x:times[x]):
print ('%50s: %.3f'%(x, times[x]))
_initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True)
reread_filetype_plugins()
reread_metadata_plugins()

View File

@ -221,6 +221,20 @@ class ANDROID(USBMS):
drives['main'] = letter_a
return drives
@classmethod
def configure_for_kindle_app(cls):
    '''
    Write three settings into this driver's config proxy: format_map is set
    to mobi/azw/azw1/azw4/pdf, use_subdirs is turned off, and
    extra_customization becomes 'kindle' followed by the EBOOK_DIR_MAIN
    entries, comma separated.
    '''
    settings = cls._configProxy()
    kindle_formats = ['mobi', 'azw', 'azw1', 'azw4', 'pdf']
    settings['format_map'] = kindle_formats
    settings['use_subdirs'] = False
    book_dirs = ['kindle'] + cls.EBOOK_DIR_MAIN
    settings['extra_customization'] = ','.join(book_dirs)
@classmethod
def configure_for_generic_epub_app(cls):
    '''
    Delete the format_map, use_subdirs and extra_customization entries from
    this driver's config proxy, in that order.
    '''
    # NOTE(review): presumably deleting an entry makes the driver fall back
    # to its default for that setting -- confirm against _configProxy
    settings = cls._configProxy()
    for key in ('format_map', 'use_subdirs', 'extra_customization'):
        del settings[key]
class S60(USBMS):
name = 'S60 driver'

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
Sanda library wrapper
'''
import ctypes, uuid, hashlib, os, sys
import ctypes, hashlib, os, sys
from threading import Event, Lock
from calibre.constants import iswindows
from calibre import load_library
@ -350,6 +350,7 @@ class Bambook:
return None
def SendFile(self, fileName, guid = None):
import uuid
if self.handle:
taskID = job.NewJob()
if guid:

File diff suppressed because one or more lines are too long

View File

@ -97,3 +97,13 @@ class FOLDER_DEVICE(USBMS):
@classmethod
def settings(self):
    '''
    Return the parsed configuration of FOLDER_DEVICE_FOR_CONFIG; this class
    reads its settings from that class's config object.
    '''
    config = FOLDER_DEVICE_FOR_CONFIG._config()
    return config.parse()
@classmethod
def config_widget(cls):
    '''
    Return the configuration widget created by FOLDER_DEVICE_FOR_CONFIG.
    '''
    config_source = FOLDER_DEVICE_FOR_CONFIG
    return config_source.config_widget()
@classmethod
def save_settings(cls, config_widget):
    '''
    Hand the given configuration widget to
    FOLDER_DEVICE_FOR_CONFIG.save_settings and return its result.
    '''
    config_source = FOLDER_DEVICE_FOR_CONFIG
    return config_source.save_settings(config_widget)

View File

@ -9,7 +9,6 @@ Generates and writes an APNX page mapping file.
'''
import struct
import uuid
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.pdb.header import PdbHeaderReader
@ -51,6 +50,7 @@ class APNXBuilder(object):
apnxf.write(apnx)
def generate_apnx(self, pages):
import uuid
apnx = ''
content_vals = {

View File

@ -10,10 +10,8 @@ Device driver for Amazon's Kindle
import datetime, os, re, sys, json, hashlib
from calibre.devices.kindle.apnx import APNXBuilder
from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS
from calibre.ebooks.metadata import MetaInformation
from calibre import strftime
'''
@ -152,6 +150,7 @@ class KINDLE(USBMS):
path_map, book_ext = resolve_bookmark_paths(storage, path_map)
bookmarked_books = {}
for id in path_map:
bookmark_ext = path_map[id].rpartition('.')[2]
myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
@ -236,6 +235,8 @@ class KINDLE(USBMS):
def add_annotation_to_library(self, db, db_id, annotation):
from calibre.ebooks.BeautifulSoup import Tag
from calibre.ebooks.metadata import MetaInformation
bm = annotation
ignore_tags = set(['Catalog', 'Clippings'])
@ -363,6 +364,8 @@ class KINDLE2(KINDLE):
'''
Hijacking this function to write the apnx file.
'''
from calibre.devices.kindle.apnx import APNXBuilder
opts = self.settings()
if not opts.extra_customization[self.OPT_APNX]:
return

View File

@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from contextlib import closing
import sqlite3 as sqlite
class Bookmark(): # {{{
'''
@ -32,7 +31,7 @@ class Bookmark(): # {{{
def get_bookmark_data(self):
''' Return the timestamp and last_read_location '''
import sqlite3 as sqlite
user_notes = {}
self.timestamp = os.path.getmtime(self.path)
with closing(sqlite.connect(self.db_path)) as connection:

View File

@ -6,7 +6,6 @@ __copyright__ = '2010, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid
__docformat__ = 'restructuredtext en'
import os, time, calendar
import sqlite3 as sqlite
from contextlib import closing
from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book
@ -16,7 +15,6 @@ from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS, debug_print
from calibre import prints
from calibre.devices.usbms.books import CollectionsBookList
from calibre.utils.magick.draw import save_cover_data_to
from calibre.ptempfile import PersistentTemporaryFile
class KOBO(USBMS):
@ -230,6 +228,7 @@ class KOBO(USBMS):
traceback.print_exc()
return changed
import sqlite3 as sqlite
with closing(sqlite.connect(
self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection:
@ -344,6 +343,7 @@ class KOBO(USBMS):
# 2) volume_shorcover
# 2) content
import sqlite3 as sqlite
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection:
@ -739,6 +739,8 @@ class KOBO(USBMS):
# Needs to be outside books collection as in the case of removing
# the last book from the collection the list of books is empty
# and the removal of the last book would not occur
import sqlite3 as sqlite
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection:
@ -850,6 +852,7 @@ class KOBO(USBMS):
debug_print('FAILED to upload cover', filepath)
def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale):
from calibre.utils.magick.draw import save_cover_data_to
if metadata.cover:
cover = self.normalize_path(metadata.cover.replace('/', os.sep))
@ -859,6 +862,7 @@ class KOBO(USBMS):
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(filepath)
ContentID = self.contentid_from_path(filepath, ContentType)
import sqlite3 as sqlite
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
'.kobo/KoboReader.sqlite'))) as connection:

View File

@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
import os, time
from base64 import b64decode
from uuid import uuid4
from lxml import etree
from datetime import date
from calibre import prints, guess_type, isbytestring
@ -78,6 +76,7 @@ def strftime(epoch, zone=time.localtime):
return ' '.join(src)
def uuid():
    '''
    Return a random UUID4 as an upper-case string with only its first
    hyphen removed.
    '''
    # uuid4 is imported here, inside the function, not at module level
    from uuid import uuid4
    text = str(uuid4())
    head, _sep, tail = text.partition('-')
    return (head + tail).upper()
# }}}
@ -85,6 +84,8 @@ def uuid():
class XMLCache(object):
def __init__(self, paths, ext_paths, prefixes, use_author_sort):
from lxml import etree
if DEBUG:
debug_print('Building XMLCache...', paths)
self.paths = paths
@ -714,6 +715,8 @@ class XMLCache(object):
def write(self):
from lxml import etree
for i, path in self.paths.items():
self.move_playlists_to_bottom()
self.cleanup_whitespace(i)

View File

@ -12,8 +12,6 @@ Device driver for the SONY T1 devices
'''
import os, time, re
import sqlite3 as sqlite
from sqlite3 import DatabaseError
from contextlib import closing
from datetime import date
@ -146,6 +144,8 @@ class PRST1(USBMS):
return True
def books(self, oncard=None, end_session=True):
import sqlite3 as sqlite
dummy_bl = BookList(None, None, None)
if (
@ -246,6 +246,8 @@ class PRST1(USBMS):
debug_print('PRST1: finished sync_booklists')
def update_device_database(self, booklist, collections_attributes, oncard):
import sqlite3 as sqlite
debug_print('PRST1: starting update_device_database')
plugboard = None
@ -274,6 +276,8 @@ class PRST1(USBMS):
def update_device_books(self, connection, booklist, source_id, plugboard,
dbpath):
from sqlite3 import DatabaseError
opts = self.settings()
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
@ -489,6 +493,8 @@ class PRST1(USBMS):
debug_print('PRS-T1: finished rebuild_collections')
def upload_cover(self, path, filename, metadata, filepath):
import sqlite3 as sqlite
debug_print('PRS-T1: uploading cover')
if filepath.startswith(self._main_prefix):

View File

@ -10,7 +10,7 @@ driver. It is intended to be subclassed with the relevant parts implemented
for a particular device.
'''
import os, re, time, json, uuid, functools, shutil
import os, re, time, json, functools, shutil
from itertools import cycle
from calibre.constants import numeric_version
@ -58,6 +58,7 @@ class USBMS(CLI, Device):
SCAN_FROM_ROOT = False
def _update_driveinfo_record(self, dinfo, prefix, location_code, name=None):
import uuid
if not isinstance(dinfo, dict):
dinfo = {}
if dinfo.get('device_store_uuid', None) is None:

View File

@ -90,6 +90,10 @@ class USER_DEFINED(USBMS):
OPT_CARD_A_FOLDER = 9
def initialize(self):
self.plugin_needs_delayed_initialization = True
USBMS.initialize(self)
def do_delayed_plugin_initialization(self):
try:
e = self.settings().extra_customization
self.VENDOR_ID = int(e[self.OPT_USB_VENDOR_ID], 16)
@ -107,4 +111,6 @@ class USER_DEFINED(USBMS):
except:
import traceback
traceback.print_exc()
USBMS.initialize(self)
self.plugin_needs_delayed_initialization = False

View File

@ -8,7 +8,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, codecs
from chardet import detect
ENCODING_PATS = [
re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
@ -34,8 +33,13 @@ def substitute_entites(raw):
_CHARSET_ALIASES = { "macintosh" : "mac-roman",
"x-sjis" : "shift-jis" }
def detect(*args, **kwargs):
    '''
    Forward all arguments to chardet.detect and return its result.

    chardet is imported inside the function instead of at module level.
    '''
    import chardet
    return chardet.detect(*args, **kwargs)
def force_encoding(raw, verbose, assume_utf8=False):
from calibre.constants import preferred_encoding
try:
chardet = detect(raw[:1024*50])
except:

View File

@ -7,11 +7,10 @@ __docformat__ = 'restructuredtext en'
Based on ideas from comiclrf created by FangornUK.
'''
import os, shutil, traceback, textwrap, time, codecs
import os, traceback, time
from Queue import Empty
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import extract, CurrentDir, prints, walk
from calibre import extract, prints, walk
from calibre.constants import filesystem_encoding
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.ipc.server import Server
@ -273,245 +272,4 @@ def process_pages(pages, opts, update, tdir):
return ans, failures
class ComicInput(InputFormatPlugin):
name = 'Comic Input'
author = 'Kovid Goyal'
description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
file_types = set(['cbz', 'cbr', 'cbc'])
is_image_collection = True
core_usage = -1
options = set([
OptionRecommendation(name='colors', recommended_value=256,
help=_('Number of colors for grayscale image conversion. Default: '
'%default. Values of less than 256 may result in blurred text '
'on your device if you are creating your comics in EPUB format.')),
OptionRecommendation(name='dont_normalize', recommended_value=False,
help=_('Disable normalize (improve contrast) color range '
'for pictures. Default: False')),
OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
OptionRecommendation(name='dont_sharpen', recommended_value=False,
help=_('Disable sharpening.')),
OptionRecommendation(name='disable_trim', recommended_value=False,
help=_('Disable trimming of comic pages. For some comics, '
'trimming might remove content as well as borders.')),
OptionRecommendation(name='landscape', recommended_value=False,
help=_("Don't split landscape images into two portrait images")),
OptionRecommendation(name='wide', recommended_value=False,
help=_("Keep aspect ratio and scale image using screen height as "
"image width for viewing in landscape mode.")),
OptionRecommendation(name='right2left', recommended_value=False,
help=_('Used for right-to-left publications like manga. '
'Causes landscape pages to be split into portrait pages '
'from right to left.')),
OptionRecommendation(name='despeckle', recommended_value=False,
help=_('Enable Despeckle. Reduces speckle noise. '
'May greatly increase processing time.')),
OptionRecommendation(name='no_sort', recommended_value=False,
help=_("Don't sort the files found in the comic "
"alphabetically by name. Instead use the order they were "
"added to the comic.")),
OptionRecommendation(name='output_format', choices=['png', 'jpg'],
recommended_value='png', help=_('The format that images in the created ebook '
'are converted to. You can experiment to see which format gives '
'you optimal size and look on your device.')),
OptionRecommendation(name='no_process', recommended_value=False,
help=_("Apply no processing to the image")),
OptionRecommendation(name='dont_grayscale', recommended_value=False,
help=_('Do not convert the image to grayscale (black and white)')),
OptionRecommendation(name='comic_image_size', recommended_value=None,
help=_('Specify the image size as widthxheight pixels. Normally,'
' an image size is automatically calculated from the output '
'profile, this option overrides it.')),
OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
help=_('When converting a CBC do not add links to each page to'
' the TOC. Note this only applies if the TOC has more than one'
' section')),
])
# Conversion option values this plugin recommends, each as a
# (option name, value, OptionRecommendation.HIGH) tuple.
recommendations = set([
('margin_left', 0, OptionRecommendation.HIGH),
('margin_top', 0, OptionRecommendation.HIGH),
('margin_right', 0, OptionRecommendation.HIGH),
('margin_bottom', 0, OptionRecommendation.HIGH),
('insert_blank_line', False, OptionRecommendation.HIGH),
('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
('change_justification', 'left', OptionRecommendation.HIGH),
('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
('chapter', None, OptionRecommendation.HIGH),
# NOTE(review): 'page_breaks_brefore' looks like a misspelling of
# 'page_breaks_before', which is also listed below -- confirm whether this
# entry can be dropped.
('page_breaks_brefore', None, OptionRecommendation.HIGH),
('use_auto_toc', False, OptionRecommendation.HIGH),
('page_breaks_before', None, OptionRecommendation.HIGH),
('disable_font_rescaling', True, OptionRecommendation.HIGH),
('linearize_tables', False, OptionRecommendation.HIGH),
])
def get_comics_from_collection(self, stream):
    '''
    Unpack a comic collection archive and list the comics named in its
    comics.txt index.

    The archive is extracted into a persistent temporary directory. Every
    non-blank line of comics.txt is read as ``filename:title``; when the
    title is empty the file name without its extension is used instead.

    :param stream: Open file object of the archive. ``stream.name`` is used
                   in error messages.
    :return: List of ``[title, path]`` pairs, in index order, for every
             listed file that is readable.
    :raises ValueError: If comics.txt is missing or no listed file is
                        readable.
    '''
    from calibre.libunzip import extract as zipextract
    tdir = PersistentTemporaryDirectory('_comic_collection')
    zipextract(stream, tdir)
    comics = []
    with CurrentDir(tdir):
        if not os.path.exists('comics.txt'):
            raise ValueError((
                '%s is not a valid comic collection'
                ' no comics.txt was found in the file')
                    %stream.name)
        # Read the index through a context manager so the handle is closed
        # straight away instead of being left to the garbage collector
        with open('comics.txt', 'rb') as f:
            raw = f.read()
        # Decode according to the byte order mark, dropping the decoded BOM
        # character; without a BOM the data is taken to be UTF-8
        if raw.startswith(codecs.BOM_UTF16_BE):
            raw = raw.decode('utf-16-be')[1:]
        elif raw.startswith(codecs.BOM_UTF16_LE):
            raw = raw.decode('utf-16-le')[1:]
        elif raw.startswith(codecs.BOM_UTF8):
            raw = raw.decode('utf-8')[1:]
        else:
            raw = raw.decode('utf-8')
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            # Split once on the first ':' (previously computed twice)
            fname, _sep, title = line.partition(':')
            fname = fname.replace('#', '_')
            fname = os.path.join(tdir, *fname.split('/'))
            if not title:
                title = os.path.basename(fname).rpartition('.')[0]
            # Entries whose file cannot be read are silently skipped
            if os.access(fname, os.R_OK):
                comics.append([title, fname])
    if not comics:
        raise ValueError('%s has no comics'%stream.name)
    return comics
def get_pages(self, comic, tdir2):
    '''
    Extract one comic and return the list of page image paths placed in
    tdir2.

    When ``self.opts.no_process`` is set the pages are copied unchanged into
    tdir2; otherwise they are run through process_pages and any failures are
    logged as warnings.

    :param comic: Path of the comic archive to extract.
    :param tdir2: Directory that receives the copied or processed pages.
    :return: List of page paths inside tdir2.
    :raises ValueError: If the comic has no pages, or none survive
                        processing.
    '''
    tdir = extract_comic(comic)
    new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
                           verbose=self.opts.verbose)
    if not new_pages:
        raise ValueError('Could not find any pages in the comic: %s'
                         %comic)
    if self.opts.no_process:
        copied = []
        for page in new_pages:
            dest = os.path.join(tdir2, os.path.basename(page))
            shutil.copyfile(page, dest)
            copied.append(dest)
        return copied
    new_pages, failures = process_pages(new_pages, self.opts,
                                        self.report_progress, tdir2)
    if failures:
        self.log.warning('Could not process the following pages '
            '(run with --verbose to see why):')
        for f in failures:
            self.log.warning('\t', f)
    if not new_pages:
        raise ValueError('Could not find any valid pages in comic: %s'
                         % comic)
    # The former 'thumbnail' local was computed and probed on disk but never
    # used or returned, so it has been removed.
    return new_pages
def get_images(self):
    '''
    Return the list stored in ``self._images``, which convert() fills with
    the page image paths of every converted comic.
    '''
    images = self._images
    return images
def convert(self, stream, opts, file_ext, log, accelerators):
    '''
    Convert a comic file, or a collection of comics, into page images, XHTML
    wrapper pages, an OPF file and an NCX table of contents written to the
    current directory.

    :param stream: Open file object of the input; it is closed once the list
                   of comics is known.
    :param opts: Conversion options, stored on ``self.opts``.
    :param file_ext: Input extension; 'cbc' is treated as a collection.
    :param log: Logger, stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Absolute path of the written metadata.opf.
    :raises ValueError: If no comic yields any pages.
    '''
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log= opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()
    comics = []
    for i, x in enumerate(comics_):
        title, fname = x
        # A single comic is unpacked into the current directory; several
        # comics each get their own comic_N sub-directory
        cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages: continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s'%stream.name)

    mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
                         [_('Unknown')])
    opf = OPFCreator(os.path.abspath('.'), mi)
    entries = []

    def href(x):
        # One comic: bare file name; several: 'comic_N/file name'
        if len(comics) == 1: return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(w, None) for w in map(href, wrappers)] + \
                [(x, None) for x in map(href, pages)]
    opf.create_manifest(entries)
    spine = []
    for comic in comics:
        spine.extend(map(href, comic[2]))
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)
    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                    play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]),
                    None, comic[0], play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                            _('Page')+' %d'%(i+1), play_order=po)
                    po += 1
    opf.set_toc(toc)
    # Both output files are opened before rendering, as before, but are now
    # closed deterministically once rendering finishes (they used to leak)
    with open('metadata.opf', 'wb') as m:
        with open('toc.ncx', 'wb') as n:
            opf.render(m, n, 'toc.ncx')
    return os.path.abspath('metadata.opf')
def create_wrappers(self, pages):
    '''
    Write one XHTML wrapper file per page image and return the wrapper
    paths.

    The wrappers are named page_1.xhtml, page_2.xhtml, ... and are written
    to the directory that contains the first page.

    :param pages: Non-empty list of page image paths.
    :return: List of wrapper file paths, in page order.
    '''
    from calibre.ebooks.oeb.base import XHTML_NS
    wrappers = []
    WRAPPER = textwrap.dedent('''\
    <html xmlns="%s">
    <head>
    <title>Page #%d</title>
    <style type="text/css">
    @page { margin:0pt; padding: 0pt}
    body { margin: 0pt; padding: 0pt}
    div { text-align: center }
    </style>
    </head>
    <body>
    <div>
    <img src="%s" alt="comic page #%d" />
    </div>
    </body>
    </html>
    ''')
    # Named out_dir so the builtin dir() is not shadowed
    out_dir = os.path.dirname(pages[0])
    for i, page in enumerate(pages):
        wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
        page = os.path.join(out_dir, 'page_%d.xhtml'%(i+1))
        # Close each wrapper file as soon as it is written
        with open(page, 'wb') as f:
            f.write(wrapper)
        wrappers.append(page)
    return wrappers

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.azw4.reader import Reader
class AZW4Input(InputFormatPlugin):
@ -19,6 +17,9 @@ class AZW4Input(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.azw4.reader import Reader
header = PdbHeaderReader(stream)
reader = Reader(header, stream, log, options)
opf = reader.extract_content(os.getcwd())

View File

@ -3,9 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, uuid
from lxml import html
import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
@ -77,7 +75,7 @@ class CHMInput(InputFormatPlugin):
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
# use HTMLInput plugin to generate book
from calibre.ebooks.html.input import HTMLInput
from calibre.customize.builtins import HTMLInput
opts.breadth_first = True
htmlinput = HTMLInput(None)
oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
@ -85,6 +83,8 @@ class CHMInput(InputFormatPlugin):
def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
import uuid
from lxml import html
from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer
oeb = create_oebbook(log, None, opts,
@ -142,6 +142,7 @@ class CHMInput(InputFormatPlugin):
return oeb
def _create_html_root(self, hhcpath, log):
from lxml import html
hhcdata = self._read_file(hhcpath)
hhcroot = html.fromstring(hhcdata)
chapters = self._process_nodes(hhcroot)

View File

@ -0,0 +1,259 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Based on ideas from comiclrf created by FangornUK.
'''
import shutil, textwrap, codecs, os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
class ComicInput(InputFormatPlugin):
name = 'Comic Input'
author = 'Kovid Goyal'
description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
file_types = set(['cbz', 'cbr', 'cbc'])
is_image_collection = True
core_usage = -1
options = set([
OptionRecommendation(name='colors', recommended_value=256,
help=_('Number of colors for grayscale image conversion. Default: '
'%default. Values of less than 256 may result in blurred text '
'on your device if you are creating your comics in EPUB format.')),
OptionRecommendation(name='dont_normalize', recommended_value=False,
help=_('Disable normalize (improve contrast) color range '
'for pictures. Default: False')),
OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
OptionRecommendation(name='dont_sharpen', recommended_value=False,
help=_('Disable sharpening.')),
OptionRecommendation(name='disable_trim', recommended_value=False,
help=_('Disable trimming of comic pages. For some comics, '
'trimming might remove content as well as borders.')),
OptionRecommendation(name='landscape', recommended_value=False,
help=_("Don't split landscape images into two portrait images")),
OptionRecommendation(name='wide', recommended_value=False,
help=_("Keep aspect ratio and scale image using screen height as "
"image width for viewing in landscape mode.")),
OptionRecommendation(name='right2left', recommended_value=False,
help=_('Used for right-to-left publications like manga. '
'Causes landscape pages to be split into portrait pages '
'from right to left.')),
OptionRecommendation(name='despeckle', recommended_value=False,
help=_('Enable Despeckle. Reduces speckle noise. '
'May greatly increase processing time.')),
OptionRecommendation(name='no_sort', recommended_value=False,
help=_("Don't sort the files found in the comic "
"alphabetically by name. Instead use the order they were "
"added to the comic.")),
OptionRecommendation(name='output_format', choices=['png', 'jpg'],
recommended_value='png', help=_('The format that images in the created ebook '
'are converted to. You can experiment to see which format gives '
'you optimal size and look on your device.')),
OptionRecommendation(name='no_process', recommended_value=False,
help=_("Apply no processing to the image")),
OptionRecommendation(name='dont_grayscale', recommended_value=False,
help=_('Do not convert the image to grayscale (black and white)')),
OptionRecommendation(name='comic_image_size', recommended_value=None,
help=_('Specify the image size as widthxheight pixels. Normally,'
' an image size is automatically calculated from the output '
'profile, this option overrides it.')),
OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
help=_('When converting a CBC do not add links to each page to'
' the TOC. Note this only applies if the TOC has more than one'
' section')),
])
# Conversion option values this plugin recommends, each as a
# (option name, value, OptionRecommendation.HIGH) tuple.
recommendations = set([
('margin_left', 0, OptionRecommendation.HIGH),
('margin_top', 0, OptionRecommendation.HIGH),
('margin_right', 0, OptionRecommendation.HIGH),
('margin_bottom', 0, OptionRecommendation.HIGH),
('insert_blank_line', False, OptionRecommendation.HIGH),
('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
('change_justification', 'left', OptionRecommendation.HIGH),
('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
('chapter', None, OptionRecommendation.HIGH),
# NOTE(review): 'page_breaks_brefore' looks like a misspelling of
# 'page_breaks_before', which is also listed below -- confirm whether this
# entry can be dropped.
('page_breaks_brefore', None, OptionRecommendation.HIGH),
('use_auto_toc', False, OptionRecommendation.HIGH),
('page_breaks_before', None, OptionRecommendation.HIGH),
('disable_font_rescaling', True, OptionRecommendation.HIGH),
('linearize_tables', False, OptionRecommendation.HIGH),
])
def get_comics_from_collection(self, stream):
    '''
    Unpack a comic collection archive and list the comics named in its
    comics.txt index.

    The archive is extracted into a persistent temporary directory. Every
    non-blank line of comics.txt is read as ``filename:title``; when the
    title is empty the file name without its extension is used instead.

    :param stream: Open file object of the archive. ``stream.name`` is used
                   in error messages.
    :return: List of ``[title, path]`` pairs, in index order, for every
             listed file that is readable.
    :raises ValueError: If comics.txt is missing or no listed file is
                        readable.
    '''
    from calibre.libunzip import extract as zipextract
    tdir = PersistentTemporaryDirectory('_comic_collection')
    zipextract(stream, tdir)
    comics = []
    with CurrentDir(tdir):
        if not os.path.exists('comics.txt'):
            raise ValueError((
                '%s is not a valid comic collection'
                ' no comics.txt was found in the file')
                    %stream.name)
        # Read the index through a context manager so the handle is closed
        # straight away instead of being left to the garbage collector
        with open('comics.txt', 'rb') as f:
            raw = f.read()
        # Decode according to the byte order mark, dropping the decoded BOM
        # character; without a BOM the data is taken to be UTF-8
        if raw.startswith(codecs.BOM_UTF16_BE):
            raw = raw.decode('utf-16-be')[1:]
        elif raw.startswith(codecs.BOM_UTF16_LE):
            raw = raw.decode('utf-16-le')[1:]
        elif raw.startswith(codecs.BOM_UTF8):
            raw = raw.decode('utf-8')[1:]
        else:
            raw = raw.decode('utf-8')
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            # Split once on the first ':' (previously computed twice)
            fname, _sep, title = line.partition(':')
            fname = fname.replace('#', '_')
            fname = os.path.join(tdir, *fname.split('/'))
            if not title:
                title = os.path.basename(fname).rpartition('.')[0]
            # Entries whose file cannot be read are silently skipped
            if os.access(fname, os.R_OK):
                comics.append([title, fname])
    if not comics:
        raise ValueError('%s has no comics'%stream.name)
    return comics
def get_pages(self, comic, tdir2):
    '''
    Extract one comic and return the list of page image paths placed in
    tdir2.

    When ``self.opts.no_process`` is set the pages are copied unchanged into
    tdir2; otherwise they are run through process_pages and any failures are
    logged as warnings.

    :param comic: Path of the comic archive to extract.
    :param tdir2: Directory that receives the copied or processed pages.
    :return: List of page paths inside tdir2.
    :raises ValueError: If the comic has no pages, or none survive
                        processing.
    '''
    # The comic helpers are imported inside the method, not at module level
    from calibre.ebooks.comic.input import (extract_comic, process_pages,
            find_pages)
    tdir = extract_comic(comic)
    new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
                           verbose=self.opts.verbose)
    if not new_pages:
        raise ValueError('Could not find any pages in the comic: %s'
                         %comic)
    if self.opts.no_process:
        copied = []
        for page in new_pages:
            dest = os.path.join(tdir2, os.path.basename(page))
            shutil.copyfile(page, dest)
            copied.append(dest)
        return copied
    new_pages, failures = process_pages(new_pages, self.opts,
                                        self.report_progress, tdir2)
    if failures:
        self.log.warning('Could not process the following pages '
            '(run with --verbose to see why):')
        for f in failures:
            self.log.warning('\t', f)
    if not new_pages:
        raise ValueError('Could not find any valid pages in comic: %s'
                         % comic)
    # The former 'thumbnail' local was computed and probed on disk but never
    # used or returned, so it has been removed.
    return new_pages
def get_images(self):
    '''
    Return the list stored in ``self._images``, which convert() fills with
    the page image paths of every converted comic.
    '''
    images = self._images
    return images
def convert(self, stream, opts, file_ext, log, accelerators):
    '''
    Convert a comic file, or a collection of comics, into page images, XHTML
    wrapper pages, an OPF file and an NCX table of contents written to the
    current directory.

    :param stream: Open file object of the input; it is closed once the list
                   of comics is known.
    :param opts: Conversion options, stored on ``self.opts``.
    :param file_ext: Input extension; 'cbc' is treated as a collection.
    :param log: Logger, stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Absolute path of the written metadata.opf.
    :raises ValueError: If no comic yields any pages.
    '''
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log= opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()
    comics = []
    for i, x in enumerate(comics_):
        title, fname = x
        # A single comic is unpacked into the current directory; several
        # comics each get their own comic_N sub-directory
        cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages: continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s'%stream.name)

    mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
                         [_('Unknown')])
    opf = OPFCreator(os.path.abspath('.'), mi)
    entries = []

    def href(x):
        # One comic: bare file name; several: 'comic_N/file name'
        if len(comics) == 1: return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(w, None) for w in map(href, wrappers)] + \
                [(x, None) for x in map(href, pages)]
    opf.create_manifest(entries)
    spine = []
    for comic in comics:
        spine.extend(map(href, comic[2]))
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)
    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                    play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]),
                    None, comic[0], play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                            _('Page')+' %d'%(i+1), play_order=po)
                    po += 1
    opf.set_toc(toc)
    # Both output files are opened before rendering, as before, but are now
    # closed deterministically once rendering finishes (they used to leak)
    with open('metadata.opf', 'wb') as m:
        with open('toc.ncx', 'wb') as n:
            opf.render(m, n, 'toc.ncx')
    return os.path.abspath('metadata.opf')
def create_wrappers(self, pages):
    '''
    Write one minimal XHTML file per page image, next to the first image.

    :param pages: Paths of the page images, in reading order.
    :return: Paths of the generated ``page_N.xhtml`` files (N starts at 1),
             in the same order as ``pages``. An empty list for no pages.
    '''
    from calibre.ebooks.oeb.base import XHTML_NS
    wrappers = []
    if not pages:
        return wrappers
    WRAPPER = textwrap.dedent('''\
        <html xmlns="%s">
        <head>
        <title>Page #%d</title>
        <style type="text/css">
        @page { margin:0pt; padding: 0pt}
        body { margin: 0pt; padding: 0pt}
        div { text-align: center }
        </style>
        </head>
        <body>
        <div>
        <img src="%s" alt="comic page #%d" />
        </div>
        </body>
        </html>
        ''')
    # All wrappers go into the directory of the first page
    page_dir = os.path.dirname(pages[0])
    for i, page in enumerate(pages):
        wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
        page = os.path.join(page_dir, 'page_%d.xhtml'%(i+1))
        # Close every wrapper as soon as it is written instead of leaving
        # one open handle per page to the garbage collector
        with open(page, 'wb') as f:
            f.write(wrapper)
        wrappers.append(page)
    return wrappers

View File

@ -12,7 +12,6 @@ from subprocess import Popen, PIPE
from cStringIO import StringIO
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.txt.processor import convert_basic
class DJVUInput(InputFormatPlugin):
@ -28,6 +27,8 @@ class DJVUInput(InputFormatPlugin):
])
def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.txt.processor import convert_basic
stdout = StringIO()
ppdjvu = True
# using djvutxt is MUCH faster, should make it an option

View File

@ -3,11 +3,9 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, uuid
import os
from itertools import cycle
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
class EPUBInput(InputFormatPlugin):
@ -30,6 +28,8 @@ class EPUBInput(InputFormatPlugin):
f.write(raw[1024:])
def process_encryption(self, encfile, opf, log):
from lxml import etree
import uuid
key = None
for item in opf.identifier_iter():
scheme = None
@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin):
return False
def rationalize_cover(self, opf, log):
from lxml import etree
guide_cover, guide_elem = None, None
for guide_elem in opf.iterguide():
if guide_elem.get('type', '').lower() == 'cover':
@ -110,6 +111,7 @@ class EPUBInput(InputFormatPlugin):
renderer)
def find_opf(self):
from lxml import etree
def attr(n, attr):
for k, v in n.attrib.items():
if k.endswith(attr):

View File

@ -8,14 +8,12 @@ __docformat__ = 'restructuredtext en'
import os, shutil, re
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation)
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import filesystem_encoding
from lxml import etree
block_level_tags = (
'address',
'body',
@ -289,6 +287,7 @@ class EPUBOutput(OutputFormatPlugin):
# }}}
def condense_ncx(self, ncx_path):
from lxml import etree
if not self.opts.pretty_print:
tree = etree.parse(ncx_path)
for tag in tree.getroot().iter(tag=etree.Element):

View File

@ -6,7 +6,6 @@ Convert .fb2 files to .lrf
"""
import os, re
from base64 import b64decode
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type
@ -38,6 +37,7 @@ class FB2Input(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from lxml import etree
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER

View File

@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre.ebooks.fb2.fb2ml import FB2MLizer
class FB2Output(OutputFormatPlugin):
@ -162,6 +161,7 @@ class FB2Output(OutputFormatPlugin):
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.oeb.transforms.jacket import linearize_jacket
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
from calibre.ebooks.fb2.fb2ml import FB2MLizer
try:
rasterizer = SVGRasterizer()

View File

@ -0,0 +1,283 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, tempfile, os
from functools import partial
from itertools import izip
from urllib import quote
from calibre.constants import islinux, isbsd
from calibre.customize.conversion import (InputFormatPlugin,
OptionRecommendation)
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
# Input plugin for HTML files and OPF packages. HTML input is assembled into
# an OEB book by create_oebbook(); OPF input is handed to the plumber's
# create_oebbook() (see convert()).
class HTMLInput(InputFormatPlugin):
name = 'HTML Input'
author = 'Kovid Goyal'
description = 'Convert HTML and OPF files to an OEB'
file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
# Plugin specific conversion options. The whole opts object is passed on to
# get_filelist() in create_oebbook(); dont_package is rejected by convert()
# for anything that is not an OPF.
options = set([
OptionRecommendation(name='breadth_first',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Traverse links in HTML files breadth first. Normally, '
'they are traversed depth first.'
)
),
OptionRecommendation(name='max_levels',
recommended_value=5, level=OptionRecommendation.LOW,
help=_('Maximum levels of recursion when following links in '
'HTML files. Must be non-negative. 0 implies that no '
'links in the root HTML file are followed. Default is '
'%default.'
)
),
OptionRecommendation(name='dont_package',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Normally this input plugin re-arranges all the input '
'files into a standard folder hierarchy. Only use this option '
'if you know what you are doing as it can result in various '
'nasty side effects in the rest of the conversion pipeline.'
)
),
])
def convert(self, stream, opts, file_ext, log,
            accelerators):
    '''
    Entry point of the plugin.

    OPF input is passed straight to the plumber's ``create_oebbook``. For
    every other extension, metadata is read from the HTML (and merged into
    the metadata derived from the file name, when the stream has a name)
    and the book is built by :meth:`create_oebbook`.

    :raises ValueError: If ``opts.dont_package`` is set for non OPF input.
    '''
    # Reset the cached answer used by is_case_sensitive()
    self._is_case_sensitive = None
    basedir = os.getcwd()
    self.opts = opts
    fname = None
    if hasattr(stream, 'name'):
        basedir = os.path.dirname(stream.name)
        fname = os.path.basename(stream.name)

    if file_ext == 'opf':
        from calibre.ebooks.conversion.plumber import create_oebbook
        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

    if opts.dont_package:
        raise ValueError('The --dont-package option is not supported for an HTML input file')

    from calibre.ebooks.metadata.html import get_metadata
    mi = get_metadata(stream)
    if fname:
        from calibre.ebooks.metadata.meta import metadata_from_filename
        # Start from the file name metadata and update it with what the
        # HTML itself declares
        name_mi = metadata_from_filename(fname)
        name_mi.smart_update(mi)
        mi = name_mi
    return self.create_oebbook(stream.name, basedir, opts, log, mi)
def is_case_sensitive(self, path):
    '''
    Report whether file names are case sensitive, judged by probing *path*.

    The first answer obtained from an existing path is cached on the
    instance and returned for every later call, whatever path is passed.
    For an empty or non-existent path the platform default
    (``islinux or isbsd``) is returned and nothing is cached.
    '''
    cached = getattr(self, '_is_case_sensitive', None)
    if cached is not None:
        return cached
    if not (path and os.path.exists(path)):
        return islinux or isbsd
    # Case insensitive if both case variants of the path resolve
    lower_exists = os.path.exists(path.lower())
    both_exist = lower_exists and os.path.exists(path.upper())
    self._is_case_sensitive = not both_exist
    return self._is_case_sensitive
# Build an OEB book from the HTML file at htmlpath.
#
# Steps, in order: create an unpopulated OEB book, fill in its metadata from
# mi (adding defaults for language, creator and title plus a fresh uuid
# identifier), add every non-binary file returned by get_filelist() to the
# manifest and spine, rewrite the links in those files and in the added
# stylesheets through resource_adder(), and finally generate a table of
# contents from the page titles or first headings. Returns the OEB book.
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
import uuid
from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import (DirContainer,
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
xpath)
from calibre import guess_type
from calibre.ebooks.oeb.transforms.metadata import \
meta_info_to_oeb_metadata
from calibre.ebooks.html.input import get_filelist
import cssutils, logging
cssutils.log.setLevel(logging.WARN)
# Kept on the instance because resource_adder() needs it
self.OEB_STYLES = OEB_STYLES
oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False)
self.oeb = oeb
metadata = oeb.metadata
meta_info_to_oeb_metadata(mi, metadata, log)
# Fall back to defaults for the metadata fields that are still empty
if not metadata.language:
oeb.logger.warn(u'Language not specified')
metadata.add('language', get_lang().replace('_', '-'))
if not metadata.creator:
oeb.logger.warn('Creator not specified')
metadata.add('creator', self.oeb.translate(__('Unknown')))
if not metadata.title:
oeb.logger.warn('Title not specified')
metadata.add('title', self.oeb.translate(__('Unknown')))
bookid = str(uuid.uuid4())
metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
# NOTE(review): the loop tests each identifier for an 'id' attribute but
# assigns metadata.identifier[0] rather than ident -- confirm this is intended
for ident in metadata.identifier:
if 'id' in ident.attrib:
self.oeb.uid = metadata.identifier[0]
break
filelist = get_filelist(htmlpath, basedir, opts, log)
filelist = [f for f in filelist if not f.is_binary]
# Maps the path of every HTML file to the href generated for it in the manifest
htmlfile_map = {}
for f in filelist:
path = f.path
# The container is pointed at the directory of the file being added
oeb.container = DirContainer(os.path.dirname(path), log,
ignore_opf=True)
bname = os.path.basename(path)
id, href = oeb.manifest.generate(id='html',
href=ascii_filename(bname))
htmlfile_map[path] = href
item = oeb.manifest.add(id, href, 'text/html')
item.html_input_href = bname
oeb.spine.add(item, True)
# Maps local paths to manifest hrefs; shared with resource_adder()
self.added_resources = {}
self.log = log
self.log('Normalizing filename cases')
for path, href in htmlfile_map.items():
if not self.is_case_sensitive(path):
path = path.lower()
self.added_resources[path] = href
# Helpers imported above are stored on the instance for use by the other methods
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
self.urldefrag = urldefrag
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
self.log('Rewriting HTML links')
for f in filelist:
path = f.path
dpath = os.path.dirname(path)
oeb.container = DirContainer(dpath, log, ignore_opf=True)
item = oeb.manifest.hrefs[htmlfile_map[path]]
rewrite_links(item.data, partial(self.resource_adder, base=dpath))
# Rewrite url() references in stylesheets, relative to the directory the
# stylesheet was found in (looked up by href in added_resources)
for item in oeb.manifest.values():
if item.media_type in self.OEB_STYLES:
dpath = None
for path, href in self.added_resources.items():
if href == item.href:
dpath = os.path.dirname(path)
break
cssutils.replaceUrls(item.data,
partial(self.resource_adder, base=dpath))
toc = self.oeb.toc
self.oeb.auto_generated_toc = True
# Collect, per linear spine item, the <title> text and the text of the
# first h1..h5/strong element found (in that order of preference)
titles = []
headers = []
for item in self.oeb.spine:
if not item.linear: continue
html = item.data
title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
title = re.sub(r'\s+', ' ', title.strip())
if title:
titles.append(title)
headers.append('(unlabled)')
for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
expr = '/h:html/h:body//h:%s[position()=1]/text()'
header = ''.join(xpath(html, expr % tag))
header = re.sub(r'\s+', ' ', header.strip())
if header:
headers[-1] = header
break
# Titles label the TOC unless some title occurs more than once, in which
# case the headings are used instead
use = titles
if len(titles) > len(set(titles)):
use = headers
# NOTE(review): labels were collected for linear items only but are zipped
# against the whole spine here -- verify the pairing when non-linear items exist
for title, item in izip(use, self.oeb.spine):
if not item.linear: continue
toc.add(title, item.href)
oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
return oeb
def link_to_local_path(self, link_, base=None):
    '''
    Resolve a link to a path on the local filesystem.

    :param link_: The link, either unicode or a UTF-8 encoded bytestring.
    :param base: Base passed to ``Link``; the current working directory is
                 used when it is empty.
    :return: ``(path, fragment)`` with ``/`` in the path replaced by
             ``os.sep``. ``(None, None)`` when the link cannot be decoded
             or processed, is not a local resource, or has an empty path.
    '''
    from calibre.ebooks.html.input import Link
    if not isinstance(link_, unicode):
        try:
            # 'strict' is the registered name of the raising error handler
            # ('error' is not a codec error handler at all)
            link_ = link_.decode('utf-8', 'strict')
        except Exception:
            self.log.warn('Failed to decode link %r. Ignoring'%link_)
            return None, None
    try:
        l = Link(link_, base if base else os.getcwdu())
    except Exception:
        self.log.exception('Failed to process link: %r'%link_)
        return None, None
    if l.path is None:
        # Not a local resource
        return None, None
    link = l.path.replace('/', os.sep).strip()
    frag = l.fragment
    if not link:
        return None, None
    return link, frag
# Link rewriting callback (used via partial() with rewrite_links and
# cssutils.replaceUrls in create_oebbook()).
#
# Resolves link_ to a local file, adds that file to the manifest the first
# time it is seen, and returns the manifest href (with the original fragment
# re-attached). Links that are not local, not readable or that point to a
# directory are returned unchanged.
def resource_adder(self, link_, base=None):
link, frag = self.link_to_local_path(link_, base=base)
if link is None:
return link_
try:
if base and not os.path.isabs(link):
link = os.path.join(base, link)
link = os.path.abspath(link)
except:
return link_
if not os.access(link, os.R_OK):
return link_
if os.path.isdir(link):
self.log.warn(link_, 'is a link to a directory. Ignoring.')
return link_
# NOTE(review): case sensitivity is probed on the temp directory, not on the
# directory of link, and is_case_sensitive() may return a cached answer
if not self.is_case_sensitive(tempfile.gettempdir()):
link = link.lower()
if link not in self.added_resources:
bhref = os.path.basename(link)
id, href = self.oeb.manifest.generate(id='added',
href=bhref)
guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME
if media_type == 'text/plain':
self.log.warn('Ignoring link to text file %r'%link_)
# Unlike the other ignore paths, this one returns None, not link_
return None
self.oeb.log.debug('Added', link)
self.oeb.container = self.DirContainer(os.path.dirname(link),
self.oeb.log, ignore_opf=True)
# Load into memory
item = self.oeb.manifest.add(id, href, media_type)
# bhref refers to an already existing file. The read() method of
# DirContainer will call unquote on it before trying to read the
# file, therefore we quote it here.
if isinstance(bhref, unicode):
bhref = bhref.encode('utf-8')
item.html_input_href = quote(bhref).decode('utf-8')
# Stylesheets resolve their own imports through css_import_handler(),
# relative to the directory of the stylesheet
if guessed in self.OEB_STYLES:
item.override_css_fetch = partial(
self.css_import_handler, os.path.dirname(link))
item.data
self.added_resources[link] = href
nlink = self.added_resources[link]
if frag:
nlink = '#'.join((nlink, frag))
return nlink
def css_import_handler(self, base, href):
    '''
    Fetch callback for stylesheet imports (installed on manifest items as
    ``override_css_fetch`` by :meth:`resource_adder`).

    :param base: Directory that *href* is resolved against.
    :param href: The link to the stylesheet to load.
    :return: ``(None, css_text)`` on success, where the text was decoded as
             UTF-8 (undecodable bytes replaced) and run through the OEB
             book's ``css_preprocessor``. ``(None, None)`` if the link is
             not a readable local file or reading it fails.
    '''
    link, frag = self.link_to_local_path(href, base=base)
    if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
        return (None, None)
    try:
        # Close the file deterministically instead of leaking the handle
        with open(link, 'rb') as f:
            raw = f.read().decode('utf-8', 'replace')
        raw = self.oeb.css_preprocessor(raw, add_namespace=True)
    except Exception:
        self.log.exception('Failed to read CSS file: %r'%link)
        return (None, None)
    return (None, raw)

View File

@ -4,22 +4,11 @@ __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'
import os, re, shutil
from calibre.utils import zipfile
from os.path import dirname, abspath, relpath, exists, basename
from lxml import etree
from templite import Templite
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.zipfile import ZipFile
from urllib import unquote
from calibre.ebooks.html.meta import EasyMeta
class HTMLOutput(OutputFormatPlugin):
@ -50,6 +39,9 @@ class HTMLOutput(OutputFormatPlugin):
'''
Generate table of contents
'''
from lxml import etree
from urllib import unquote
from calibre.ebooks.oeb.base import element
with CurrentDir(output_dir):
def build_node(current_node, parent=None):
@ -72,11 +64,18 @@ class HTMLOutput(OutputFormatPlugin):
return wrap
def generate_html_toc(self, oeb_book, ref_url, output_dir):
from lxml import etree
root = self.generate_toc(oeb_book, ref_url, output_dir)
return etree.tostring(root, pretty_print=True, encoding='utf-8',
xml_declaration=False)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from lxml import etree
from calibre.utils import zipfile
from templite import Templite
from urllib import unquote
from calibre.ebooks.html.meta import EasyMeta
# read template files
if opts.template_html_index is not None:
@ -192,7 +191,7 @@ class HTMLOutput(OutputFormatPlugin):
f.write(t)
item.unload_data_from_memory(memory=path)
zfile = ZipFile(output_path, "w")
zfile = zipfile.ZipFile(output_path, "w")
zfile.add_dir(output_dir, basename(output_dir))
zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)

View File

@ -10,9 +10,6 @@ import os
from calibre import guess_type
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.zipfile import ZipFile
class HTMLZInput(InputFormatPlugin):
@ -23,6 +20,10 @@ class HTMLZInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.zipfile import ZipFile
self.log = log
html = u''
top_levels = []

View File

@ -9,13 +9,10 @@ __docformat__ = 'restructuredtext en'
import os
from cStringIO import StringIO
from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
class HTMLZOutput(OutputFormatPlugin):
@ -43,7 +40,10 @@ class HTMLZOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from lxml import etree
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.utils.zipfile import ZipFile
# HTML
if opts.htmlz_css_type == 'inline':
@ -81,7 +81,7 @@ class HTMLZOutput(OutputFormatPlugin):
fname = os.path.join(tdir, 'images', images[item.href])
with open(fname, 'wb') as img:
img.write(data)
# Cover
cover_path = None
try:

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys
from calibre.customize.conversion import InputFormatPlugin
class LRFInput(InputFormatPlugin):
    '''
    Input plugin for LRF files. The document is serialized to XML and then
    transformed with the ``templates/lrf.xsl`` stylesheet.
    '''

    name        = 'LRF Input'
    author      = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types  = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the LRF document in *stream* and write ``content.opf`` to
        the current directory.

        :param stream: Input passed to ``LRFDocument``.
        :param options: Conversion options; with ``verbose > 2`` the
                        intermediate XML is also dumped to ``lrs.xml``.
        :param log: Logger, stored on ``self.log``.
        :return: Absolute path of ``content.opf``.
        '''
        from lxml import etree
        from calibre.ebooks.lrf.input import (MediaType, Styles, TextBlock,
                Canvas, ImageBlock, RuledLine)
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            # Debug dump; closed explicitly rather than leaking the handle
            with open('lrs.xml', 'wb') as f:
                f.write(xml.encode('utf-8'))
        parser = etree.XMLParser(no_network=True, huge_tree=True)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except Exception:
            # Second attempt with a parser that recovers from malformed XML
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                                    recover=True)
            doc = etree.fromstring(xml, parser=parser)

        # refobj of every CharButton -> 'refpage.xhtml#refobj' of the JumpTo
        # found below the object with that objid
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
            if jump_button:
                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
                if jump_to:
                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
                            jump_to[0].get('refobj'))

        # refobj of every Plot -> file attribute of the ImageStream that the
        # referenced Image points to
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
            if image:
                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                    image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        # Extension elements made available to the stylesheet
        extensions = {
                ('calibre', 'media-type') : media_type,
                ('calibre', 'text-block') : text_block,
                ('calibre', 'ruled-line') : ruled_line,
                ('calibre', 'styles')     : styles,
                ('calibre', 'canvas')     : canvas,
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            # Retry once with a higher recursion limit
            sys.setrecursionlimit(5000)
            result = transform(doc)

        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')

View File

@ -0,0 +1,25 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert an ODT file into an Open Ebook
'''
from calibre.customize.conversion import InputFormatPlugin
class ODTInput(InputFormatPlugin):
    '''
    Input plugin for ODT files; the actual work is delegated to
    ``calibre.ebooks.odt.input.Extract``.
    '''

    name = 'ODT Input'
    author = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types = set(['odt'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Run the ODT extractor on *stream* with ``'.'`` as its second
        argument and return whatever the extractor returns.
        '''
        from calibre.ebooks.odt.input import Extract
        extractor = Extract()
        return extractor(stream, '.', log)

View File

@ -5,13 +5,10 @@ __docformat__ = 'restructuredtext en'
import os, re
from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation)
from calibre import CurrentDir
from calibre.customize.conversion import OptionRecommendation
from urllib import unquote
class OEBOutput(OutputFormatPlugin):
@ -23,6 +20,9 @@ class OEBOutput(OutputFormatPlugin):
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from urllib import unquote
from lxml import etree
self.log, self.opts = log, opts
if not os.path.exists(output_path):
os.makedirs(output_path)

View File

@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
class PDBInput(InputFormatPlugin):
@ -19,6 +17,9 @@ class PDBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
header = PdbHeaderReader(stream)
Reader = get_reader(header.ident)

View File

@ -8,7 +8,7 @@ import os
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.pdb import PDBError, get_writer, FORMAT_WRITERS
from calibre.ebooks.pdb import PDBError, get_writer, ALL_FORMAT_WRITERS
class PDBOutput(OutputFormatPlugin):
@ -19,9 +19,9 @@ class PDBOutput(OutputFormatPlugin):
options = set([
OptionRecommendation(name='format', recommended_value='doc',
level=OptionRecommendation.LOW,
short_switch='f', choices=FORMAT_WRITERS.keys(),
short_switch='f', choices=list(ALL_FORMAT_WRITERS),
help=(_('Format to use inside the pdb container. Choices are:')+\
' %s' % FORMAT_WRITERS.keys())),
' %s' % list(ALL_FORMAT_WRITERS))),
OptionRecommendation(name='pdb_output_encoding', recommended_value='cp1252',
level=OptionRecommendation.LOW,
help=_('Specify the character encoding of the output document. ' \

View File

@ -7,10 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.pdf.pdftohtml import pdftohtml
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.constants import plugins
pdfreflow, pdfreflow_err = plugins['pdfreflow']
class PDFInput(InputFormatPlugin):
@ -31,6 +27,9 @@ class PDFInput(InputFormatPlugin):
])
def convert_new(self, stream, accelerators):
from calibre.constants import plugins
pdfreflow, pdfreflow_err = plugins['pdfreflow']
from calibre.ebooks.pdf.reflow import PDFDocument
from calibre.utils.cleantext import clean_ascii_chars
if pdfreflow_err:
@ -43,6 +42,9 @@ class PDFInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.pdf.pdftohtml import pdftohtml
log.debug('Converting file to html...')
# The main html file will be named index.html
self.opts, self.log = options, log

View File

@ -13,10 +13,50 @@ import os
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.pdf.pageoptions import UNITS, PAPER_SIZES, \
ORIENTATIONS
UNITS = [
'millimeter',
'point',
'inch' ,
'pica' ,
'didot',
'cicero',
'devicepixel',
]
PAPER_SIZES = ['b2',
'a9',
'executive',
'tabloid',
'b4',
'b5',
'b6',
'b7',
'b0',
'b1',
'letter',
'b3',
'a7',
'a8',
'b8',
'b9',
'a3',
'a1',
'folio',
'c5e',
'dle',
'a0',
'ledger',
'legal',
'a6',
'a2',
'b10',
'a5',
'comm10e',
'a4']
ORIENTATIONS = ['portrait', 'landscape']
class PDFOutput(OutputFormatPlugin):
@ -26,23 +66,23 @@ class PDFOutput(OutputFormatPlugin):
options = set([
OptionRecommendation(name='unit', recommended_value='inch',
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(),
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS,
help=_('The unit of measure. Default is inch. Choices '
'are %s '
'Note: This does not override the unit for margins!') % UNITS.keys()),
'Note: This does not override the unit for margins!') % UNITS),
OptionRecommendation(name='paper_size', recommended_value='letter',
level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(),
level=OptionRecommendation.LOW, choices=PAPER_SIZES,
help=_('The size of the paper. This size will be overridden when a '
'non default output profile is used. Default is letter. Choices '
'are %s') % PAPER_SIZES.keys()),
'are %s') % PAPER_SIZES),
OptionRecommendation(name='custom_size', recommended_value=None,
help=_('Custom size of the document. Use the form widthxheight '
'EG. `123x321` to specify the width and height. '
'This overrides any specified paper-size.')),
OptionRecommendation(name='orientation', recommended_value='portrait',
level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(),
level=OptionRecommendation.LOW, choices=ORIENTATIONS,
help=_('The orientation of the page. Default is portrait. Choices '
'are %s') % ORIENTATIONS.keys()),
'are %s') % ORIENTATIONS),
OptionRecommendation(name='preserve_cover_aspect_ratio',
recommended_value=False,
help=_('Preserve the aspect ratio of the cover, instead'
@ -105,6 +145,8 @@ class PDFOutput(OutputFormatPlugin):
def convert_text(self, oeb_book):
from calibre.ebooks.pdf.writer import PDFWriter
from calibre.ebooks.metadata.opf2 import OPF
self.log.debug('Serializing oeb input to disk for processing...')
self.get_cover_data()

View File

@ -11,9 +11,6 @@ import shutil
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.opf2 import OPFCreator
class PMLInput(InputFormatPlugin):
@ -24,6 +21,8 @@ class PMLInput(InputFormatPlugin):
file_types = set(['pml', 'pmlz'])
def process_pml(self, pml_path, html_path, close_all=False):
from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
pclose = False
hclose = False
@ -85,6 +84,9 @@ class PMLInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.opf2 import OPFCreator
self.options = options
self.log = log
pages, images = [], []

View File

@ -4,21 +4,11 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import os, cStringIO
try:
from PIL import Image
Image
except ImportError:
import Image
import cStringIO
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation
from calibre.customize.conversion import (OutputFormatPlugin,
OptionRecommendation)
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.pml.pmlml import PMLMLizer
class PMLOutput(OutputFormatPlugin):
@ -43,6 +33,9 @@ class PMLOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.pml.pmlml import PMLMLizer
from calibre.utils.zipfile import ZipFile
with TemporaryDirectory('_pmlz_output') as tdir:
pmlmlizer = PMLMLizer(log)
pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
@ -59,6 +52,13 @@ class PMLOutput(OutputFormatPlugin):
pmlz.add_dir(tdir)
def write_images(self, manifest, image_hrefs, out_dir, opts):
try:
from PIL import Image
Image
except ImportError:
import Image
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
for item in manifest:
if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():

View File

@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.ebooks.rb.reader import Reader
from calibre.customize.conversion import InputFormatPlugin
class RBInput(InputFormatPlugin):
@ -18,6 +17,8 @@ class RBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.rb.reader import Reader
reader = Reader(stream, log, options.input_encoding)
opf = reader.extract_content(os.getcwd())

View File

@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre.ebooks.rb.writer import RBWriter
class RBOutput(OutputFormatPlugin):
@ -22,6 +21,8 @@ class RBOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.rb.writer import RBWriter
close = False
if not hasattr(output_path, 'write'):
close = True

View File

@ -0,0 +1,298 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, glob, re, textwrap
from calibre.customize.conversion import InputFormatPlugin
# Maps the values of the border-cell-*-style attributes of table cells to CSS
# border-style keywords. RTFInput.convert_borders() looks styles up here and
# falls back to 'solid' for values that are not listed.
# NOTE(review): both 'tripple' and 'triple' are present; presumably the
# misspelled key matches what the RTF parser emits -- verify before removing.
border_style_map = {
'single' : 'solid',
'double-thickness-border' : 'double',
'shadowed-border': 'outset',
'double-border': 'double',
'dotted-border': 'dotted',
'dashed': 'dashed',
'hairline': 'solid',
'inset': 'inset',
'dash-small': 'dashed',
'dot-dash': 'dotted',
'dot-dot-dash': 'dotted',
'outset': 'outset',
'tripple': 'double',
'triple': 'double',
'thick-thin-small': 'solid',
'thin-thick-small': 'solid',
'thin-thick-thin-small': 'solid',
'thick-thin-medium': 'solid',
'thin-thick-medium': 'solid',
'thin-thick-thin-medium': 'solid',
'thick-thin-large': 'solid',
'thin-thick-thin-large': 'solid',
'wavy': 'ridge',
'double-wavy': 'ridge',
'striped': 'ridge',
'emboss': 'inset',
'engrave': 'inset',
'frame': 'ridge',
}
# Input plugin for RTF files. The methods below cover running the RTF to XML
# parser, extracting and converting embedded pictures, and writing the CSS
# for inline and table border styles.
class RTFInput(InputFormatPlugin):
name = 'RTF Input'
author = 'Kovid Goyal'
description = 'Convert RTF files to HTML'
file_types = set(['rtf'])
def generate_xml(self, stream):
    '''
    Run ``ParseRtf`` on *stream* and return the raw contents of
    ``dataxml.xml``, the path handed to the parser as its output file.

    If ``self.opts.debug_pipeline`` is set, the parser is run at run level
    4 with indented output and ``rtfdebug`` as its debug directory. Should
    setting that up fail, a warning is logged instead.
    '''
    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
    xml_path = 'dataxml.xml'
    level, dbg_dir, indent = 1, None, 0
    if getattr(self.opts, 'debug_pipeline', None) is not None:
        try:
            os.mkdir('rtfdebug')
            dbg_dir = 'rtfdebug'
            level = 4
            indent = 1
            self.log('Running RTFParser in debug mode')
        except:
            self.log.warn('Impossible to run RTFParser in debug mode')

    settings = dict(
        in_file=stream,
        out_file=xml_path,
        # Symbol, Zapf and Wingding fonts are converted to their unicode
        # equivalents (parser default for each is 1)
        convert_symbol=1,
        convert_zapf=1,
        convert_wingdings=1,
        # RTF caps become real caps (default 1)
        convert_caps=1,
        # Indentation of the resulting XML (default 0, no indent)
        indent=indent,
        # Lists are formed from the RTF (default 1)
        form_lists=1,
        # Headings are converted to sections (default 0)
        headings_to_sections=1,
        # Paragraphs sharing a style name are grouped (default 1)
        group_styles=1,
        # Borders are grouped (default 1)
        group_borders=1,
        # Whether paragraphs are written or not (default 0)
        empty_paragraphs=1,
        # Debugging
        deb_dir=dbg_dir,
        run_level=level,
    )
    parser = ParseRtf(**settings)
    parser.parse_rtf()
    with open(xml_path, 'rb') as f:
        return f.read()
def extract_images(self, picts):
    '''
    Write every ``{\\pict...}`` group found in the file *picts* to its own
    image file in the current directory.

    Files are named ``NNNN.<fmt>`` with a 1-based counter; the format is
    whatever ``imghdr`` detects, or ``wmf`` when it detects nothing. The
    resulting ``{counter: file name}`` map is passed through
    :meth:`convert_images` and returned.
    '''
    import imghdr
    self.log('Extracting images...')

    with open(picts, 'rb') as f:
        raw = f.read()
    groups = [g for g in re.findall(r'\{\\pict([^}]+)\}', raw) if len(g)]
    not_hex = re.compile(r'[^a-fA-F0-9]')

    imap = {}
    for count, group in enumerate(groups, 1):
        # Keep the hex digits only, dropping a dangling last digit
        enc = not_hex.sub('', group)
        if len(enc) % 2 == 1:
            enc = enc[:-1]
        data = enc.decode('hex')
        fmt = imghdr.what(None, data)
        if fmt is None:
            fmt = 'wmf'
        name = '%04d.%s' % (count, fmt)
        with open(name, 'wb') as out:
            out.write(data)
        imap[count] = name
    return self.convert_images(imap)
def convert_images(self, imap):
    '''
    Replace, in place, every file name in *imap* by the result of
    :meth:`convert_image`. Entries whose conversion raises are logged and
    left untouched. Also resets ``self.default_img``. Returns *imap*.
    '''
    self.default_img = None
    for key, image_name in imap.iteritems():
        try:
            converted = self.convert_image(image_name)
            imap[key] = converted
        except:
            self.log.exception('Failed to convert', image_name)
    return imap
def convert_image(self, name):
    '''
    Return the name of a usable image for *name*.

    Names that do not end in ``.wmf`` are returned unchanged. WMF files
    are rasterized; if that fails, the failure is logged and the file
    name returned by :meth:`replace_wmf` is used instead.
    '''
    if name.endswith('.wmf'):
        try:
            return self.rasterize_wmf(name)
        except:
            self.log.exception('Failed to convert WMF image %r'%name)
        return self.replace_wmf(name)
    return name
def replace_wmf(self, name):
    '''
    Write a placeholder image in place of the WMF file *name* and return
    the placeholder's file name (``.wmf`` replaced by ``.jpg``).

    The placeholder is generated once with ``calibre_cover`` and cached in
    ``self.default_img``.
    '''
    from calibre.ebooks import calibre_cover
    if self.default_img is None:
        heading = 'Conversion of WMF images is not supported'
        advice = ('Use Microsoft Word or OpenOffice to save this RTF file'
                  ' as HTML and convert that in calibre.')
        self.default_img = calibre_cover(heading, advice, title_size=36,
                                         author_size=20)
    name = name.replace('.wmf', '.jpg')
    with open(name, 'wb') as out:
        out.write(self.default_img)
    return name
def rasterize_wmf(self, name):
    '''
    Pass the contents of the WMF file *name* through ``wmf_unwrap`` and
    write the result to a file with ``.wmf`` replaced by ``.png``.
    Returns the name of the file written.
    '''
    from calibre.utils.wmf.parse import wmf_unwrap
    with open(name, 'rb') as src:
        wmf_data = src.read()
    unwrapped = wmf_unwrap(wmf_data)
    png_name = name.replace('.wmf', '.png')
    with open(png_name, 'wb') as dest:
        dest.write(unwrapped)
    return png_name
def write_inline_css(self, ic, border_styles):
    '''
    Append the CSS for inline formatting and cell borders to
    ``styles.css`` in the current directory.

    :param ic: Object exposing ``font_sizes`` and ``colors`` sequences;
        one ``span.fsN`` / ``span.colN`` rule is emitted per entry.
    :param border_styles: Mapping of CSS class name to the declarations
        for that class.
    '''
    parts = [textwrap.dedent('''
        span.none {
        text-decoration: none; font-weight: normal;
        font-style: normal; font-variant: normal
        }
        span.italics { font-style: italic }
        span.bold { font-weight: bold }
        span.small-caps { font-variant: small-caps }
        span.underlined { text-decoration: underline }
        span.strike-through { text-decoration: line-through }
        ''')]

    size_rules = []
    for i, x in enumerate(ic.font_sizes):
        size_rules.append('span.fs%d { font-size: %spt }'%(i, x))
    parts.append('\n' + '\n'.join(size_rules))

    color_rules = []
    for i, x in enumerate(ic.colors):
        color_rules.append('span.col%d { color: %s }'%(i, x))
    parts.append('\n' + '\n'.join(color_rules))

    for cls, val in border_styles.iteritems():
        parts.append('\n\n.%s {\n%s\n}'%(cls, val))

    # Opened in append mode, so any existing styles.css content is kept.
    with open('styles.css', 'ab') as out:
        out.write(''.join(parts))
def convert_borders(self, doc):
    '''
    Assign a CSS class describing its borders to every ``cell`` element.

    For each cell the per-side ``border-cell-<side>-style``,
    ``-line-width`` and ``-color`` attributes are turned into CSS
    declarations on top of a hidden 1px black default. Cells with
    identical declarations share one ``border_styleN`` class, numbered in
    order of first appearance, and the class is stored in the cell's
    ``class`` attribute.

    :param doc: Parsed document supporting ``xpath``.
    :return: Mapping of class name to its CSS declarations.
    '''
    # Maps each distinct declaration block to its index, giving a constant
    # time lookup instead of scanning a list for every cell.
    style_index = {}
    style_map = {}
    for elem in doc.xpath(r'//*[local-name()="cell"]'):
        style = ['border-style: hidden', 'border-width: 1px',
                'border-color: black']
        for x in ('bottom', 'top', 'left', 'right'):
            bs = elem.get('border-cell-%s-style'%x, None)
            if bs:
                # Unknown RTF border styles fall back to a solid line.
                cbs = border_style_map.get(bs, 'solid')
                style.append('border-%s-style: %s'%(x, cbs))
            bw = elem.get('border-cell-%s-line-width'%x, None)
            if bw:
                style.append('border-%s-width: %spt'%(x, bw))
            bc = elem.get('border-cell-%s-color'%x, None)
            if bc:
                style.append('border-%s-color: %s'%(x, bc))
        style = ';\n'.join(style)
        idx = style_index.setdefault(style, len(style_index))
        cls = 'border_style%d'%idx
        style_map[cls] = style
        elem.set('class', cls)
    return style_map
def convert(self, stream, options, file_ext, log,
        accelerators):
    '''
    Convert the RTF file behind *stream* into ``index.xhtml``,
    ``styles.css`` and ``metadata.opf`` in the current directory.

    :param stream: Open, seekable file object with a ``name`` attribute.
    :param options: Conversion options; stored on ``self.opts``.
    :param file_ext: Not used by this method.
    :param log: Logger; stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Absolute path of the generated ``metadata.opf``.
    :raises ValueError: If the RTF parser reports an unsupported feature.
    '''
    from lxml import etree
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
    from calibre.ebooks.rtf.input import InlineClass
    self.opts = options
    self.log = log
    self.log('Converting RTF to XML...')
    try:
        xml = self.generate_xml(stream.name)
    except RtfInvalidCodeException as e:
        raise ValueError(_('This RTF file has a feature calibre does not '
        'support. Convert it to HTML first and then try it.\n%s')%e)

    # imap must exist even when no picture dump was produced, because the
    # pict renumbering loop below consults it unconditionally.
    imap = {}
    d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
    if d:
        try:
            imap = self.extract_images(d[0])
        except Exception:
            # Images are optional; carry on without them.
            self.log.exception('Failed to extract images...')

    self.log('Parsing XML...')
    parser = etree.XMLParser(recover=True, no_network=True)
    doc = etree.fromstring(xml, parser=parser)
    border_styles = self.convert_borders(doc)
    # Replace picture numbers with the extracted image file names.
    for pict in doc.xpath('//rtf:pict[@num]',
            namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
        num = int(pict.get('num'))
        name = imap.get(num, None)
        if name is not None:
            pict.set('num', name)

    self.log('Converting XML to HTML...')
    inline_class = InlineClass(self.log)
    styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
    extensions = { ('calibre', 'inline-class') : inline_class }
    transform = etree.XSLT(styledoc, extensions=extensions)
    result = transform(doc)
    html = 'index.xhtml'
    with open(html, 'wb') as f:
        res = transform.tostring(result)
        # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
        #clean multiple \n
        res = re.sub('\n+', '\n', res)
        # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
        # res = re.sub('\s*<body>', '<body>', res)
        # res = re.sub('(?<=\n)\n{2}',
        #         u'<p>\u00a0</p>\n'.encode('utf-8'), res)
        f.write(res)
    self.write_inline_css(inline_class, border_styles)

    stream.seek(0)
    mi = get_metadata(stream, 'rtf')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    opf = OPFCreator(os.getcwd(), mi)
    opf.create_manifest([('index.xhtml', None)])
    opf.create_spine(['index.xhtml'])
    # Close the OPF file deterministically instead of leaking the handle.
    with open('metadata.opf', 'wb') as opf_file:
        opf.render(opf_file)
    return os.path.abspath('metadata.opf')

View File

@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.ebooks.rtf.rtfml import RTFMLizer
from calibre.customize.conversion import OutputFormatPlugin
class RTFOutput(OutputFormatPlugin):
@ -16,6 +15,8 @@ class RTFOutput(OutputFormatPlugin):
file_type = 'rtf'
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.rtf.rtfml import RTFMLizer
rtfmlitzer = RTFMLizer(log)
content = rtfmlitzer.extract_content(oeb_book, opts)

View File

@ -4,13 +4,11 @@ __license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import os, uuid
import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.filenames import ascii_filename
from lxml import etree
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
@ -29,7 +27,12 @@ class SNBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
import uuid
from lxml import etree
from calibre.ebooks.oeb.base import DirContainer
from calibre.ebooks.snb.snbfile import SNBFile
log.debug("Parsing SNB file...")
snbFile = SNBFile()
try:

View File

@ -6,12 +6,9 @@ __docformat__ = 'restructuredtext en'
import os, string
from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
class SNBOutput(OutputFormatPlugin):
@ -49,6 +46,11 @@ class SNBOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from lxml import etree
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
self.opts = opts
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
try:

View File

@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
from cStringIO import StringIO
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.compression.tcr import decompress
class TCRInput(InputFormatPlugin):
@ -17,6 +16,8 @@ class TCRInput(InputFormatPlugin):
file_types = set(['tcr'])
def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.compression.tcr import decompress
log.info('Decompressing text...')
raw_txt = decompress(stream)
@ -28,7 +29,7 @@ class TCRInput(InputFormatPlugin):
txt_plugin = plugin_for_input_format('txt')
for opt in txt_plugin.options:
if not hasattr(self.options, opt.option.name):
setattr(self.options, opt.option.name, opt.recommended_value)
setattr(options, opt.option.name, opt.recommended_value)
stream.seek(0)
return txt_plugin.convert(stream, options,

View File

@ -8,8 +8,6 @@ import os
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.txt.txtml import TXTMLizer
from calibre.ebooks.compression.tcr import compress
class TCROutput(OutputFormatPlugin):
@ -25,6 +23,9 @@ class TCROutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.txt.txtml import TXTMLizer
from calibre.ebooks.compression.tcr import compress
close = False
if not hasattr(output_path, 'write'):
close = True

View File

@ -8,14 +8,6 @@ import os
from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
separate_hard_scene_breaks
from calibre.utils.zipfile import ZipFile
class TXTInput(InputFormatPlugin):
@ -61,6 +53,17 @@ class TXTInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.txt.processor import (convert_basic,
convert_markdown, separate_paragraphs_single_line,
separate_paragraphs_print_formatted, preserve_spaces,
detect_paragraph_type, detect_formatting_type,
normalize_line_endings, convert_textile, remove_indents,
block_to_single_line, separate_hard_scene_breaks)
self.log = log
txt = ''
log.debug('Reading text from file...')

View File

@ -7,15 +7,12 @@ __docformat__ = 'restructuredtext en'
import os
import shutil
from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.txt.txtml import TXTMLizer
from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
from calibre.ptempfile import TemporaryDirectory, TemporaryFile
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.zipfile import ZipFile
NEWLINE_TYPES = ['system', 'unix', 'old_mac', 'windows']
class TXTOutput(OutputFormatPlugin):
@ -26,11 +23,11 @@ class TXTOutput(OutputFormatPlugin):
options = set([
OptionRecommendation(name='newline', recommended_value='system',
level=OptionRecommendation.LOW,
short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
short_switch='n', choices=NEWLINE_TYPES,
help=_('Type of newline to use. Options are %s. Default is \'system\'. '
'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
'For Mac OS X use \'unix\'. \'system\' will default to the newline '
'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
'type used by this OS.') % sorted(NEWLINE_TYPES)),
OptionRecommendation(name='txt_output_encoding', recommended_value='utf-8',
level=OptionRecommendation.LOW,
help=_('Specify the character encoding of the output document. ' \
@ -76,6 +73,11 @@ class TXTOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.txt.txtml import TXTMLizer
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.txt.newlines import specified_newlines, TxtNewlines
if opts.txt_output_formatting.lower() == 'markdown':
from calibre.ebooks.txt.markdownml import MarkdownMLizer
self.writer = MarkdownMLizer(log)
@ -116,6 +118,9 @@ class TXTZOutput(TXTOutput):
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.utils.zipfile import ZipFile
from lxml import etree
with TemporaryDirectory('_txtz_output') as tdir:
# TXT
txt_name = 'index.txt'

View File

@ -6,7 +6,6 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
from calibre.utils.date import parse_date, strptime
class Epubcheck(ePubFixer):
@ -35,6 +34,8 @@ class Epubcheck(ePubFixer):
return 'epubcheck'
def fix_pubdates(self):
from calibre.utils.date import parse_date, strptime
dirtied = False
opf = self.container.opf
for dcdate in opf.xpath('//dc:date',

View File

@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en'
import re
from lxml.etree import tostring as _tostring
def tostring(root, strip_comments=False, pretty_print=False):
'''
Serialize processed XHTML.
'''
from lxml.etree import tostring as _tostring
root.set('xmlns', 'http://www.w3.org/1999/xhtml')
root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
for x in root.iter():

View File

@ -11,19 +11,13 @@ __docformat__ = 'restructuredtext en'
Input plugin for HTML or OPF ebooks.
'''
import os, re, sys, uuid, tempfile, errno as gerrno
import os, re, sys, errno as gerrno
from urlparse import urlparse, urlunparse
from urllib import unquote, quote
from functools import partial
from itertools import izip
from urllib import unquote
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import detect_xml_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import islinux, isbsd, iswindows
from calibre.constants import iswindows
from calibre import unicode_path, as_unicode
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
class Link(object):
'''
@ -241,262 +235,4 @@ def get_filelist(htmlfile, dir, opts, log):
return filelist
class HTMLInput(InputFormatPlugin):
    '''
    Input plugin that converts HTML and OPF files to an OEB book.

    For HTML input the linked files are collected with ``get_filelist``,
    added to the manifest and spine, and every local resource they
    reference is pulled into the manifest with its link rewritten.
    '''

    name = 'HTML Input'
    author = 'Kovid Goyal'
    description = 'Convert HTML and OPF files to an OEB'
    file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])

    options = set([
        OptionRecommendation(name='breadth_first',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Traverse links in HTML files breadth first. Normally, '
                    'they are traversed depth first.'
                   )
        ),

        OptionRecommendation(name='max_levels',
            recommended_value=5, level=OptionRecommendation.LOW,
            help=_('Maximum levels of recursion when following links in '
                   'HTML files. Must be non-negative. 0 implies that no '
                   'links in the root HTML file are followed. Default is '
                   '%default.'
                   )
        ),

        OptionRecommendation(name='dont_package',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally this input plugin re-arranges all the input '
                'files into a standard folder hierarchy. Only use this option '
                'if you know what you are doing as it can result in various '
                'nasty side effects in the rest of the conversion pipeline.'
                )
        ),

    ])

    def convert(self, stream, opts, file_ext, log,
            accelerators):
        '''
        Build an OEB book from *stream*.

        For anything other than an OPF file, metadata is read from the
        HTML (merged over metadata guessed from the file name when one is
        available) and the book is assembled by ``create_oebbook``. OPF
        files are delegated to the plumber's ``create_oebbook``.

        :raises ValueError: If ``opts.dont_package`` is set for non-OPF
            input.
        '''
        self._is_case_sensitive = None
        basedir = os.getcwd()
        self.opts = opts

        fname = None
        if hasattr(stream, 'name'):
            basedir = os.path.dirname(stream.name)
            fname = os.path.basename(stream.name)

        if file_ext != 'opf':
            if opts.dont_package:
                raise ValueError('The --dont-package option is not supported for an HTML input file')
            from calibre.ebooks.metadata.html import get_metadata
            mi = get_metadata(stream)
            if fname:
                from calibre.ebooks.metadata.meta import metadata_from_filename
                fmi = metadata_from_filename(fname)
                fmi.smart_update(mi)
                mi = fmi
            oeb = self.create_oebbook(stream.name, basedir, opts, log, mi)
            return oeb

        from calibre.ebooks.conversion.plumber import create_oebbook
        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

    def is_case_sensitive(self, path):
        '''
        Report whether the filesystem holding *path* is case sensitive.

        The answer is cached on ``self._is_case_sensitive`` once it has
        been determined from an existing path. A missing path is answered
        from the platform (``islinux or isbsd``) without caching.
        '''
        if getattr(self, '_is_case_sensitive', None) is not None:
            return self._is_case_sensitive
        if not path or not os.path.exists(path):
            return islinux or isbsd
        # If both the lower and upper cased spellings exist, the
        # filesystem is treated as case insensitive.
        self._is_case_sensitive = not (os.path.exists(path.lower()) \
                and os.path.exists(path.upper()))
        return self._is_case_sensitive

    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''
        Assemble an OEB book from the HTML file *htmlpath* and the files
        it links to.

        :param mi: Metadata applied to the book; language, creator and
            title are filled in with defaults when missing.
        :return: The populated OEB book.
        '''
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
            xpath)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang().replace('_', '-'))
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', self.oeb.translate(__('Unknown')))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                # NOTE(review): this stores the first identifier rather
                # than the matching `ident` -- confirm this is intended.
                self.oeb.uid = metadata.identifier[0]
                break

        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            item_id, href = oeb.manifest.generate(id='html',
                    href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(item_id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Stashed on self so that resource_adder can use them without
        # repeating the function-level imports.
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                # Resolve URLs in a stylesheet relative to the directory
                # it was originally loaded from.
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data,
                        partial(self.resource_adder, base=dpath))

        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear: continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        # Prefer <title> text for the ToC unless titles repeat, in which
        # case the first heading of each file is used instead.
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        for title, item in izip(use, self.oeb.spine):
            if not item.linear: continue
            toc.add(title, item.href)

        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb

    def link_to_local_path(self, link_, base=None):
        '''
        Resolve *link_* to a local filesystem path.

        :param base: Directory the link is relative to; the current
            directory when not given.
        :return: ``(path, fragment)``, or ``(None, None)`` when the link
            cannot be decoded or processed, is not local, or is empty.
        '''
        if not isinstance(link_, unicode):
            try:
                # 'strict' is the valid name of the raising error handler;
                # the former 'error' is not a registered handler.
                link_ = link_.decode('utf-8', 'strict')
            except Exception:
                self.log.warn('Failed to decode link %r. Ignoring'%link_)
                return None, None
        try:
            parsed = Link(link_, base if base else os.getcwdu())
        except Exception:
            self.log.exception('Failed to process link: %r'%link_)
            return None, None
        if parsed.path is None:
            # Not a local resource
            return None, None
        link = parsed.path.replace('/', os.sep).strip()
        frag = parsed.fragment
        if not link:
            return None, None
        return link, frag

    def resource_adder(self, link_, base=None):
        '''
        Link rewriting callback: add the local resource *link_* points to
        to the manifest and return the manifest href to use instead.

        :return: The new href (with the fragment re-attached), the
            original *link_* when it is not a readable local file, or
            ``None`` for links to plain text files.
        '''
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except Exception:
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
            item_id, href = self.oeb.manifest.generate(id='added',
                    href=bhref)
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r'%link_)
                return None

            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(os.path.dirname(link),
                    self.oeb.log, ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(item_id, href, media_type)
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
            if isinstance(bhref, unicode):
                bhref = bhref.encode('utf-8')
            item.html_input_href = quote(bhref).decode('utf-8')
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, os.path.dirname(link))
            # Accessing .data forces the item to be loaded now.
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        if frag:
            nlink = '#'.join((nlink, frag))
        return nlink

    def css_import_handler(self, base, href):
        '''
        Fetch callback for CSS imports: read the stylesheet *href* from
        the local filesystem relative to *base*.

        :return: ``(None, css_text)`` on success and ``(None, None)`` when
            the file is not local, not readable, a directory, or cannot
            be read.
        '''
        link, frag = self.link_to_local_path(href, base=base)
        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
            return (None, None)
        try:
            # Close the file promptly instead of leaking the handle.
            with open(link, 'rb') as f:
                raw = f.read().decode('utf-8', 'replace')
            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
        except Exception:
            self.log.exception('Failed to read CSS file: %r'%link)
            return (None, None)
        return (None, raw)

View File

@ -4,7 +4,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
This package contains logic to read and write LRF files.
The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
"""
from uuid import uuid4
from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, \
@ -60,6 +59,7 @@ def find_custom_fonts(options, logger):
def Book(options, logger, font_delta=0, header=None,
profile=PRS500_PROFILE, **settings):
from uuid import uuid4
ps = {}
ps['topmargin'] = options.top_margin
ps['evensidemargin'] = options.left_margin

View File

@ -6,12 +6,11 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, textwrap, sys, operator
import textwrap, operator
from copy import deepcopy, copy
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
from calibre import guess_type
class Canvas(etree.XSLTExtension):
@ -406,76 +405,4 @@ class Styles(etree.XSLTExtension):
class LRFInput(InputFormatPlugin):
    '''
    Input plugin that converts LRF files to HTML by rendering them to XML
    and applying the ``templates/lrf.xsl`` stylesheet.
    '''

    name = 'LRF Input'
    author = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
            accelerators):
        '''
        Convert the LRF document in *stream* and write ``content.opf``
        to the current directory.

        :param options: Conversion options; with ``verbose > 2`` the
            intermediate XML is also dumped to ``lrs.xml``.
        :return: Absolute path of the generated ``content.opf``.
        '''
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            # Use a context manager so the debug dump is flushed and closed.
            with open('lrs.xml', 'wb') as dump:
                dump.write(xml.encode('utf-8'))
        parser = etree.XMLParser(no_network=True, huge_tree=True)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except Exception:
            # Retry leniently, without trapping KeyboardInterrupt/SystemExit.
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                    recover=True)
            doc = etree.fromstring(xml, parser=parser)

        # Map each CharButton's referenced object to the page and object
        # its JumpTo points at.
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
            if jump_button:
                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
                if jump_to:
                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
                            jump_to[0].get('refobj'))

        # Map each Plot's referenced object to the file of its image stream.
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
            if image:
                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                        image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        extensions = {
                ('calibre', 'media-type') : media_type,
                ('calibre', 'text-block') : text_block,
                ('calibre', 'ruled-line') : ruled_line,
                ('calibre', 'styles')     : styles,
                ('calibre', 'canvas')     : canvas,
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            # NOTE(review): presumably a recursion-depth failure; the
            # raised limit stays in effect after this call -- confirm
            # that is acceptable.
            sys.setrecursionlimit(5000)
            result = transform(doc)

        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')

View File

@ -710,7 +710,7 @@ class Metadata(object):
fmt('Title sort', self.title_sort)
if self.authors:
fmt('Author(s)', authors_to_string(self.authors) + \
((' [' + self.author_sort + ']')
((' [' + self.author_sort + ']')
if self.author_sort and self.author_sort != _('Unknown') else ''))
if self.publisher:
fmt('Publisher', self.publisher)

View File

@ -12,7 +12,6 @@ from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre.library.field_metadata import FieldMetadata
from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE, local_tz
from calibre.utils.magick import Image
from calibre import isbytestring
# Translate datetimes to and from strings. The string form is the datetime in
@ -37,6 +36,8 @@ def encode_thumbnail(thumbnail):
'''
Encode the image part of a thumbnail, then return the 3 part tuple
'''
from calibre.utils.magick import Image
if thumbnail is None:
return None
if not isinstance(thumbnail, (tuple, list)):

View File

@ -129,9 +129,57 @@ class OCFDirReader(OCFReader):
def open(self, path, *args, **kwargs):
return open(os.path.join(self.root, path), *args, **kwargs)
def get_cover(opf, opf_path, stream, reader=None):
def _strip_font_face_rules(path, ffpat):
    '''
    Rewrite the file at *path* with every match of *ffpat* removed and
    return the new contents.
    '''
    with open(path, 'r+b') as f:
        raw = ffpat.sub(b'', f.read())
        # truncate() does not move the file position, so rewind first;
        # otherwise the new data is written at the old end-of-file offset
        # and the file is padded with NUL bytes.
        f.seek(0)
        f.truncate()
        f.write(raw)
    return raw

def render_cover(opf, opf_path, zf, reader=None):
    '''
    Render the first spine item of an EPUB as its cover.

    :param opf: OPF object; supplies the first spine item.
    :param opf_path: Path of the OPF inside the archive, ``/`` separated.
    :param zf: Zip archive; extracted into a temporary directory.
    :param reader: Optional reader whose ``encryption_meta`` is consulted
        to skip an encrypted cover page.
    :return: The result of ``render_html_svg_workaround``, or ``None``
        when there is no spine item, the item is encrypted, or the page
        does not exist after extraction.
    '''
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log

    cpage = opf.first_spine_item()
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        return

    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            zf.extractall()
            opf_path = opf_path.replace('/', os.sep)
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return

            if isosx:
                # On OS X trying to render a HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts as well as any @font-face
                # rules
                for f in walk('.'):
                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
                        os.remove(f)
                ffpat = re.compile(br'@font-face.*?{.*?}',
                        re.DOTALL|re.IGNORECASE)
                raw = _strip_font_face_rules(cpage, ffpat)
                from calibre.ebooks.chardet import xml_to_unicode
                raw = xml_to_unicode(raw,
                        strip_encoding_pats=True, resolve_entities=True)[0]
                from lxml import html
                # Clean the files referenced by <link> elements as well.
                for link in html.fromstring(raw).xpath('//link'):
                    href = link.get('href', '')
                    if href:
                        path = os.path.join(os.path.dirname(cpage), href)
                        if os.path.exists(path):
                            _strip_font_face_rules(path, ffpat)

            return render_html_svg_workaround(cpage, default_log)
def get_cover(opf, opf_path, stream, reader=None):
raster_cover = opf.raster_cover
stream.seek(0)
zf = ZipFile(stream)
@ -152,27 +200,7 @@ def get_cover(opf, opf_path, stream, reader=None):
zf.close()
return data
cpage = opf.first_spine_item()
if not cpage:
return
if reader is not None and reader.encryption_meta.is_encrypted(cpage):
return
with TemporaryDirectory('_epub_meta') as tdir:
with CurrentDir(tdir):
zf.extractall()
if isosx:
# On OS X trying to render an HTML cover which uses embedded
# fonts more than once in the same process causes a crash in Qt
# so be safe and remove the fonts.
for f in walk('.'):
if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
os.remove(f)
opf_path = opf_path.replace('/', os.sep)
cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
if not os.path.exists(cpage):
return
return render_html_svg_workaround(cpage, default_log)
return render_cover(opf, opf_path, zf, reader=reader)
def get_metadata(stream, extract_cover=True):
""" Return metadata as a :class:`Metadata` object """

View File

@ -222,6 +222,11 @@ def forked_read_metadata(path, tdir):
from calibre.ebooks.metadata.opf2 import metadata_to_opf
with open(path, 'rb') as f:
fmt = os.path.splitext(path)[1][1:].lower()
f.seek(0, 2)
sz = f.tell()
with open(os.path.join(tdir, 'size.txt'), 'wb') as s:
s.write(str(sz).encode('ascii'))
f.seek(0)
mi = get_metadata(f, fmt)
if mi.cover_data and mi.cover_data[1]:
with open(os.path.join(tdir, 'cover.jpg'), 'wb') as f:

View File

@ -1019,6 +1019,11 @@ class OPF(object): # {{{
mt = item.get('media-type', '')
if 'xml' not in mt:
return item.get('href', None)
for item in self.itermanifest():
if item.get('href', None) == cover_id:
mt = item.get('media-type', '')
if mt.startswith('image/'):
return item.get('href', None)
@dynamic_property
def cover(self):

View File

@ -12,19 +12,14 @@ from urllib import urlencode
from threading import Thread
from Queue import Queue, Empty
from lxml.html import tostring
from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
fixauthors)
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.date import parse_date
from calibre.utils.localization import canonicalize_lang
from calibre.utils.soupparser import fromstring
class Worker(Thread): # Get details {{{
@ -43,6 +38,8 @@ class Worker(Thread): # Get details {{{
self.browser = browser.clone_browser()
self.cover_url = self.amazon_id = self.isbn = None
self.domain = domain
from lxml.html import tostring
self.tostring = tostring
months = {
'de': {
@ -176,6 +173,10 @@ class Worker(Thread): # Get details {{{
self.log.exception('get_details failed for url: %r'%self.url)
def get_details(self):
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.soupparser import fromstring
from calibre.ebooks.chardet import xml_to_unicode
try:
raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
except Exception as e:
@ -210,7 +211,7 @@ class Worker(Thread): # Get details {{{
errmsg = root.xpath('//*[@id="errorMessage"]')
if errmsg:
msg = 'Failed to parse amazon details page: %r'%self.url
msg += tostring(errmsg, method='text', encoding=unicode).strip()
msg += self.tostring(errmsg, method='text', encoding=unicode).strip()
self.log.error(msg)
return
@ -322,10 +323,10 @@ class Worker(Thread): # Get details {{{
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
if actual_title:
title = tostring(actual_title[0], encoding=unicode,
title = self.tostring(actual_title[0], encoding=unicode,
method='text').strip()
else:
title = tostring(tdiv, encoding=unicode, method='text').strip()
title = self.tostring(tdiv, encoding=unicode, method='text').strip()
return re.sub(r'[(\[].*[)\]]', '', title).strip()
def parse_authors(self, root):
@ -337,7 +338,7 @@ class Worker(Thread): # Get details {{{
''')
for x in aname:
x.tail = ''
authors = [tostring(x, encoding=unicode, method='text').strip() for x
authors = [self.tostring(x, encoding=unicode, method='text').strip() for x
in aname]
authors = [a for a in authors if a]
return authors
@ -356,6 +357,8 @@ class Worker(Thread): # Get details {{{
return float(m.group(1))/float(m.group(3)) * 5
def parse_comments(self, root):
from calibre.library.comments import sanitize_comments_html
desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
if desc:
desc = desc[0]
@ -365,7 +368,7 @@ class Worker(Thread): # Get details {{{
for a in desc.xpath('descendant::a[@href]'):
del a.attrib['href']
a.tag = 'span'
desc = tostring(desc, method='html', encoding=unicode).strip()
desc = self.tostring(desc, method='html', encoding=unicode).strip()
# Encoding bug in Amazon data U+fffd (replacement char)
# in some examples it is present in place of '
@ -602,6 +605,11 @@ class Amazon(Source):
Note this method will retry without identifiers automatically if no
match is found with identifiers.
'''
from lxml.html import tostring
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.soupparser import fromstring
from calibre.ebooks.chardet import xml_to_unicode
query, domain = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)
if query is None:

View File

@ -12,7 +12,6 @@ from future_builtins import map
from calibre import browser, random_user_agent
from calibre.customize import Plugin
from calibre.utils.logging import ThreadSafeLog, FileStream
from calibre.utils.config import JSONConfig
from calibre.utils.titlecase import titlecase
from calibre.utils.icu import capitalize, lower, upper
@ -34,6 +33,7 @@ msprefs.defaults['fewer_tags'] = True
msprefs.defaults['cover_priorities'] = {'Google':2}
def create_log(ostream=None):
    '''
    Create a thread safe log at DEBUG level whose only output is a
    FileStream wrapping ``ostream``.

    The logging classes are imported inside the function, so they are
    only loaded when a log is actually created.
    '''
    from calibre.utils.logging import FileStream, ThreadSafeLog
    ans = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
    ans.outputs = [FileStream(ostream)]
    return ans

View File

@ -12,14 +12,10 @@ from urllib import urlencode
from functools import partial
from Queue import Queue, Empty
from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
from calibre import as_unicode
NAMESPACES = {
@ -28,22 +24,6 @@ NAMESPACES = {
'db': 'http://www.douban.com/xmlns/',
'gd': 'http://schemas.google.com/g/2005'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
booktag = XPath("descendant::db:tag/attribute::name")
rating = XPath("descendant::gd:rating/attribute::average")
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
def get_details(browser, url, timeout): # {{{
try:
if Douban.DOUBAN_API_KEY and Douban.DOUBAN_API_KEY != '':
@ -61,6 +41,25 @@ def get_details(browser, url, timeout): # {{{
# }}}
def to_metadata(browser, log, entry_, timeout): # {{{
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
booktag = XPath("descendant::db:tag/attribute::name")
rating = XPath("descendant::gd:rating/attribute::average")
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
def get_text(extra, x):
try:
ans = x(extra)
@ -275,6 +274,7 @@ class Douban(Source):
def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout):
from lxml import etree
for relevance, i in enumerate(entries):
try:
ans = to_metadata(br, log, i, timeout)
@ -298,6 +298,13 @@ class Douban(Source):
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_ascii_chars
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entry = XPath('//atom:entry')
query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)
if not query:

View File

@ -12,8 +12,6 @@ from urllib import urlencode
from functools import partial
from Queue import Queue, Empty
from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
@ -29,23 +27,6 @@ NAMESPACES = {
'dc' : 'http://purl.org/dc/terms',
'gd' : 'http://schemas.google.com/g/2005'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
rating = XPath('descendant::gd:rating[@average]')
def get_details(browser, url, timeout): # {{{
try:
raw = browser.open_novisit(url, timeout=timeout).read()
@ -61,6 +42,24 @@ def get_details(browser, url, timeout): # {{{
# }}}
def to_metadata(browser, log, entry_, timeout): # {{{
from lxml import etree
XPath = partial(etree.XPath, namespaces=NAMESPACES)
# total_results = XPath('//openSearch:totalResults')
# start_index = XPath('//openSearch:startIndex')
# items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
rating = XPath('descendant::gd:rating[@average]')
def get_text(extra, x):
try:
@ -266,6 +265,7 @@ class GoogleBooks(Source):
def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout):
from lxml import etree
for relevance, i in enumerate(entries):
try:
ans = to_metadata(br, log, i, timeout)
@ -289,6 +289,10 @@ class GoogleBooks(Source):
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
from lxml import etree
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entry = XPath('//atom:entry')
query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)
if not query:

View File

@ -9,12 +9,9 @@ __docformat__ = 'restructuredtext en'
from urllib import quote
from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower
from calibre.ebooks.metadata.book.base import Metadata
@ -122,6 +119,7 @@ class ISBNDB(Source):
result_queue.put(result)
def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
from lxml import etree
def tostring(x):
if x is None:
@ -198,6 +196,10 @@ class ISBNDB(Source):
def make_query(self, q, abort, title=None, authors=None, identifiers={},
max_pages=10, timeout=30):
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_ascii_chars
page_num = 1
parser = etree.XMLParser(recover=True, no_network=True)
br = self.browser

View File

@ -9,18 +9,14 @@ __docformat__ = 'restructuredtext en'
'''
Fetch metadata using Overdrive Content Reserve
'''
import re, random, mechanize, copy, json
import re, random, copy, json
from threading import RLock
from Queue import Queue, Empty
from lxml import html
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html
from calibre.utils.soupparser import fromstring
ovrdrv_data_cache = {}
cache_lock = RLock()
@ -80,6 +76,7 @@ class OverDrive(Source):
def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30):
import mechanize
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
log.info('No cached cover found, running identify')
@ -170,6 +167,7 @@ class OverDrive(Source):
this page attempts to set a cookie that Mechanize doesn't like
copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
'''
import mechanize
goodcookies = br._ua_handlers['_cookies'].cookiejar
clean_cj = mechanize.CookieJar()
cookies_to_copy = []
@ -187,6 +185,7 @@ class OverDrive(Source):
br.set_cookiejar(clean_cj)
def overdrive_search(self, br, log, q, title, author):
import mechanize
# re-initialize the cookiejar to so that it's clean
clean_cj = mechanize.CookieJar()
br.set_cookiejar(clean_cj)
@ -303,6 +302,7 @@ class OverDrive(Source):
return ''
def overdrive_get_record(self, br, log, q, ovrdrv_id):
import mechanize
search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
@ -393,6 +393,11 @@ class OverDrive(Source):
def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log):
from lxml import html
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.soupparser import fromstring
from calibre.library.comments import sanitize_comments_html
try:
raw = br.open_novisit(metadata_url).read()
except Exception, e:

View File

@ -6,15 +6,11 @@ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib2
import datetime
from urllib import quote_plus
from Queue import Queue, Empty
from lxml import etree, html
from calibre import as_unicode
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
@ -43,6 +39,7 @@ class Ozon(Source):
isbnRegex = re.compile(isbnPattern)
def get_book_url(self, identifiers): # {{{
import urllib2
ozon_id = identifiers.get('ozon', None)
res = None
if ozon_id:
@ -81,6 +78,9 @@ class Ozon(Source):
def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=30): # {{{
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
if not self.is_configured():
return
query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
@ -283,6 +283,9 @@ class Ozon(Source):
# }}}
def get_book_details(self, log, metadata, timeout): # {{{
from lxml import html, etree
from calibre.ebooks.chardet import xml_to_unicode
url = self.get_book_url(metadata.get_identifiers())[2]
raw = self.browser.open_novisit(url, timeout=timeout).read()

View File

@ -12,7 +12,6 @@ from lxml import etree
from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk
from calibre.customize.conversion import InputFormatPlugin
class Extract(ODF2XHTML):
@ -178,16 +177,4 @@ class Extract(ODF2XHTML):
return os.path.abspath('metadata.opf')
class ODTInput(InputFormatPlugin):
    '''
    Input plugin for ODT files. The conversion itself is delegated to
    Extract.
    '''

    name        = 'ODT Input'
    author      = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types  = set(['odt'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        # Extract instances are callable; hand the stream and log over and
        # return whatever the extractor returns.
        extractor = Extract()
        return extractor(stream, '.', log)

View File

@ -7,31 +7,38 @@ __docformat__ = 'restructuredtext en'
class PDBError(Exception):
    '''Exception type defined by the pdb package.'''
FORMAT_READERS = None
from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
def _import_readers():
    '''
    Import the PDB reader classes and fill in the module level
    FORMAT_READERS table, which maps a PDB identity string to the Reader
    class used for it.

    The imports live inside this function so the reader modules are only
    loaded when it is called.
    '''
    global FORMAT_READERS
    from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
    from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
    from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
    from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
    from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader

    FORMAT_READERS = dict((
        ('PNPdPPrs', ereader_reader),
        ('PNRdPPrs', ereader_reader),
        ('zTXTGPlm', ztxt_reader),
        ('TEXtREAd', palmdoc_reader),
        ('.pdfADBE', pdf_reader),
        ('DataPlkr', plucker_reader),
    ))
FORMAT_READERS = {
'PNPdPPrs': ereader_reader,
'PNRdPPrs': ereader_reader,
'zTXTGPlm': ztxt_reader,
'TEXtREAd': palmdoc_reader,
'.pdfADBE': pdf_reader,
'DataPlkr': plucker_reader,
}
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer
ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}
FORMAT_WRITERS = None
def _import_writers():
    '''
    Import the PDB writer classes and fill in the module level
    FORMAT_WRITERS table, which maps an output format name to the Writer
    class used for it.

    The imports live inside this function so the writer modules are only
    loaded when it is called.
    '''
    global FORMAT_WRITERS
    from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
    from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
    from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer

    FORMAT_WRITERS = dict((
        ('doc', palmdoc_writer),
        ('ztxt', ztxt_writer),
        ('ereader', ereader_writer),
    ))
FORMAT_WRITERS = {
'doc': palmdoc_writer,
'ztxt': ztxt_writer,
'ereader': ereader_writer,
}
IDENTITY_TO_NAME = {
'PNPdPPrs': 'eReader',
@ -69,11 +76,17 @@ def get_reader(identity):
'''
Returns None if no reader is found for the identity.
'''
global FORMAT_READERS
if FORMAT_READERS is None:
_import_readers()
return FORMAT_READERS.get(identity, None)
def get_writer(extension):
    '''
    Return the writer class registered for extension, or None if there is
    no writer for it.

    The writer table is built the first time it is needed.
    '''
    global FORMAT_WRITERS
    table_missing = FORMAT_WRITERS is None
    if table_missing:
        # _import_writers() rebinds the module level FORMAT_WRITERS
        _import_writers()
    return FORMAT_WRITERS.get(extension)

View File

@ -65,7 +65,7 @@ class Reader(object):
name = urlunquote(self.stream.read(32).strip('\x00'))
size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32()
toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))
return toc
def get_text(self, toc_item, output_dir):
@ -89,7 +89,7 @@ class Reader(object):
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
html.write(output.encode('utf-8'))
html.write(output.replace('<TITLE>', '<TITLE> ').encode('utf-8'))
def get_image(self, toc_item, output_dir):
if toc_item.flags != 0:
@ -105,7 +105,7 @@ class Reader(object):
self.log.debug('Extracting content from file...')
html = []
images = []
for item in self.toc:
if item.name.lower().endswith('html'):
self.log.debug('HTML item %s found...' % item.name)

View File

@ -2,42 +2,9 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, glob, re, textwrap
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
# Maps the border style names read from the border-cell-*-style attributes
# of table cells (see RTFInput.convert_borders) to the CSS border-style
# value emitted for them. Names missing from this table fall back to
# 'solid' at the lookup site.
# NOTE(review): 'tripple' looks like a deliberate duplicate of 'triple',
# presumably to match a misspelled name in the parsed input -- confirm
# before removing it.
border_style_map = {
'single' : 'solid',
'double-thickness-border' : 'double',
'shadowed-border': 'outset',
'double-border': 'double',
'dotted-border': 'dotted',
'dashed': 'dashed',
'hairline': 'solid',
'inset': 'inset',
'dash-small': 'dashed',
'dot-dash': 'dotted',
'dot-dot-dash': 'dotted',
'outset': 'outset',
'tripple': 'double',
'triple': 'double',
'thick-thin-small': 'solid',
'thin-thick-small': 'solid',
'thin-thick-thin-small': 'solid',
'thick-thin-medium': 'solid',
'thin-thick-medium': 'solid',
'thin-thick-thin-medium': 'solid',
'thick-thin-large': 'solid',
'thin-thick-thin-large': 'solid',
'wavy': 'ridge',
'double-wavy': 'ridge',
'striped': 'ridge',
'emboss': 'inset',
'engrave': 'inset',
'frame': 'ridge',
}
class InlineClass(etree.XSLTExtension):
@ -71,261 +38,3 @@ class InlineClass(etree.XSLTExtension):
output_parent.text = ' '.join(classes)
class RTFInput(InputFormatPlugin):
    '''
    Input plugin that converts an RTF file to XHTML.

    The RTF is first turned into XML by rtf2xml (generate_xml), embedded
    pictures are written out as image files (extract_images), and the XML
    is then transformed to index.xhtml with the rtf.xsl stylesheet
    (convert). All output files are written to the current directory.
    '''

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        '''
        Run rtf2xml on ``stream`` and return the generated XML as a
        bytestring. ``stream`` is passed to ParseRtf as ``in_file``
        (convert() passes the path of the input file).

        When the debug_pipeline option is set, the parser is run at a
        higher run level with a debug directory, if that directory can be
        created.
        '''
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'dataxml.xml'
        run_lev, debug_dir, indent_out = 1, None, 0
        if getattr(self.opts, 'debug_pipeline', None) is not None:
            try:
                os.mkdir('rtfdebug')
                debug_dir = 'rtfdebug'
                run_lev = 4
                indent_out = 1
                self.log('Running RTFParser in debug mode')
            except Exception:
                # Debug mode is best effort, fall back to a normal run
                self.log.warn('Impossible to run RTFParser in debug mode')
        parser = ParseRtf(
            in_file    = stream,
            out_file   = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = indent_out,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 1,

            #debug
            deb_dir = debug_dir,
            run_level = run_lev,
        )
        parser.parse_rtf()
        with open(ofile, 'rb') as f:
            return f.read()

    def extract_images(self, picts):
        '''
        Extract the hex encoded pictures from the file at path ``picts``
        and write each one to the current directory.

        Returns a dict mapping the 1-based picture number to the name of
        the image file, after WMF images have been converted by
        convert_images().
        '''
        import imghdr
        self.log('Extracting images...')

        with open(picts, 'rb') as f:
            raw = f.read()
        picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
        non_hex = re.compile(r'[^a-fA-F0-9]')
        encs = [non_hex.sub('', pict) for pict in picts]

        count = 0
        imap = {}
        for enc in encs:
            if len(enc) % 2 == 1:
                # An odd number of hex digits cannot be decoded, drop the
                # trailing one
                enc = enc[:-1]
            data = enc.decode('hex')
            fmt = imghdr.what(None, data)
            if fmt is None:
                # Anything imghdr does not recognize is treated as WMF
                fmt = 'wmf'
            count += 1
            name = '%04d.%s' % (count, fmt)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
            # with open(name+'.hex', 'wb') as f:
                # f.write(enc)
        return self.convert_images(imap)

    def convert_images(self, imap):
        '''
        Run convert_image() on every file name in ``imap``, updating the
        mapping in place, and return it. Failures are logged and leave
        the original name in the mapping.
        '''
        self.default_img = None
        # Only values of existing keys are replaced, so the dict does not
        # change size while it is being iterated
        for count, val in imap.iteritems():
            try:
                imap[count] = self.convert_image(val)
            except Exception:
                self.log.exception('Failed to convert', val)
        return imap

    def convert_image(self, name):
        '''
        Return the name of a usable image file for ``name``. Files that
        are not WMF are returned unchanged. WMF files are rasterized and,
        if that fails, replaced by a placeholder image.
        '''
        if not name.endswith('.wmf'):
            return name
        try:
            return self.rasterize_wmf(name)
        except Exception:
            self.log.exception('Failed to convert WMF image %r'%name)
        return self.replace_wmf(name)

    def replace_wmf(self, name):
        '''
        Write a placeholder JPEG in place of the WMF image ``name`` and
        return the name of the new file. The placeholder is generated
        once and cached in self.default_img.
        '''
        from calibre.ebooks import calibre_cover
        if self.default_img is None:
            self.default_img = calibre_cover('Conversion of WMF images is not supported',
            'Use Microsoft Word or OpenOffice to save this RTF file'
            ' as HTML and convert that in calibre.', title_size=36,
            author_size=20)
        name = name.replace('.wmf', '.jpg')
        with open(name, 'wb') as f:
            f.write(self.default_img)
        return name

    def rasterize_wmf(self, name):
        '''
        Pass the contents of the WMF file ``name`` through wmf_unwrap(),
        write the result to a .png file and return the new file name.
        '''
        from calibre.utils.wmf.parse import wmf_unwrap
        with open(name, 'rb') as f:
            data = f.read()
        data = wmf_unwrap(data)
        name = name.replace('.wmf', '.png')
        with open(name, 'wb') as f:
            f.write(data)
        return name

    def write_inline_css(self, ic, border_styles):
        '''
        Append the CSS for inline formatting to styles.css: the fixed span
        classes, one class per font size and color collected in ``ic``,
        and one class per entry of ``border_styles`` (class name -> CSS
        declarations).
        '''
        font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
                enumerate(ic.font_sizes)]
        color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
                enumerate(ic.colors)]
        css = textwrap.dedent('''
        span.none {
        text-decoration: none; font-weight: normal;
        font-style: normal; font-variant: normal
        }
        span.italics { font-style: italic }
        span.bold { font-weight: bold }
        span.small-caps { font-variant: small-caps }
        span.underlined { text-decoration: underline }
        span.strike-through { text-decoration: line-through }
        ''')
        css += '\n'+'\n'.join(font_size_classes)
        css += '\n' +'\n'.join(color_classes)

        for cls, val in border_styles.iteritems():
            css += '\n\n.%s {\n%s\n}'%(cls, val)

        with open('styles.css', 'ab') as f:
            f.write(css)

    def convert_borders(self, doc):
        '''
        Set a ``class`` attribute on every ``cell`` element of ``doc``
        that describes its borders. Cells with identical border settings
        share a class.

        Returns a dict mapping each class name to its CSS declarations.
        '''
        border_styles = []
        style_map = {}
        for elem in doc.xpath(r'//*[local-name()="cell"]'):
            style = ['border-style: hidden', 'border-width: 1px',
                    'border-color: black']
            for x in ('bottom', 'top', 'left', 'right'):
                bs = elem.get('border-cell-%s-style'%x, None)
                if bs:
                    cbs = border_style_map.get(bs, 'solid')
                    style.append('border-%s-style: %s'%(x, cbs))
                bw = elem.get('border-cell-%s-line-width'%x, None)
                if bw:
                    style.append('border-%s-width: %spt'%(x, bw))
                bc = elem.get('border-cell-%s-color'%x, None)
                if bc:
                    style.append('border-%s-color: %s'%(x, bc))
            style = ';\n'.join(style)
            if style not in border_styles:
                border_styles.append(style)
            # The position of the style in the list gives the class number
            idx = border_styles.index(style)
            cls = 'border_style%d'%idx
            style_map[cls] = style
            elem.set('class', cls)
        return style_map

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the RTF file ``stream`` into index.xhtml, styles.css and
        metadata.opf in the current directory and return the absolute
        path to metadata.opf.

        Raises ValueError if rtf2xml reports an RTF feature it cannot
        handle.
        '''
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)

        # Always bind imap: the picture loop below looks names up in it
        # even when no pictures directory was found.
        imap = {}
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            try:
                imap = self.extract_images(d[0])
            except Exception:
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        border_styles = self.convert_borders(doc)
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
            # Replace the picture number with the extracted file name
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = { ('calibre', 'inline-class') : inline_class }
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            #clean multiple \n
            res = re.sub('\n+', '\n', res)
            # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
            # res = re.sub('\s*<body>', '<body>', res)
            # res = re.sub('(?<=\n)\n{2}',
                    # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        # Close the OPF file explicitly instead of leaking the handle
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"

View File

@ -16,7 +16,7 @@ from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
def clean_txt(txt):
'''
@ -28,7 +28,7 @@ def clean_txt(txt):
# Strip whitespace from the end of the line. Also replace
# all line breaks with \n.
txt = '\n'.join([line.rstrip() for line in txt.splitlines()])
# Replace whitespace at the beginning of the line with &nbsp;
txt = re.sub('(?m)(?<=^)([ ]{2,}|\t+)(?=.)', '&nbsp;' * 4, txt)
@ -75,7 +75,7 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
'''
Converts plain text to html by putting all paragraphs in
<p> tags. It condense and retains blank lines when necessary.
Requires paragraphs to be in single line format.
'''
txt = clean_txt(txt)
@ -215,7 +215,7 @@ def detect_paragraph_type(txt):
def detect_formatting_type(txt):
'''
Tries to determine the formatting of the document.
markdown: Markdown formatting is used.
textile: Textile formatting is used.
heuristic: When none of the above formatting types are

View File

@ -102,6 +102,7 @@ gprefs.defaults['cb_fullscreen'] = False
gprefs.defaults['worker_max_time'] = 0
gprefs.defaults['show_files_after_save'] = True
gprefs.defaults['auto_add_path'] = None
gprefs.defaults['auto_add_check_for_duplicates'] = False
# }}}
NONE = QVariant() #: Null value to return from the data function of item models

View File

@ -382,7 +382,8 @@ class Adder(QObject): # {{{
if not duplicates:
return self.duplicates_processed()
self.pd.hide()
files = [x[0].title for x in duplicates]
files = [_('%s by %s')%(x[0].title, x[0].format_field('authors')[1])
for x in duplicates]
if question_dialog(self._parent, _('Duplicates found!'),
_('Books with the same title as the following already '
'exist in the database. Add them anyway?'),

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, tempfile, shutil
import os, tempfile, shutil, time
from threading import Thread, Event
from PyQt4.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer)
@ -15,6 +15,7 @@ from PyQt4.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer)
from calibre import prints
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.gui2 import question_dialog, gprefs
class Worker(Thread):
@ -41,25 +42,58 @@ class Worker(Thread):
traceback.print_exc()
def auto_add(self):
from calibre.utils.ipc.simple_worker import fork_job
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.metadata.meta import metadata_from_filename
files = [x for x in os.listdir(self.path) if x not in self.staging
and os.path.isfile(os.path.join(self.path, x)) and
os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) and
os.path.splitext(x)[1][1:].lower() in self.be]
files = [x for x in os.listdir(self.path) if
# Must not be in the process of being added to the db
x not in self.staging
# Firefox creates 0 byte placeholder files when downloading
and os.stat(os.path.join(self.path, x)).st_size > 0
# Must be a file
and os.path.isfile(os.path.join(self.path, x))
# Must have read and write permissions
and os.access(os.path.join(self.path, x), os.R_OK|os.W_OK)
# Must be a known ebook file type
and os.path.splitext(x)[1][1:].lower() in self.be
]
data = {}
# Give any in progress copies time to complete
time.sleep(2)
for fname in files:
f = os.path.join(self.path, fname)
# Try opening the file for reading, if the OS prevents us, then at
# least on windows, it means the file is open in another
# application for writing. We will get notified by
# QFileSystemWatcher when writing is completed, so ignore for now.
try:
open(f, 'rb').close()
except:
continue
tdir = tempfile.mkdtemp(dir=self.tdir)
try:
fork_job('calibre.ebooks.metadata.meta',
'forked_read_metadata', (f, tdir), no_output=True)
except WorkerError as e:
prints('Failed to read metadata from:', fname)
prints(e.orig_tb)
except:
import traceback
traceback.print_exc()
# Ensure that the pre-metadata file size is present. If it isn't,
# write 0 so that the file is rescanned
szpath = os.path.join(tdir, 'size.txt')
try:
with open(szpath, 'rb') as f:
int(f.read())
except:
with open(szpath, 'wb') as f:
f.write(b'0')
opfpath = os.path.join(tdir, 'metadata.opf')
try:
if os.stat(opfpath).st_size < 30:
@ -125,25 +159,71 @@ class AutoAdder(QObject):
m = gui.library_view.model()
count = 0
needs_rescan = False
duplicates = []
for fname, tdir in data.iteritems():
paths = [os.path.join(self.worker.path, fname)]
sz = os.path.join(tdir, 'size.txt')
try:
with open(sz, 'rb') as f:
sz = int(f.read())
if sz != os.stat(paths[0]).st_size:
raise Exception('Looks like the file was written to after'
' we tried to read metadata')
except:
needs_rescan = True
try:
self.worker.staging.remove(fname)
except KeyError:
pass
continue
mi = os.path.join(tdir, 'metadata.opf')
if not os.access(mi, os.R_OK):
continue
mi = [OPF(open(mi, 'rb'), tdir,
populate_spine=False).to_book_metadata()]
m.add_books(paths, [os.path.splitext(fname)[1][1:].upper()], mi,
add_duplicates=True)
dups, num = m.add_books(paths,
[os.path.splitext(fname)[1][1:].upper()], mi,
add_duplicates=not gprefs['auto_add_check_for_duplicates'])
if dups:
path = dups[0][0]
with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
'wb') as dest, open(path, 'rb') as src:
shutil.copyfileobj(src, dest)
dups[0][0] = dest.name
duplicates.append(dups)
try:
os.remove(paths[0])
self.worker.staging.remove(fname)
except:
pass
count += num
if duplicates:
paths, formats, metadata = [], [], []
for p, f, mis in duplicates:
paths.extend(p)
formats.extend(f)
metadata.extend(mis)
files = [_('%s by %s')%(mi.title, mi.format_field('authors')[1])
for mi in metadata]
if question_dialog(self.parent(), _('Duplicates found!'),
_('Books with the same title as the following already '
'exist in the database. Add them anyway?'),
'\n'.join(files)):
dups, num = m.add_books(paths, formats, metadata,
add_duplicates=True)
count += num
for tdir in data.itervalues():
try:
os.remove(os.path.join(self.worker.path, fname))
try:
self.worker.staging.remove(fname)
except KeyError:
pass
shutil.rmtree(tdir)
except:
pass
count += 1
if count > 0:
m.books_added(count)
@ -153,4 +233,7 @@ class AutoAdder(QObject):
if hasattr(gui, 'db_images'):
gui.db_images.reset()
if needs_rescan:
QTimer.singleShot(2000, self.dir_changed)

View File

@ -38,14 +38,24 @@ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
ans = unicode(col.name())
return ans
f = QFontInfo(QApplication.font(widget)).pixelSize()
fi = QFontInfo(QApplication.font(widget))
f = fi.pixelSize()+1
fam = unicode(fi.family()).strip().replace('"', '')
if not fam:
fam = 'sans-serif'
c = color_to_string(QApplication.palette().color(QPalette.Normal,
QPalette.WindowText))
templ = u'''\
<html>
<head>
<style type="text/css">
body, td {background-color: transparent; font-size: %dpx; color: %s }
body, td {
background-color: transparent;
font-size: %dpx;
font-family: "%s",sans-serif;
color: %s
}
</style>
<style type="text/css">
%s
@ -55,7 +65,7 @@ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
%%s
</body>
<html>
'''%(f, c, css)
'''%(f, fam, c, css)
fm = getattr(mi, 'field_metadata', field_metadata)
fl = dict(get_field_list(fm))
show_comments = (all_fields or fl.get('comments', True))

View File

@ -31,7 +31,7 @@ class PluginWidget(QWidget, Ui_Form):
self.setupUi(self)
def initialize(self, name, db): #not working properly to update
from calibre.library.catalog import FIELDS
from calibre.library.catalogs import FIELDS
self.all_fields = [x for x in FIELDS if x != 'all']
#add custom columns

View File

@ -21,7 +21,7 @@ class PluginWidget(QWidget, Ui_Form):
def __init__(self, parent=None):
QWidget.__init__(self, parent)
self.setupUi(self)
from calibre.library.catalog import FIELDS
from calibre.library.catalogs import FIELDS
self.all_fields = []
for x in FIELDS:
if x != 'all':

View File

@ -251,8 +251,12 @@ class EditorWidget(QWebView): # {{{
def fset(self, val):
self.setHtml(val)
f = QFontInfo(QApplication.font(self)).pixelSize()
style = 'font-size: %dpx;' % (f,)
fi = QFontInfo(QApplication.font(self))
f = fi.pixelSize()+1
fam = unicode(fi.family()).strip().replace('"', '')
if not fam:
fam = 'sans-serif'
style = 'font-size: %fpx; font-family:"%s",sans-serif;' % (f, fam)
# toList() is needed because PyQt on Debian is old/broken
for body in self.page().mainFrame().documentElement().findAll('body').toList():

View File

@ -7,7 +7,8 @@ import os, traceback, Queue, time, cStringIO, re, sys
from threading import Thread
from PyQt4.Qt import (QMenu, QAction, QActionGroup, QIcon, SIGNAL,
Qt, pyqtSignal, QDialog, QObject)
Qt, pyqtSignal, QDialog, QObject, QVBoxLayout,
QDialogButtonBox)
from calibre.customize.ui import (available_input_formats, available_output_formats,
device_plugins)
@ -718,6 +719,31 @@ class DeviceMixin(object): # {{{
# Unmount the device by delegating to the device manager's umount_device().
def disconnect_mounted_device(self):
self.device_manager.umount_device()
def configure_connected_device(self):
    '''
    Show a modal dialog for configuring the currently connected device.

    Returns immediately when no device is connected. When device jobs are
    running or queued, an error dialog is shown instead (and its result is
    returned). If the user accepts the configuration dialog, the settings
    are saved through the device's save_settings() and a warning dialog
    asks the user to disconnect and re-connect the device.
    '''
    manager = self.device_manager
    if not manager.is_device_connected:
        return
    if self.job_manager.has_device_jobs(queued_also=True):
        return error_dialog(self, _('Running jobs'),
                _('Cannot configure the device while there are running'
                    ' device jobs.'), show=True)

    device = manager.connected_device
    config_widget = device.config_widget()

    # Wrap the device supplied widget in a dialog with OK/Cancel buttons
    dialog = QDialog(self)
    dialog.setWindowTitle(_('Configure %s')%device.get_gui_name())
    dialog.setWindowIcon(QIcon(I('config.png')))
    layout = QVBoxLayout(dialog)
    dialog.setLayout(layout)
    buttons = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel)
    buttons.accepted.connect(dialog.accept)
    buttons.rejected.connect(dialog.reject)
    layout.addWidget(config_widget)
    layout.addWidget(buttons)

    if dialog.exec_() != dialog.Accepted:
        return

    device.save_settings(config_widget)
    warning_dialog(self, _('Disconnect device'),
            _('Disconnect and re-connect the %s for your changes to'
                ' be applied.')%device.get_gui_name(), show=True,
            show_copy_button=False)
def _sync_action_triggered(self, *args):
m = getattr(self, '_sync_menu', None)
if m is not None:

View File

@ -97,6 +97,7 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
l.setWordWrap(True)
if settings.extra_customization:
self.opt_extra_customization.setText(settings.extra_customization)
self.opt_extra_customization.setCursorPosition(0)
self.extra_layout.addWidget(l, 0, 0)
self.extra_layout.addWidget(self.opt_extra_customization, 1, 0)
self.opt_save_template.setText(settings.save_template)

View File

@ -25,6 +25,7 @@ class LocationManager(QObject): # {{{
locations_changed = pyqtSignal()
unmount_device = pyqtSignal()
location_selected = pyqtSignal(object)
configure_device = pyqtSignal()
def __init__(self, parent=None):
QObject.__init__(self, parent)
@ -57,6 +58,10 @@ class LocationManager(QObject): # {{{
a = m.addAction(QIcon(I('eject.png')), _('Eject this device'))
a.triggered.connect(self._eject_requested)
self._mem.append(a)
a = m.addAction(QIcon(I('config.png')), _('Configure this device'))
a.triggered.connect(self._configure_requested)
self._mem.append(a)
else:
ac.setToolTip(tooltip)
ac.setMenu(m)
@ -109,6 +114,9 @@ class LocationManager(QObject): # {{{
# Slot for the 'Eject this device' menu action's triggered signal. Any
# arguments delivered by the signal are accepted and ignored.
def _eject_requested(self, *args):
# Forward the request by emitting this object's unmount_device signal
self.unmount_device.emit()
def _configure_requested(self):
self.configure_device.emit()
def update_devices(self, cp=(None, None), fs=[-1, -1, -1], icon=None):
if icon is None:
icon = I('reader.png')

View File

@ -138,8 +138,8 @@ class GuiRunner(QObject):
if self.splash_screen is not None:
self.splash_screen.finish(main)
if DEBUG:
prints('Started up in', time.time() - self.startup_time, 'with',
len(db.data), 'books')
prints('Started up in %.2f seconds'%(time.time() -
self.startup_time), 'with', len(db.data), 'books')
add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False)
sys.excepthook = main.unhandled_exception
if len(self.args) > 1:

View File

@ -323,14 +323,19 @@ class Comments(QWebView): # {{{
ans = unicode(col.name())
return ans
f = QFontInfo(QApplication.font(self.parent())).pixelSize()
fi = QFontInfo(QApplication.font(self.parent()))
f = fi.pixelSize()+1
fam = unicode(fi.family()).strip().replace('"', '')
if not fam:
fam = 'sans-serif'
c = color_to_string(QApplication.palette().color(QPalette.Normal,
QPalette.WindowText))
templ = '''\
<html>
<head>
<style type="text/css">
body, td {background-color: transparent; font-size: %dpx; color: %s }
body, td {background-color: transparent; font-family: %s; font-size: %dpx; color: %s }
a { text-decoration: none; color: blue }
div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
table { margin-bottom: 0; padding-bottom: 0; }
@ -342,7 +347,7 @@ class Comments(QWebView): # {{{
</div>
</body>
<html>
'''%(f, c)
'''%(fam, f, c)
self.setHtml(templ%html)
# }}}

View File

@ -32,6 +32,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('automerge', gprefs, choices=choices)
r('new_book_tags', prefs, setting=CommaSeparatedList)
r('auto_add_path', gprefs, restart_required=True)
r('auto_add_check_for_duplicates', gprefs)
self.filename_pattern = FilenamePattern(self)
self.metadata_box.layout().insertWidget(0, self.filename_pattern)

View File

@ -193,6 +193,19 @@ Author matching is exact.</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="opt_auto_add_check_for_duplicates">
<property name="toolTip">
<string>If set, this option will cause calibre to check if a file
being auto-added is already in the calibre library.
If it is, a message will pop up asking you whether
you want to add it anyway.</string>
</property>
<property name="text">
<string>Check for &amp;duplicates when auto-adding files</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer_2">
<property name="orientation">

View File

@ -171,7 +171,7 @@ class Preferences(QMainWindow):
self.committed = False
self.close_after_initial = close_after_initial
self.resize(900, 720)
self.resize(930, 720)
nh, nw = min_available_height()-25, available_width()-10
if nh < 0:
nh = 800

View File

@ -45,6 +45,7 @@ class SearchDialog(QDialog, Ui_Dialog):
self.cache_pool = CacheUpdateThreadPool(self.cache_thread_count)
self.results_view.model().cover_pool.set_thread_count(self.cover_thread_count)
self.results_view.model().details_pool.set_thread_count(self.details_thread_count)
self.results_view.setCursor(Qt.PointingHandCursor)
# Check for results and hung threads.
self.checker = QTimer()
@ -90,7 +91,7 @@ class SearchDialog(QDialog, Ui_Dialog):
def setup_store_checks(self):
first_run = self.config.get('first_run', True)
# Add check boxes for each store so the user
# can disable searching specific stores on a
# per search basis.
@ -117,7 +118,7 @@ class SearchDialog(QDialog, Ui_Dialog):
self.store_checks[x] = cbox
store_list_layout.setRowStretch(store_list_layout.rowCount(), 10)
self.store_list.setWidget(stores_check_widget)
self.config['first_run'] = False
def build_adv_search(self):
@ -340,7 +341,7 @@ class SearchDialog(QDialog, Ui_Dialog):
def result_item_activated(self, index):
result = self.results_view.model().get_result(index)
if result.downloads:
self.download_book(result)
else:
@ -353,7 +354,7 @@ class SearchDialog(QDialog, Ui_Dialog):
fname = result.title[:60] + '.' + ext.lower()
fname = ascii_filename(fname)
self.gui.download_ebook(result.downloads[ext], filename=fname)
def open_store(self, result):
self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked())

View File

@ -265,6 +265,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
####################### Location Manager ########################
self.location_manager.location_selected.connect(self.location_selected)
self.location_manager.unmount_device.connect(self.device_manager.umount_device)
self.location_manager.configure_device.connect(self.configure_connected_device)
self.eject_action.triggered.connect(self.device_manager.umount_device)
#################### Update notification ###################

View File

@ -28,6 +28,7 @@ from calibre.gui2 import min_available_height, available_width
from calibre.utils.config import dynamic, prefs
from calibre.gui2 import NONE, choose_dir, error_dialog
from calibre.gui2.dialogs.progress import ProgressDialog
from calibre.customize.ui import device_plugins
# Devices {{{
@ -251,15 +252,39 @@ class Android(Device):
id = 'android'
supports_color = True
class AndroidTablet(Device):
# Commit hook that also configures the Android device driver plugin.
# NOTE(review): the enclosing class header is not visible in this hunk; the
# super(Android, cls) call suggests this method belongs to Android -- confirm.
@classmethod
def commit(cls):
# Run the commit() implementation that follows Android in cls's MRO
super(Android, cls).commit()
# Disabled plugins are included in the search for the driver
for plugin in device_plugins(include_disabled=True):
if plugin.name == 'Android driver':
plugin.configure_for_generic_epub_app()
class AndroidTablet(Android):
name = 'Android tablet'
output_format = 'EPUB'
manufacturer = 'Android'
id = 'android_tablet'
supports_color = True
output_profile = 'tablet'
class AndroidPhoneWithKindle(Android):

    # Android phone variant producing MOBI output with the 'kindle' output
    # profile; on commit it sets up the Android driver for the Kindle app.
    name = 'Android phone with Kindle reader'
    id = 'android_phone_with_kindle'
    output_format = 'MOBI'
    output_profile = 'kindle'

    @classmethod
    def commit(cls):
        # super(Android, cls) resumes the lookup *after* Android in the MRO,
        # so Android's own commit() is not run here (presumably so that only
        # the Kindle specific driver configuration below is applied).
        super(Android, cls).commit()
        android_drivers = (candidate
                for candidate in device_plugins(include_disabled=True)
                if candidate.name == 'Android driver')
        for driver in android_drivers:
            driver.configure_for_kindle_app()
# Tablet variant of AndroidPhoneWithKindle: overrides only the display name,
# the id and the output profile ('kindle_fire'); everything else, including
# commit(), is inherited.
class AndroidTabletWithKindle(AndroidPhoneWithKindle):
name = 'Android tablet with Kindle reader'
id = 'android_tablet_with_kindle'
output_profile = 'kindle_fire'
class HanlinV3(Device):
name = 'Hanlin V3'

Some files were not shown because too many files have changed in this diff Show More