mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
0.8.38+ (kg load optimizations)
This commit is contained in:
commit
dbd588f94e
@ -3,10 +3,17 @@ import re
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
''' there are three modifications:
|
||||
1) fetch issue cover
|
||||
2) toggle ignore premium articles
|
||||
3) extract proper section names, ie. "Comments", "Essay"
|
||||
|
||||
by Chen Wei weichen302@gmx.com, 2012-02-05'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
language = 'en'
|
||||
version = 1
|
||||
version = 1.01
|
||||
|
||||
title = u'Foreign Affairs (Subcription or (free) Registration)'
|
||||
publisher = u'Council on Foreign Relations'
|
||||
@ -17,6 +24,9 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
|
||||
INDEX = 'http://www.foreignaffairs.com'
|
||||
FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
|
||||
INCLUDE_PREMIUM = False
|
||||
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name = 'base'))
|
||||
@ -37,6 +47,12 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
temp_files = []
|
||||
articles_are_obfuscated = True
|
||||
|
||||
def get_cover_url(self):
    '''Return the URL of the current issue's cover image, or None.

    Fetches self.FRONTPAGE and looks for the first <img> inside the div
    with class 'inthemag-issuebuy-cover'. The image's src attribute is
    appended to self.INDEX.

    Returns None when the div, the image or its src attribute is missing,
    instead of raising AttributeError/TypeError/KeyError.
    '''
    soup = self.index_to_soup(self.FRONTPAGE)
    div = soup.find('div', attrs={'class':'inthemag-issuebuy-cover'})
    if div is None:
        return None
    img = div.find('img')
    if img is None:
        return None
    try:
        img_url = img['src']
    except KeyError:
        return None
    # src is concatenated onto the site root exactly as before
    # (assumes the page uses site-relative image paths -- TODO confirm).
    return self.INDEX + img_url
|
||||
|
||||
def get_obfuscated_article(self, url):
|
||||
br = self.get_browser()
|
||||
br.open(url)
|
||||
@ -50,57 +66,47 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
|
||||
return self.temp_files[-1].name
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('http://www.foreignaffairs.com/magazine')
|
||||
articles = []
|
||||
answer = []
|
||||
content = soup.find('div', attrs = {'class': 'center-wrapper'})
|
||||
if content:
|
||||
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
|
||||
tag = div.find('div', attrs = {'class': 'views-field-title'})
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
|
||||
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
|
||||
tag = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
# If they ever fix their markup, this will break :-(
|
||||
summary = self.tag_to_string(tag.findNextSibling('p'))
|
||||
description = author + '<br/>' + summary
|
||||
|
||||
articles.append({'title': title, 'date': None, 'url': url, 'description': description})
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
|
||||
answer.append(('Magazine', articles))
|
||||
|
||||
ul = content.find('ul')
|
||||
if ul:
|
||||
soup = self.index_to_soup(self.FRONTPAGE)
|
||||
sec_start = soup.findAll('div', attrs={'class':'panel-separator'})
|
||||
for sec in sec_start:
|
||||
content = sec.nextSibling
|
||||
if content:
|
||||
section = self.tag_to_string(content.find('h2'))
|
||||
articles = []
|
||||
for li in ul.findAll('li'):
|
||||
tag = li.find('div', attrs = {'class': 'views-field-title'})
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
description = ''
|
||||
tag = li.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
|
||||
if tag:
|
||||
description = self.tag_to_string(tag)
|
||||
|
||||
articles.append({'title': title, 'date': None, 'url': url, 'description': description})
|
||||
else:
|
||||
continue
|
||||
tags = []
|
||||
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
|
||||
tags.append(div)
|
||||
ul = content.find('ul')
|
||||
for li in content.findAll('li'):
|
||||
tags.append(li)
|
||||
|
||||
for div in tags:
|
||||
title = url = description = author = None
|
||||
|
||||
if self.INCLUDE_PREMIUM:
|
||||
found_premium = False
|
||||
else:
|
||||
continue
|
||||
|
||||
answer.append(('Letters to the Editor', articles))
|
||||
found_premium = div.findAll('span', attrs={'class':
|
||||
'premium-icon'})
|
||||
if not found_premium:
|
||||
tag = div.find('div', attrs={'class': 'views-field-title'})
|
||||
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
|
||||
tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
description = self.tag_to_string(tag_summary)
|
||||
articles.append({'title':title, 'date':None, 'url':url,
|
||||
'description':description, 'author':author})
|
||||
if articles:
|
||||
answer.append((section, articles))
|
||||
return answer
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
110
recipes/ilmanifesto.recipe
Normal file
110
recipes/ilmanifesto.recipe
Normal file
@ -0,0 +1,110 @@
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/'
|
||||
|
||||
class IlManifesto(BasicNewsRecipe):
    '''Recipe that downloads the latest HTML edition of il manifesto.

    The edition index is located once by _set_manifesto_index() and cached
    on the instance; get_cover_url() and parse_index() both depend on it.
    extract_readable_article() rebuilds each article page into a minimal
    HTML document.
    '''
    title = 'Il Manifesto'
    __author__ = 'Giacomo Lacava'
    description = 'quotidiano comunista - ultima edizione html disponibile'
    publication_type = 'newspaper'
    publisher = 'il manifesto coop. editrice a r.l.'
    language = 'it'

    oldest_article = 2
    max_articles_per_feed = 100
    delay = 1
    no_stylesheets = True
    simultaneous_downloads = 5
    timeout = 30
    auto_cleanup = True
    remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})]
    remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'})
    remove_tags_after = dict(id='myPrintArea')

    # URL of the latest edition's index page; filled by _set_manifesto_index().
    manifesto_index = None
    # Last non-empty path component of the edition link; used in the cover URL.
    manifesto_datestr = None

    def _set_manifesto_index(self):
        '''Locate the latest edition once and cache its URL and date string.

        Does nothing when self.manifesto_index is already set.
        '''
        if self.manifesto_index is not None:
            return
        startUrl = MANIFESTO_BASEURL + 'area-abbonati/in-edicola/'
        startSoup = self.index_to_soup(startUrl)
        # The edition link is the first <a> of the second
        # 'accordion_inedicola' div on the start page.
        lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href']
        self.manifesto_index = MANIFESTO_BASEURL + lastEdition
        # Use the last non-empty path component, so trailing slashes
        # (one or several) do not produce an empty date string.
        self.manifesto_datestr = lastEdition.rstrip('/').split('/')[-1]

    def _node_text(self, node):
        '''Return the text of node via tag_to_string, or '' if node is None.

        Tag.string is None for tags that contain nested markup, so it is
        not used for titles, authors or descriptions.
        '''
        if node is None:
            return ''
        return self.tag_to_string(node)

    def get_cover_url(self):
        '''Return the front-page image URL built from the cached date string.'''
        self._set_manifesto_index()
        return MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr

    def parse_index(self):
        '''Build the list of (section name, articles) for the latest edition.

        Each link inside the 'accordion_inedicola' div of the edition index
        is treated as one section; its page is fetched and every
        'strumenti1_inedicola' div containing a link becomes one article.
        Sections without an <h2> heading or without a 'column1' div are
        skipped instead of aborting the whole download.
        '''
        self._set_manifesto_index()
        soup = self.index_to_soup(self.manifesto_index)
        feedLinks = soup.find('div',id='accordion_inedicola').findAll('a')
        today = strftime('%d %B %Y')
        result = []
        for feed in feedLinks:
            heading = feed.find('h2')
            if heading is None:
                continue  # link that is not a section heading
            feedName = self._node_text(heading)
            feedSoup = self.index_to_soup(MANIFESTO_BASEURL + feed['href'])
            indexRoot = feedSoup.find('div',attrs={'class':'column1'})
            if indexRoot is None:
                continue  # section page without the expected article column
            articles = []
            for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}):
                artLink = div.find('a')
                if artLink is None:
                    continue  # empty div
                articles.append({
                    'title': self._node_text(artLink),
                    'url': MANIFESTO_BASEURL + artLink['href'],
                    'date': today,
                    'description': self._node_text(div.find('div',attrs={'class':'text_12'})),
                    'content': '',
                    'author': self._node_text(div.find('div',attrs={'class':'firma'})),
                })
            result.append((feedName,articles))
        return result

    def extract_readable_article(self, html, url):
        '''Rebuild an article page as a minimal HTML document.

        The title, subtitle ('occhiello_rosso'), author ('firma'), summary
        ('sommario') and body ('bodytext') nodes are pulled out of the page
        and inserted into a fixed template. Optional nodes that are missing
        are rendered as empty strings rather than the literal text "None".
        '''
        bs = BeautifulSoup(html)
        col1 = bs.find('div',attrs={'class':'column1'})
        content = None
        if col1 is not None:
            content = col1.find('div',attrs={'class':'bodytext'})
        if content is None:
            # NOTE(review): the page does not have the expected layout;
            # defer to the base class implementation this method overrides
            # -- confirm it is available in the targeted calibre version.
            return BasicNewsRecipe.extract_readable_article(self, html, url)

        title = self._node_text(bs.find(id='titolo_articolo'))
        author = col1.find('span',attrs={'class':'firma'})
        if author is None:
            author = ''
        subtitle = col1.findPrevious('div',attrs={'class':'occhiello_rosso'})
        if subtitle is None:
            subtitle = ''
        summary = bs.find('div',attrs={'class':'sommario'})
        if summary is None:
            summary = ''

        template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>"
        return template % dict(title=title,subtitle=subtitle,author=author,summary=summary,content=content)
|
||||
|
||||
|
@ -1,58 +1,53 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
## Title: Microwave Journal RSS recipe
|
||||
## Title: Microwave Journal
|
||||
## Contact: Kiavash (use Mobile Read)
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
## Copyright: Kiavash
|
||||
##
|
||||
## Written: Jan 2012
|
||||
## Last Edited: Jan 2012
|
||||
## Last Edited: Feb 2012
|
||||
##
|
||||
|
||||
# Feb 2012: New Recipe compatible with the MWJournal 2.0 website
|
||||
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
__copyright__ = 'Kiavash'
|
||||
__author__ = 'Kaivash'
|
||||
|
||||
'''
|
||||
Microwave Journal Monthly Magazine
|
||||
You need to sign up (free) and get username/password.
|
||||
microwavejournal.com
|
||||
'''
|
||||
|
||||
import re # Import the regular expressions module.
|
||||
from calibre.ptempfile import TemporaryFile # we need this for saving to a temp file
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
class MWJournal(BasicNewsRecipe):
|
||||
# Title to use for the ebook.
|
||||
title = u'Microwave Journal'
|
||||
__author__ = 'Kiavash'
|
||||
language = 'en'
|
||||
|
||||
#A brief description for the ebook.
|
||||
description = u'Microwave Journal web site ebook created using rss feeds.'
|
||||
|
||||
# Set publisher and publication type.
|
||||
publisher = 'Horizon House'
|
||||
title = u'Microwave Journal'
|
||||
description = u'Microwave Journal Monthly Magazine'
|
||||
publisher = 'Horizon House'
|
||||
publication_type = 'magazine'
|
||||
INDEX = 'http://www.microwavejournal.com/publications/'
|
||||
|
||||
oldest_article = 31 # monthly published magazine. Some months are 31 days!
|
||||
max_articles_per_feed = 100
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = True
|
||||
|
||||
# Disable stylesheets and javascript from site.
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
asciiize = True # Converts all none ascii characters to their ascii equivalents
|
||||
|
||||
needs_subscription = True # oh yeah... we need to login btw.
|
||||
|
||||
# Timeout for fetching files from the server in seconds. The default of 120 seconds, seems somewhat excessive.
|
||||
language = 'en'
|
||||
timeout = 30
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
Convert_Grayscale = False # Convert images to gray scale or not
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'record'})]
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_tags = [
|
||||
dict(name='font', attrs={'class':'footer'}), # remove fonts
|
||||
]
|
||||
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
@ -72,72 +67,75 @@ class MWJournal(BasicNewsRecipe):
|
||||
h3 { font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'boxadzonearea350'}), # Removes banner ads
|
||||
dict(name='font', attrs={'class':'footer'}), # remove fonts if you do like your fonts more! Comment out to use website's fonts
|
||||
dict(name='div', attrs={'class':'newsarticlead'})
|
||||
]
|
||||
|
||||
# Remove various tag attributes to improve the look of the ebook pages.
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Remove the line breaks as well as href links. Books don't have links generally speaking
|
||||
# Remove the line breaks, href links and float left/right and picture width/height.
|
||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<a.*?>'), lambda h1: ''),
|
||||
(re.compile(r'</a>'), lambda h2: '')
|
||||
(re.compile(r'</a>'), lambda h2: ''),
|
||||
(re.compile(r'float:.*?'), lambda h3: ''),
|
||||
(re.compile(r'width:.*?px'), lambda h4: ''),
|
||||
(re.compile(r'height:.*?px'), lambda h5: '')
|
||||
]
|
||||
|
||||
# Select the feeds that you are interested.
|
||||
feeds = [
|
||||
(u'Current Issue', u'http://www.mwjournal.com/rss/Rss.asp?type=99'),
|
||||
(u'Industry News', u'http://www.mwjournal.com/rss/Rss.asp?type=1'),
|
||||
(u'Resources', u'http://www.mwjournal.com/rss/Rss.asp?type=3'),
|
||||
(u'Buyer\'s Guide', u'http://www.mwjournal.com/rss/Rss.asp?type=5'),
|
||||
(u'Events', u'http://www.mwjournal.com/rss/Rss.asp?type=2'),
|
||||
(u'All Updates', u'http://www.mwjournal.com/rss/Rss.asp?type=0'),
|
||||
]
|
||||
|
||||
# No magazine is complete without cover. Let's get it then!
|
||||
# The function is adapted from the Economist recipe
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
cover_page_location = 'http://www.mwjournal.com/Journal/' # Cover image is located on this page
|
||||
soup = self.index_to_soup(cover_page_location)
|
||||
cover_item = soup.find('img',attrs={'src':lambda x: x and '/IssueImg/3_MWJ_CurrIss_CoverImg' in x}) # There are three files named cover, we want the highest resolution which is the 3rd image. So we look for the pattern. Remember that the name of the cover image changes every month so we cannot search for the complete name. Instead we are searching for the pattern
|
||||
if cover_item:
|
||||
cover_url = 'http://www.mwjournal.com' + cover_item['src'].strip() # yeah! we found it. Let's fetch the image file and pass it as cover to calibre
|
||||
return cover_url
|
||||
|
||||
def print_version(self, url):
|
||||
if url.find('/Journal/article.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/Journal/article.asp?HH_ID=', '/Journal/Print.asp?Id=')
|
||||
elif url.find('/News/article.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/News/article.asp?HH_ID=', '/Journal/Print.asp?Id=')
|
||||
elif url.find('/Resources/TechLib.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/Resources/TechLib.asp?HH_ID=', '/Resources/PrintRessource.asp?Id=')
|
||||
return url.replace('/articles/', '/articles/print/')
|
||||
|
||||
def get_browser(self):
|
||||
'''
|
||||
The Microwave Journal website sends the login page to omeda.com once the login info is submitted; omeda.com then redirects to mwjournal.com, where the browser logs in again (hidden from the user). To overcome this obstacle, the first login page is fetched and its output is stored in an HTML file. Then the HTML file is opened again and the second login form is submitted (many thanks to Barty, who helped with the second page login).
|
||||
'''
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
url = ('http://www.omeda.com/cgi-win/mwjreg.cgi?m=login') # main login page.
|
||||
br.open(url) # fetch the 1st login page
|
||||
br.select_form('login') # finds the login form
|
||||
br['EMAIL_ADDRESS'] = self.username # fills the username
|
||||
br['PASSWORD'] = self.password # fills the password
|
||||
raw = br.submit().read() # submit the form and read the 2nd login form
|
||||
# save it to an htm temp file (from ESPN recipe written by Kovid Goyal kovid@kovidgoyal.net
|
||||
with TemporaryFile(suffix='.htm') as fname:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(raw)
|
||||
br.open_local_file(fname)
|
||||
br.select_form(nr=0) # finds submit on the 2nd form
|
||||
didwelogin = br.submit().read() # submit it and read the return html
|
||||
if 'Welcome ' not in didwelogin: # did it login successfully? Is Username/password correct?
|
||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||
#login is done
|
||||
return br
|
||||
def parse_index(self):
    '''Build the list of (section title, articles) for the current issue.

    Fetches self.INDEX. When the issue header div is present, its <h2>
    text is stored in self.timefmt and its first image (if any) in
    self.cover_url. Every 'records' div inside the 'box2 publication' div
    is one section and every 'record' div inside it is one article.

    Articles whose title contains 'The MWJ Puzzler', duplicate titles and
    records without a link are skipped. A record without an abstract
    paragraph gets a description of None. Sections that end up empty are
    not returned. Returns an empty list when the sections container is
    missing.
    '''
    site = 'http://www.microwavejournal.com'
    soup = self.index_to_soup(self.INDEX)

    ts = soup.find('div', attrs={'class':'box1 article publications-show'})
    if ts is not None:
        ds = self.tag_to_string(ts.find('h2'))
        self.log('Found Current Issue:', ds)
        self.timefmt = ' [%s]'%ds

        cover = ts.find('img', src=True)
        if cover is not None:
            self.cover_url = site + cover['src']
            self.log('Found Cover image:', self.cover_url)

    feeds = []
    sections = soup.find('div', attrs={'class':'box2 publication'})
    if sections is None:
        return feeds

    seen_titles = set()  # used to drop articles listed more than once
    for section in sections.findAll('div', attrs={'class':'records'}):
        section_title = self.tag_to_string(section.find('h3'))
        self.log('Found section:', section_title)
        articles = []
        for post in section.findAll('div', attrs={'class':'record'}):
            title = self.tag_to_string(post.find('h2'))
            if 'The MWJ Puzzler' in title:  # the Puzzler is not an article
                continue
            if title in seen_titles:
                continue
            a = post.find('a', href=True)
            if a is None:
                continue  # nothing to download for this record
            seen_titles.add(title)
            url = a['href']
            if url.startswith('/'):
                url = site + url
            self.log('\tFound article:', title, 'at', url)

            desc = None
            abstract = post.find('div', attrs={'class':'abstract'})
            p = abstract.find('p') if abstract is not None else None
            if p is not None:
                desc = self.tag_to_string(p)
                self.log('\t\t', desc)
            articles.append({'title':title, 'url':url, 'description':desc,
                             'date':self.timefmt})
        if articles:
            feeds.append((section_title, articles))
    return feeds
|
||||
|
||||
def postprocess_html(self, soup, first):
    '''Optionally convert every downloaded image to grayscale.

    When self.Convert_Grayscale is true, each <img> tag that has a src
    attribute is opened with Image, switched to "GrayscaleType" and saved
    back to the same path. The soup is returned unchanged in all cases;
    `first` is not used.
    '''
    if self.Convert_Grayscale:
        # src=True keeps only tags that carry a src attribute; this
        # replaces the Tag.has_key() lambda. The former `img < 0` check
        # compared an Image object with an int and could never signal an
        # error, so it has been dropped.
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            img.type = "GrayscaleType"
            img.save(iurl)
    return soup
|
||||
|
@ -1,30 +1,36 @@
|
||||
"""
|
||||
readitlaterlist.com
|
||||
"""
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '''
|
||||
2010, Darko Miletic <darko.miletic at gmail.com>
|
||||
2011, Przemyslaw Kryger <pkryger at gmail.com>
|
||||
'''
|
||||
'''
|
||||
readitlaterlist.com
|
||||
2012, tBunnyMan <Wag That Tail At Me dot com>
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Readitlater(BasicNewsRecipe):
|
||||
title = 'Read It Later'
|
||||
__author__ = 'Darko Miletic, Przemyslaw Kryger'
|
||||
description = '''Personalized news feeds. Go to readitlaterlist.com to
|
||||
setup up your news. Fill in your account
|
||||
username, and optionally you can add password.'''
|
||||
publisher = 'readitlater.com'
|
||||
title = 'ReadItLater'
|
||||
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
|
||||
description = '''Personalized news feeds. Go to readitlaterlist.com to setup \
|
||||
up your news. This version displays pages of articles from \
|
||||
oldest to newest, with max & minimum counts, and marks articles \
|
||||
read after downloading.'''
|
||||
publisher = 'readitlaterlist.com'
|
||||
category = 'news, custom'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
max_articles_per_feed = 50
|
||||
minimum_articles = 1
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
needs_subscription = True
|
||||
INDEX = u'http://readitlaterlist.com'
|
||||
LOGIN = INDEX + u'/l'
|
||||
readList = []
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
@ -33,41 +39,46 @@ class Readitlater(BasicNewsRecipe):
|
||||
br.select_form(nr=0)
|
||||
br['feed_id'] = self.username
|
||||
if self.password is not None:
|
||||
br['password'] = self.password
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def get_feeds(self):
|
||||
self.report_progress(0, ('Fetching list of feeds...'))
|
||||
self.report_progress(0, ('Fetching list of pages...'))
|
||||
lfeeds = []
|
||||
i = 1
|
||||
feedurl = self.INDEX + u'/unread/1'
|
||||
while True:
|
||||
title = u'Unread articles, page ' + str(i)
|
||||
lfeeds.append((title, feedurl))
|
||||
self.report_progress(0, ('Got ') + str(i) + (' feeds'))
|
||||
lfeeds.insert(0, (title, feedurl))
|
||||
self.report_progress(0, ('Got ') + str(i) + (' pages'))
|
||||
i += 1
|
||||
soup = self.index_to_soup(feedurl)
|
||||
ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
|
||||
ritem = soup.find('a', attrs={'id':'next', 'class':'active'})
|
||||
if ritem is None:
|
||||
break
|
||||
feedurl = self.INDEX + ritem['href']
|
||||
if self.test:
|
||||
return lfeeds[:2]
|
||||
return lfeeds
|
||||
|
||||
def parse_index(self):
|
||||
totalfeeds = []
|
||||
articlesToGrab = self.max_articles_per_feed
|
||||
lfeeds = self.get_feeds()
|
||||
for feedobj in lfeeds:
|
||||
if articlesToGrab < 1:
|
||||
break
|
||||
feedtitle, feedurl = feedobj
|
||||
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||
articles = []
|
||||
soup = self.index_to_soup(feedurl)
|
||||
ritem = soup.find('ul',attrs={'id':'list'})
|
||||
for item in ritem.findAll('li'):
|
||||
ritem = soup.find('ul', attrs={'id':'list'})
|
||||
for item in reversed(ritem.findAll('li')):
|
||||
if articlesToGrab < 1:
|
||||
break
|
||||
else:
|
||||
articlesToGrab -= 1
|
||||
description = ''
|
||||
atag = item.find('a',attrs={'class':'text'})
|
||||
atag = item.find('a', attrs={'class':'text'})
|
||||
if atag and atag.has_key('href'):
|
||||
url = self.INDEX + atag['href']
|
||||
title = self.tag_to_string(item.div)
|
||||
@ -78,6 +89,20 @@ class Readitlater(BasicNewsRecipe):
|
||||
,'url' :url
|
||||
,'description':description
|
||||
})
|
||||
readLink = item.find('a', attrs={'class':'check'})['href']
|
||||
self.readList.append(readLink)
|
||||
totalfeeds.append((feedtitle, articles))
|
||||
if len(self.readList) < self.minimum_articles:
|
||||
raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
|
||||
return totalfeeds
|
||||
|
||||
def mark_as_read(self, markList):
    '''Open every link in markList, each appended to self.INDEX.

    markList holds the hrefs of the 'check' links collected while the
    index was parsed. The responses are not inspected. Nothing is done,
    and no browser is created, when markList is empty or None.
    '''
    if not markList:
        return
    br = self.get_browser()
    for link in markList:
        br.open(self.INDEX + link)
|
||||
|
||||
# Hands the links accumulated in self.readList to mark_as_read().
# Presumably invoked by the recipe framework once downloading has finished --
# confirm against BasicNewsRecipe.
def cleanup(self):
|
||||
self.mark_as_read(self.readList)
|
||||
|
||||
|
@ -5,13 +5,14 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, glob, functools, re
|
||||
from calibre import guess_type
|
||||
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
|
||||
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase
|
||||
from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
|
||||
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase)
|
||||
from calibre.constants import numeric_version
|
||||
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
from calibre.ebooks.html.to_zip import HTML2ZIP
|
||||
|
||||
plugins = []
|
||||
|
||||
# To archive plugins {{{
|
||||
|
||||
class PML2PMLZ(FileTypePlugin):
|
||||
@ -86,6 +87,8 @@ class TXT2TXTZ(FileTypePlugin):
|
||||
return list(set(images))
|
||||
|
||||
def run(self, path_to_ebook):
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
|
||||
with open(path_to_ebook, 'rb') as ebf:
|
||||
txt = ebf.read()
|
||||
base_dir = os.path.dirname(path_to_ebook)
|
||||
@ -117,6 +120,7 @@ class TXT2TXTZ(FileTypePlugin):
|
||||
# No images so just import the TXT file.
|
||||
return path_to_ebook
|
||||
|
||||
plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract,]
|
||||
# }}}
|
||||
|
||||
# Metadata reader plugins {{{
|
||||
@ -399,6 +403,10 @@ class ZipMetadataReader(MetadataReaderPlugin):
|
||||
# Returns the result of calibre.ebooks.metadata.zip.get_metadata(stream).
# The helper is imported inside the method rather than at module level;
# ftype is accepted but not used here.
def get_metadata(self, stream, ftype):
|
||||
from calibre.ebooks.metadata.zip import get_metadata
|
||||
return get_metadata(stream)
|
||||
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
|
||||
# }}}
|
||||
|
||||
# Metadata writer plugins {{{
|
||||
@ -499,107 +507,51 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
|
||||
from calibre.ebooks.metadata.extz import set_metadata
|
||||
set_metadata(stream, mi)
|
||||
|
||||
# }}}
|
||||
|
||||
from calibre.ebooks.comic.input import ComicInput
|
||||
from calibre.ebooks.djvu.input import DJVUInput
|
||||
from calibre.ebooks.epub.input import EPUBInput
|
||||
from calibre.ebooks.fb2.input import FB2Input
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
from calibre.ebooks.htmlz.input import HTMLZInput
|
||||
from calibre.ebooks.lit.input import LITInput
|
||||
from calibre.ebooks.mobi.input import MOBIInput
|
||||
from calibre.ebooks.odt.input import ODTInput
|
||||
from calibre.ebooks.pdb.input import PDBInput
|
||||
from calibre.ebooks.azw4.input import AZW4Input
|
||||
from calibre.ebooks.pdf.input import PDFInput
|
||||
from calibre.ebooks.pml.input import PMLInput
|
||||
from calibre.ebooks.rb.input import RBInput
|
||||
from calibre.web.feeds.input import RecipeInput
|
||||
from calibre.ebooks.rtf.input import RTFInput
|
||||
from calibre.ebooks.tcr.input import TCRInput
|
||||
from calibre.ebooks.txt.input import TXTInput
|
||||
from calibre.ebooks.lrf.input import LRFInput
|
||||
from calibre.ebooks.chm.input import CHMInput
|
||||
from calibre.ebooks.snb.input import SNBInput
|
||||
|
||||
from calibre.ebooks.epub.output import EPUBOutput
|
||||
from calibre.ebooks.fb2.output import FB2Output
|
||||
from calibre.ebooks.lit.output import LITOutput
|
||||
from calibre.ebooks.lrf.output import LRFOutput
|
||||
from calibre.ebooks.mobi.output import MOBIOutput
|
||||
from calibre.ebooks.oeb.output import OEBOutput
|
||||
from calibre.ebooks.pdb.output import PDBOutput
|
||||
from calibre.ebooks.pdf.output import PDFOutput
|
||||
from calibre.ebooks.pml.output import PMLOutput
|
||||
from calibre.ebooks.rb.output import RBOutput
|
||||
from calibre.ebooks.rtf.output import RTFOutput
|
||||
from calibre.ebooks.tcr.output import TCROutput
|
||||
from calibre.ebooks.txt.output import TXTOutput
|
||||
from calibre.ebooks.txt.output import TXTZOutput
|
||||
from calibre.ebooks.html.output import HTMLOutput
|
||||
from calibre.ebooks.htmlz.output import HTMLZOutput
|
||||
from calibre.ebooks.snb.output import SNBOutput
|
||||
|
||||
from calibre.customize.profiles import input_profiles, output_profiles
|
||||
|
||||
from calibre.devices.apple.driver import ITUNES
|
||||
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
|
||||
from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK
|
||||
from calibre.devices.cybook.driver import CYBOOK, ORIZON
|
||||
from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
|
||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
|
||||
BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
|
||||
POCKETBOOK701, POCKETBOOK360P, PI2)
|
||||
from calibre.devices.iliad.driver import ILIAD
|
||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
||||
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
|
||||
KINDLE_FIRE)
|
||||
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||
from calibre.devices.prs505.driver import PRS505
|
||||
from calibre.devices.prst1.driver import PRST1
|
||||
from calibre.devices.user_defined.driver import USER_DEFINED
|
||||
from calibre.devices.android.driver import ANDROID, S60, WEBOS
|
||||
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
||||
from calibre.devices.eslick.driver import ESLICK, EBK52
|
||||
from calibre.devices.nuut2.driver import NUUT2
|
||||
from calibre.devices.iriver.driver import IRIVER_STORY
|
||||
from calibre.devices.binatone.driver import README
|
||||
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
|
||||
LIBREAIR, ODYSSEY)
|
||||
from calibre.devices.edge.driver import EDGE
|
||||
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
|
||||
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
|
||||
from calibre.devices.sne.driver import SNE
|
||||
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
|
||||
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
|
||||
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G)
|
||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||
from calibre.devices.kobo.driver import KOBO
|
||||
from calibre.devices.bambook.driver import BAMBOOK
|
||||
from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
|
||||
|
||||
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
|
||||
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
|
||||
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
|
||||
|
||||
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
|
||||
Epubcheck, ]
|
||||
|
||||
# New metadata download plugins {{{
|
||||
from calibre.ebooks.metadata.sources.google import GoogleBooks
|
||||
from calibre.ebooks.metadata.sources.amazon import Amazon
|
||||
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
|
||||
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
|
||||
from calibre.ebooks.metadata.sources.overdrive import OverDrive
|
||||
from calibre.ebooks.metadata.sources.douban import Douban
|
||||
from calibre.ebooks.metadata.sources.ozon import Ozon
|
||||
|
||||
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataWriter')]
|
||||
|
||||
# }}}
|
||||
|
||||
# Conversion plugins {{{
|
||||
from calibre.ebooks.conversion.plugins.comic_input import ComicInput
|
||||
from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput
|
||||
from calibre.ebooks.conversion.plugins.epub_input import EPUBInput
|
||||
from calibre.ebooks.conversion.plugins.fb2_input import FB2Input
|
||||
from calibre.ebooks.conversion.plugins.html_input import HTMLInput
|
||||
from calibre.ebooks.conversion.plugins.htmlz_input import HTMLZInput
|
||||
from calibre.ebooks.conversion.plugins.lit_input import LITInput
|
||||
from calibre.ebooks.conversion.plugins.mobi_input import MOBIInput
|
||||
from calibre.ebooks.conversion.plugins.odt_input import ODTInput
|
||||
from calibre.ebooks.conversion.plugins.pdb_input import PDBInput
|
||||
from calibre.ebooks.conversion.plugins.azw4_input import AZW4Input
|
||||
from calibre.ebooks.conversion.plugins.pdf_input import PDFInput
|
||||
from calibre.ebooks.conversion.plugins.pml_input import PMLInput
|
||||
from calibre.ebooks.conversion.plugins.rb_input import RBInput
|
||||
from calibre.ebooks.conversion.plugins.recipe_input import RecipeInput
|
||||
from calibre.ebooks.conversion.plugins.rtf_input import RTFInput
|
||||
from calibre.ebooks.conversion.plugins.tcr_input import TCRInput
|
||||
from calibre.ebooks.conversion.plugins.txt_input import TXTInput
|
||||
from calibre.ebooks.conversion.plugins.lrf_input import LRFInput
|
||||
from calibre.ebooks.conversion.plugins.chm_input import CHMInput
|
||||
from calibre.ebooks.conversion.plugins.snb_input import SNBInput
|
||||
|
||||
from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput
|
||||
from calibre.ebooks.conversion.plugins.fb2_output import FB2Output
|
||||
from calibre.ebooks.conversion.plugins.lit_output import LITOutput
|
||||
from calibre.ebooks.conversion.plugins.lrf_output import LRFOutput
|
||||
from calibre.ebooks.conversion.plugins.mobi_output import MOBIOutput
|
||||
from calibre.ebooks.conversion.plugins.oeb_output import OEBOutput
|
||||
from calibre.ebooks.conversion.plugins.pdb_output import PDBOutput
|
||||
from calibre.ebooks.conversion.plugins.pdf_output import PDFOutput
|
||||
from calibre.ebooks.conversion.plugins.pml_output import PMLOutput
|
||||
from calibre.ebooks.conversion.plugins.rb_output import RBOutput
|
||||
from calibre.ebooks.conversion.plugins.rtf_output import RTFOutput
|
||||
from calibre.ebooks.conversion.plugins.tcr_output import TCROutput
|
||||
from calibre.ebooks.conversion.plugins.txt_output import TXTOutput, TXTZOutput
|
||||
from calibre.ebooks.conversion.plugins.html_output import HTMLOutput
|
||||
from calibre.ebooks.conversion.plugins.htmlz_output import HTMLZOutput
|
||||
from calibre.ebooks.conversion.plugins.snb_output import SNBOutput
|
||||
|
||||
plugins += [
|
||||
ComicInput,
|
||||
DJVUInput,
|
||||
@ -642,6 +594,66 @@ plugins += [
|
||||
HTMLZOutput,
|
||||
SNBOutput,
|
||||
]
|
||||
# }}}
|
||||
|
||||
# Catalog plugins {{{
|
||||
from calibre.library.catalogs.csv_xml import CSV_XML
|
||||
from calibre.library.catalogs.bibtex import BIBTEX
|
||||
from calibre.library.catalogs.epub_mobi import EPUB_MOBI
|
||||
plugins += [CSV_XML, BIBTEX, EPUB_MOBI]
|
||||
# }}}
|
||||
|
||||
# EPUB Fix plugins {{{
|
||||
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
|
||||
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
|
||||
plugins += [Unmanifested, Epubcheck]
|
||||
# }}}
|
||||
|
||||
# Profiles {{{
|
||||
from calibre.customize.profiles import input_profiles, output_profiles
|
||||
plugins += input_profiles + output_profiles
|
||||
# }}}
|
||||
|
||||
# Device driver plugins {{{
|
||||
from calibre.devices.apple.driver import ITUNES
|
||||
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
|
||||
from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK
|
||||
from calibre.devices.cybook.driver import CYBOOK, ORIZON
|
||||
from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
|
||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
|
||||
BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
|
||||
POCKETBOOK701, POCKETBOOK360P, PI2)
|
||||
from calibre.devices.iliad.driver import ILIAD
|
||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
||||
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
|
||||
KINDLE_FIRE)
|
||||
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||
from calibre.devices.prs505.driver import PRS505
|
||||
from calibre.devices.prst1.driver import PRST1
|
||||
from calibre.devices.user_defined.driver import USER_DEFINED
|
||||
from calibre.devices.android.driver import ANDROID, S60, WEBOS
|
||||
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
||||
from calibre.devices.eslick.driver import ESLICK, EBK52
|
||||
from calibre.devices.nuut2.driver import NUUT2
|
||||
from calibre.devices.iriver.driver import IRIVER_STORY
|
||||
from calibre.devices.binatone.driver import README
|
||||
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
|
||||
LIBREAIR, ODYSSEY)
|
||||
from calibre.devices.edge.driver import EDGE
|
||||
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
|
||||
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
|
||||
from calibre.devices.sne.driver import SNE
|
||||
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
|
||||
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
|
||||
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G)
|
||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||
from calibre.devices.kobo.driver import KOBO
|
||||
from calibre.devices.bambook.driver import BAMBOOK
|
||||
from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
|
||||
|
||||
|
||||
|
||||
# Order here matters. The first matched device is the one used.
|
||||
plugins += [
|
||||
HANLINV3,
|
||||
@ -716,11 +728,20 @@ plugins += [
|
||||
BOEYE_BDX,
|
||||
USER_DEFINED,
|
||||
]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataWriter')]
|
||||
plugins += input_profiles + output_profiles
|
||||
# }}}
|
||||
|
||||
# New metadata download plugins {{{
|
||||
from calibre.ebooks.metadata.sources.google import GoogleBooks
|
||||
from calibre.ebooks.metadata.sources.amazon import Amazon
|
||||
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
|
||||
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
|
||||
from calibre.ebooks.metadata.sources.overdrive import OverDrive
|
||||
from calibre.ebooks.metadata.sources.douban import Douban
|
||||
from calibre.ebooks.metadata.sources.ozon import Ozon
|
||||
|
||||
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
|
||||
|
||||
# }}}
|
||||
|
||||
# Interface Actions {{{
|
||||
|
||||
@ -1623,3 +1644,34 @@ plugins += [
|
||||
]
|
||||
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Test load speed
|
||||
import subprocess, textwrap
|
||||
try:
|
||||
subprocess.check_call(['python', '-c', textwrap.dedent(
|
||||
'''
|
||||
from __future__ import print_function
|
||||
import time, sys, init_calibre
|
||||
st = time.time()
|
||||
import calibre.customize.builtins
|
||||
t = time.time() - st
|
||||
ret = 0
|
||||
|
||||
for x in ('lxml', 'calibre.ebooks.BeautifulSoup', 'uuid',
|
||||
'calibre.utils.terminfo', 'calibre.utils.magick', 'PIL', 'Image',
|
||||
'sqlite3', 'mechanize', 'httplib', 'xml'):
|
||||
if x in sys.modules:
|
||||
ret = 1
|
||||
print (x, 'has been loaded by a plugin')
|
||||
if ret:
|
||||
print ('\\nA good way to track down what is loading something is to run'
|
||||
' python -c "import init_calibre; import calibre.customize.builtins"')
|
||||
print()
|
||||
print ('Time taken to import all plugins: %.2f'%t)
|
||||
sys.exit(ret)
|
||||
|
||||
''')])
|
||||
except subprocess.CalledProcessError:
|
||||
raise SystemExit(1)
|
||||
|
||||
|
@ -5,7 +5,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from itertools import izip
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from calibre.customize import Plugin as _Plugin
|
||||
|
||||
@ -268,6 +267,7 @@ class OutputProfile(Plugin):
|
||||
|
||||
@classmethod
|
||||
def tags_to_string(cls, tags):
|
||||
from xml.sax.saxutils import escape
|
||||
return escape(', '.join(tags))
|
||||
|
||||
class iPadOutput(OutputProfile):
|
||||
|
@ -447,11 +447,14 @@ def plugin_for_catalog_format(fmt):
|
||||
|
||||
# }}}
|
||||
|
||||
def device_plugins(): # {{{
|
||||
def device_plugins(include_disabled=False): # {{{
|
||||
for plugin in _initialized_plugins:
|
||||
if isinstance(plugin, DevicePlugin):
|
||||
if not is_disabled(plugin):
|
||||
if include_disabled or not is_disabled(plugin):
|
||||
if platform in plugin.supported_platforms:
|
||||
if getattr(plugin, 'plugin_needs_delayed_initialization',
|
||||
False):
|
||||
plugin.do_delayed_plugin_initialization()
|
||||
yield plugin
|
||||
# }}}
|
||||
|
||||
@ -496,7 +499,7 @@ def initialize_plugin(plugin, path_to_zip_file):
|
||||
def has_external_plugins():
|
||||
return bool(config['plugins'])
|
||||
|
||||
def initialize_plugins():
|
||||
def initialize_plugins(perf=False):
|
||||
global _initialized_plugins
|
||||
_initialized_plugins = []
|
||||
conflicts = [name for name in config['plugins'] if name in
|
||||
@ -504,6 +507,11 @@ def initialize_plugins():
|
||||
for p in conflicts:
|
||||
remove_plugin(p)
|
||||
external_plugins = config['plugins']
|
||||
ostdout, ostderr = sys.stdout, sys.stderr
|
||||
if perf:
|
||||
from collections import defaultdict
|
||||
import time
|
||||
times = defaultdict(lambda:0)
|
||||
for zfp in list(external_plugins) + builtin_plugins:
|
||||
try:
|
||||
if not isinstance(zfp, type):
|
||||
@ -516,12 +524,22 @@ def initialize_plugins():
|
||||
plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
|
||||
except PluginNotFound:
|
||||
continue
|
||||
if perf:
|
||||
st = time.time()
|
||||
plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
|
||||
if perf:
|
||||
times[plugin.name] = time.time() - st
|
||||
_initialized_plugins.append(plugin)
|
||||
except:
|
||||
print 'Failed to initialize plugin:', repr(zfp)
|
||||
if DEBUG:
|
||||
traceback.print_exc()
|
||||
# Prevent a custom plugin from overriding stdout/stderr as this breaks
|
||||
# ipython
|
||||
sys.stdout, sys.stderr = ostdout, ostderr
|
||||
if perf:
|
||||
for x in sorted(times, key=lambda x:times[x]):
|
||||
print ('%50s: %.3f'%(x, times[x]))
|
||||
_initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True)
|
||||
reread_filetype_plugins()
|
||||
reread_metadata_plugins()
|
||||
|
@ -221,6 +221,20 @@ class ANDROID(USBMS):
|
||||
drives['main'] = letter_a
|
||||
return drives
|
||||
|
||||
@classmethod
|
||||
def configure_for_kindle_app(cls):
|
||||
proxy = cls._configProxy()
|
||||
proxy['format_map'] = ['mobi', 'azw', 'azw1', 'azw4', 'pdf']
|
||||
proxy['use_subdirs'] = False
|
||||
proxy['extra_customization'] = ','.join(['kindle']+cls.EBOOK_DIR_MAIN)
|
||||
|
||||
@classmethod
|
||||
def configure_for_generic_epub_app(cls):
|
||||
proxy = cls._configProxy()
|
||||
del proxy['format_map']
|
||||
del proxy['use_subdirs']
|
||||
del proxy['extra_customization']
|
||||
|
||||
class S60(USBMS):
|
||||
|
||||
name = 'S60 driver'
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
|
||||
Sanda library wrapper
|
||||
'''
|
||||
|
||||
import ctypes, uuid, hashlib, os, sys
|
||||
import ctypes, hashlib, os, sys
|
||||
from threading import Event, Lock
|
||||
from calibre.constants import iswindows
|
||||
from calibre import load_library
|
||||
@ -350,6 +350,7 @@ class Bambook:
|
||||
return None
|
||||
|
||||
def SendFile(self, fileName, guid = None):
|
||||
import uuid
|
||||
if self.handle:
|
||||
taskID = job.NewJob()
|
||||
if guid:
|
||||
|
File diff suppressed because one or more lines are too long
@ -97,3 +97,13 @@ class FOLDER_DEVICE(USBMS):
|
||||
@classmethod
|
||||
def settings(self):
|
||||
return FOLDER_DEVICE_FOR_CONFIG._config().parse()
|
||||
|
||||
@classmethod
|
||||
def config_widget(cls):
|
||||
return FOLDER_DEVICE_FOR_CONFIG.config_widget()
|
||||
|
||||
@classmethod
|
||||
def save_settings(cls, config_widget):
|
||||
return FOLDER_DEVICE_FOR_CONFIG.save_settings(config_widget)
|
||||
|
||||
|
||||
|
@ -9,7 +9,6 @@ Generates and writes an APNX page mapping file.
|
||||
'''
|
||||
|
||||
import struct
|
||||
import uuid
|
||||
|
||||
from calibre.ebooks.mobi.reader import MobiReader
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
@ -51,6 +50,7 @@ class APNXBuilder(object):
|
||||
apnxf.write(apnx)
|
||||
|
||||
def generate_apnx(self, pages):
|
||||
import uuid
|
||||
apnx = ''
|
||||
|
||||
content_vals = {
|
||||
|
@ -10,10 +10,8 @@ Device driver for Amazon's Kindle
|
||||
|
||||
import datetime, os, re, sys, json, hashlib
|
||||
|
||||
from calibre.devices.kindle.apnx import APNXBuilder
|
||||
from calibre.devices.kindle.bookmark import Bookmark
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre import strftime
|
||||
|
||||
'''
|
||||
@ -152,6 +150,7 @@ class KINDLE(USBMS):
|
||||
path_map, book_ext = resolve_bookmark_paths(storage, path_map)
|
||||
|
||||
bookmarked_books = {}
|
||||
|
||||
for id in path_map:
|
||||
bookmark_ext = path_map[id].rpartition('.')[2]
|
||||
myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
|
||||
@ -236,6 +235,8 @@ class KINDLE(USBMS):
|
||||
|
||||
def add_annotation_to_library(self, db, db_id, annotation):
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
bm = annotation
|
||||
ignore_tags = set(['Catalog', 'Clippings'])
|
||||
|
||||
@ -363,6 +364,8 @@ class KINDLE2(KINDLE):
|
||||
'''
|
||||
Hijacking this function to write the apnx file.
|
||||
'''
|
||||
from calibre.devices.kindle.apnx import APNXBuilder
|
||||
|
||||
opts = self.settings()
|
||||
if not opts.extra_customization[self.OPT_APNX]:
|
||||
return
|
||||
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
from contextlib import closing
|
||||
|
||||
import sqlite3 as sqlite
|
||||
|
||||
class Bookmark(): # {{{
|
||||
'''
|
||||
@ -32,7 +31,7 @@ class Bookmark(): # {{{
|
||||
|
||||
def get_bookmark_data(self):
|
||||
''' Return the timestamp and last_read_location '''
|
||||
|
||||
import sqlite3 as sqlite
|
||||
user_notes = {}
|
||||
self.timestamp = os.path.getmtime(self.path)
|
||||
with closing(sqlite.connect(self.db_path)) as connection:
|
||||
|
@ -6,7 +6,6 @@ __copyright__ = '2010, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, time, calendar
|
||||
import sqlite3 as sqlite
|
||||
from contextlib import closing
|
||||
from calibre.devices.usbms.books import BookList
|
||||
from calibre.devices.kobo.books import Book
|
||||
@ -16,7 +15,6 @@ from calibre.devices.mime import mime_type_ext
|
||||
from calibre.devices.usbms.driver import USBMS, debug_print
|
||||
from calibre import prints
|
||||
from calibre.devices.usbms.books import CollectionsBookList
|
||||
from calibre.utils.magick.draw import save_cover_data_to
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class KOBO(USBMS):
|
||||
@ -230,6 +228,7 @@ class KOBO(USBMS):
|
||||
traceback.print_exc()
|
||||
return changed
|
||||
|
||||
import sqlite3 as sqlite
|
||||
with closing(sqlite.connect(
|
||||
self.normalize_path(self._main_prefix +
|
||||
'.kobo/KoboReader.sqlite'))) as connection:
|
||||
@ -344,6 +343,7 @@ class KOBO(USBMS):
|
||||
# 2) volume_shorcover
|
||||
# 2) content
|
||||
|
||||
import sqlite3 as sqlite
|
||||
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
|
||||
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
|
||||
'.kobo/KoboReader.sqlite'))) as connection:
|
||||
@ -739,6 +739,8 @@ class KOBO(USBMS):
|
||||
# Needs to be outside books collection as in the case of removing
|
||||
# the last book from the collection the list of books is empty
|
||||
# and the removal of the last book would not occur
|
||||
|
||||
import sqlite3 as sqlite
|
||||
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
|
||||
'.kobo/KoboReader.sqlite'))) as connection:
|
||||
|
||||
@ -850,6 +852,7 @@ class KOBO(USBMS):
|
||||
debug_print('FAILED to upload cover', filepath)
|
||||
|
||||
def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale):
|
||||
from calibre.utils.magick.draw import save_cover_data_to
|
||||
if metadata.cover:
|
||||
cover = self.normalize_path(metadata.cover.replace('/', os.sep))
|
||||
|
||||
@ -859,6 +862,7 @@ class KOBO(USBMS):
|
||||
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(filepath)
|
||||
ContentID = self.contentid_from_path(filepath, ContentType)
|
||||
|
||||
import sqlite3 as sqlite
|
||||
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
|
||||
'.kobo/KoboReader.sqlite'))) as connection:
|
||||
|
||||
|
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, time
|
||||
from base64 import b64decode
|
||||
from uuid import uuid4
|
||||
from lxml import etree
|
||||
from datetime import date
|
||||
|
||||
from calibre import prints, guess_type, isbytestring
|
||||
@ -78,6 +76,7 @@ def strftime(epoch, zone=time.localtime):
|
||||
return ' '.join(src)
|
||||
|
||||
def uuid():
|
||||
from uuid import uuid4
|
||||
return str(uuid4()).replace('-', '', 1).upper()
|
||||
|
||||
# }}}
|
||||
@ -85,6 +84,8 @@ def uuid():
|
||||
class XMLCache(object):
|
||||
|
||||
def __init__(self, paths, ext_paths, prefixes, use_author_sort):
|
||||
from lxml import etree
|
||||
|
||||
if DEBUG:
|
||||
debug_print('Building XMLCache...', paths)
|
||||
self.paths = paths
|
||||
@ -714,6 +715,8 @@ class XMLCache(object):
|
||||
|
||||
|
||||
def write(self):
|
||||
from lxml import etree
|
||||
|
||||
for i, path in self.paths.items():
|
||||
self.move_playlists_to_bottom()
|
||||
self.cleanup_whitespace(i)
|
||||
|
@ -12,8 +12,6 @@ Device driver for the SONY T1 devices
|
||||
'''
|
||||
|
||||
import os, time, re
|
||||
import sqlite3 as sqlite
|
||||
from sqlite3 import DatabaseError
|
||||
from contextlib import closing
|
||||
from datetime import date
|
||||
|
||||
@ -146,6 +144,8 @@ class PRST1(USBMS):
|
||||
return True
|
||||
|
||||
def books(self, oncard=None, end_session=True):
|
||||
import sqlite3 as sqlite
|
||||
|
||||
dummy_bl = BookList(None, None, None)
|
||||
|
||||
if (
|
||||
@ -246,6 +246,8 @@ class PRST1(USBMS):
|
||||
debug_print('PRST1: finished sync_booklists')
|
||||
|
||||
def update_device_database(self, booklist, collections_attributes, oncard):
|
||||
import sqlite3 as sqlite
|
||||
|
||||
debug_print('PRST1: starting update_device_database')
|
||||
|
||||
plugboard = None
|
||||
@ -274,6 +276,8 @@ class PRST1(USBMS):
|
||||
|
||||
def update_device_books(self, connection, booklist, source_id, plugboard,
|
||||
dbpath):
|
||||
from sqlite3 import DatabaseError
|
||||
|
||||
opts = self.settings()
|
||||
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
|
||||
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
|
||||
@ -489,6 +493,8 @@ class PRST1(USBMS):
|
||||
debug_print('PRS-T1: finished rebuild_collections')
|
||||
|
||||
def upload_cover(self, path, filename, metadata, filepath):
|
||||
import sqlite3 as sqlite
|
||||
|
||||
debug_print('PRS-T1: uploading cover')
|
||||
|
||||
if filepath.startswith(self._main_prefix):
|
||||
|
@ -10,7 +10,7 @@ driver. It is intended to be subclassed with the relevant parts implemented
|
||||
for a particular device.
|
||||
'''
|
||||
|
||||
import os, re, time, json, uuid, functools, shutil
|
||||
import os, re, time, json, functools, shutil
|
||||
from itertools import cycle
|
||||
|
||||
from calibre.constants import numeric_version
|
||||
@ -58,6 +58,7 @@ class USBMS(CLI, Device):
|
||||
SCAN_FROM_ROOT = False
|
||||
|
||||
def _update_driveinfo_record(self, dinfo, prefix, location_code, name=None):
|
||||
import uuid
|
||||
if not isinstance(dinfo, dict):
|
||||
dinfo = {}
|
||||
if dinfo.get('device_store_uuid', None) is None:
|
||||
|
@ -90,6 +90,10 @@ class USER_DEFINED(USBMS):
|
||||
OPT_CARD_A_FOLDER = 9
|
||||
|
||||
def initialize(self):
|
||||
self.plugin_needs_delayed_initialization = True
|
||||
USBMS.initialize(self)
|
||||
|
||||
def do_delayed_plugin_initialization(self):
|
||||
try:
|
||||
e = self.settings().extra_customization
|
||||
self.VENDOR_ID = int(e[self.OPT_USB_VENDOR_ID], 16)
|
||||
@ -107,4 +111,6 @@ class USER_DEFINED(USBMS):
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
USBMS.initialize(self)
|
||||
self.plugin_needs_delayed_initialization = False
|
||||
|
||||
|
||||
|
@ -8,7 +8,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, codecs
|
||||
from chardet import detect
|
||||
|
||||
ENCODING_PATS = [
|
||||
re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
|
||||
@ -34,8 +33,13 @@ def substitute_entites(raw):
|
||||
_CHARSET_ALIASES = { "macintosh" : "mac-roman",
|
||||
"x-sjis" : "shift-jis" }
|
||||
|
||||
def detect(*args, **kwargs):
|
||||
from chardet import detect
|
||||
return detect(*args, **kwargs)
|
||||
|
||||
def force_encoding(raw, verbose, assume_utf8=False):
|
||||
from calibre.constants import preferred_encoding
|
||||
|
||||
try:
|
||||
chardet = detect(raw[:1024*50])
|
||||
except:
|
||||
|
@ -7,11 +7,10 @@ __docformat__ = 'restructuredtext en'
|
||||
Based on ideas from comiclrf created by FangornUK.
|
||||
'''
|
||||
|
||||
import os, shutil, traceback, textwrap, time, codecs
|
||||
import os, traceback, time
|
||||
from Queue import Empty
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre import extract, CurrentDir, prints, walk
|
||||
from calibre import extract, prints, walk
|
||||
from calibre.constants import filesystem_encoding
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.ipc.server import Server
|
||||
@ -273,245 +272,4 @@ def process_pages(pages, opts, update, tdir):
|
||||
return ans, failures
|
||||
|
||||
|
||||
class ComicInput(InputFormatPlugin):
|
||||
|
||||
name = 'Comic Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
|
||||
file_types = set(['cbz', 'cbr', 'cbc'])
|
||||
is_image_collection = True
|
||||
core_usage = -1
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='colors', recommended_value=256,
|
||||
help=_('Number of colors for grayscale image conversion. Default: '
|
||||
'%default. Values of less than 256 may result in blurred text '
|
||||
'on your device if you are creating your comics in EPUB format.')),
|
||||
OptionRecommendation(name='dont_normalize', recommended_value=False,
|
||||
help=_('Disable normalize (improve contrast) color range '
|
||||
'for pictures. Default: False')),
|
||||
OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
|
||||
help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
|
||||
OptionRecommendation(name='dont_sharpen', recommended_value=False,
|
||||
help=_('Disable sharpening.')),
|
||||
OptionRecommendation(name='disable_trim', recommended_value=False,
|
||||
help=_('Disable trimming of comic pages. For some comics, '
|
||||
'trimming might remove content as well as borders.')),
|
||||
OptionRecommendation(name='landscape', recommended_value=False,
|
||||
help=_("Don't split landscape images into two portrait images")),
|
||||
OptionRecommendation(name='wide', recommended_value=False,
|
||||
help=_("Keep aspect ratio and scale image using screen height as "
|
||||
"image width for viewing in landscape mode.")),
|
||||
OptionRecommendation(name='right2left', recommended_value=False,
|
||||
help=_('Used for right-to-left publications like manga. '
|
||||
'Causes landscape pages to be split into portrait pages '
|
||||
'from right to left.')),
|
||||
OptionRecommendation(name='despeckle', recommended_value=False,
|
||||
help=_('Enable Despeckle. Reduces speckle noise. '
|
||||
'May greatly increase processing time.')),
|
||||
OptionRecommendation(name='no_sort', recommended_value=False,
|
||||
help=_("Don't sort the files found in the comic "
|
||||
"alphabetically by name. Instead use the order they were "
|
||||
"added to the comic.")),
|
||||
OptionRecommendation(name='output_format', choices=['png', 'jpg'],
|
||||
recommended_value='png', help=_('The format that images in the created ebook '
|
||||
'are converted to. You can experiment to see which format gives '
|
||||
'you optimal size and look on your device.')),
|
||||
OptionRecommendation(name='no_process', recommended_value=False,
|
||||
help=_("Apply no processing to the image")),
|
||||
OptionRecommendation(name='dont_grayscale', recommended_value=False,
|
||||
help=_('Do not convert the image to grayscale (black and white)')),
|
||||
OptionRecommendation(name='comic_image_size', recommended_value=None,
|
||||
help=_('Specify the image size as widthxheight pixels. Normally,'
|
||||
' an image size is automatically calculated from the output '
|
||||
'profile, this option overrides it.')),
|
||||
OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
|
||||
help=_('When converting a CBC do not add links to each page to'
|
||||
' the TOC. Note this only applies if the TOC has more than one'
|
||||
' section')),
|
||||
])
|
||||
|
||||
recommendations = set([
|
||||
('margin_left', 0, OptionRecommendation.HIGH),
|
||||
('margin_top', 0, OptionRecommendation.HIGH),
|
||||
('margin_right', 0, OptionRecommendation.HIGH),
|
||||
('margin_bottom', 0, OptionRecommendation.HIGH),
|
||||
('insert_blank_line', False, OptionRecommendation.HIGH),
|
||||
('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
|
||||
('change_justification', 'left', OptionRecommendation.HIGH),
|
||||
('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
|
||||
('chapter', None, OptionRecommendation.HIGH),
|
||||
('page_breaks_brefore', None, OptionRecommendation.HIGH),
|
||||
('use_auto_toc', False, OptionRecommendation.HIGH),
|
||||
('page_breaks_before', None, OptionRecommendation.HIGH),
|
||||
('disable_font_rescaling', True, OptionRecommendation.HIGH),
|
||||
('linearize_tables', False, OptionRecommendation.HIGH),
|
||||
])
|
||||
|
||||
def get_comics_from_collection(self, stream):
|
||||
from calibre.libunzip import extract as zipextract
|
||||
tdir = PersistentTemporaryDirectory('_comic_collection')
|
||||
zipextract(stream, tdir)
|
||||
comics = []
|
||||
with CurrentDir(tdir):
|
||||
if not os.path.exists('comics.txt'):
|
||||
raise ValueError((
|
||||
'%s is not a valid comic collection'
|
||||
' no comics.txt was found in the file')
|
||||
%stream.name)
|
||||
raw = open('comics.txt', 'rb').read()
|
||||
if raw.startswith(codecs.BOM_UTF16_BE):
|
||||
raw = raw.decode('utf-16-be')[1:]
|
||||
elif raw.startswith(codecs.BOM_UTF16_LE):
|
||||
raw = raw.decode('utf-16-le')[1:]
|
||||
elif raw.startswith(codecs.BOM_UTF8):
|
||||
raw = raw.decode('utf-8')[1:]
|
||||
else:
|
||||
raw = raw.decode('utf-8')
|
||||
for line in raw.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
fname, title = line.partition(':')[0], line.partition(':')[-1]
|
||||
fname = fname.replace('#', '_')
|
||||
fname = os.path.join(tdir, *fname.split('/'))
|
||||
if not title:
|
||||
title = os.path.basename(fname).rpartition('.')[0]
|
||||
if os.access(fname, os.R_OK):
|
||||
comics.append([title, fname])
|
||||
if not comics:
|
||||
raise ValueError('%s has no comics'%stream.name)
|
||||
return comics
|
||||
|
||||
def get_pages(self, comic, tdir2):
|
||||
tdir = extract_comic(comic)
|
||||
new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
|
||||
verbose=self.opts.verbose)
|
||||
thumbnail = None
|
||||
if not new_pages:
|
||||
raise ValueError('Could not find any pages in the comic: %s'
|
||||
%comic)
|
||||
if self.opts.no_process:
|
||||
n2 = []
|
||||
for page in new_pages:
|
||||
n2.append(os.path.join(tdir2, os.path.basename(page)))
|
||||
shutil.copyfile(page, n2[-1])
|
||||
new_pages = n2
|
||||
else:
|
||||
new_pages, failures = process_pages(new_pages, self.opts,
|
||||
self.report_progress, tdir2)
|
||||
if failures:
|
||||
self.log.warning('Could not process the following pages '
|
||||
'(run with --verbose to see why):')
|
||||
for f in failures:
|
||||
self.log.warning('\t', f)
|
||||
if not new_pages:
|
||||
raise ValueError('Could not find any valid pages in comic: %s'
|
||||
% comic)
|
||||
thumbnail = os.path.join(tdir2,
|
||||
'thumbnail.'+self.opts.output_format.lower())
|
||||
if not os.access(thumbnail, os.R_OK):
|
||||
thumbnail = None
|
||||
return new_pages
|
||||
|
||||
def get_images(self):
|
||||
return self._images
|
||||
|
||||
def convert(self, stream, opts, file_ext, log, accelerators):
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
|
||||
self.opts, self.log= opts, log
|
||||
if file_ext == 'cbc':
|
||||
comics_ = self.get_comics_from_collection(stream)
|
||||
else:
|
||||
comics_ = [['Comic', os.path.abspath(stream.name)]]
|
||||
stream.close()
|
||||
comics = []
|
||||
for i, x in enumerate(comics_):
|
||||
title, fname = x
|
||||
cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
|
||||
cdir = os.path.abspath(cdir)
|
||||
if not os.path.exists(cdir):
|
||||
os.makedirs(cdir)
|
||||
pages = self.get_pages(fname, cdir)
|
||||
if not pages: continue
|
||||
wrappers = self.create_wrappers(pages)
|
||||
comics.append((title, pages, wrappers))
|
||||
|
||||
if not comics:
|
||||
raise ValueError('No comic pages found in %s'%stream.name)
|
||||
|
||||
mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
|
||||
[_('Unknown')])
|
||||
opf = OPFCreator(os.path.abspath('.'), mi)
|
||||
entries = []
|
||||
|
||||
def href(x):
|
||||
if len(comics) == 1: return os.path.basename(x)
|
||||
return '/'.join(x.split(os.sep)[-2:])
|
||||
|
||||
for comic in comics:
|
||||
pages, wrappers = comic[1:]
|
||||
entries += [(w, None) for w in map(href, wrappers)] + \
|
||||
[(x, None) for x in map(href, pages)]
|
||||
opf.create_manifest(entries)
|
||||
spine = []
|
||||
for comic in comics:
|
||||
spine.extend(map(href, comic[2]))
|
||||
self._images = []
|
||||
for comic in comics:
|
||||
self._images.extend(comic[1])
|
||||
opf.create_spine(spine)
|
||||
toc = TOC()
|
||||
if len(comics) == 1:
|
||||
wrappers = comics[0][2]
|
||||
for i, x in enumerate(wrappers):
|
||||
toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
|
||||
play_order=i)
|
||||
else:
|
||||
po = 0
|
||||
for comic in comics:
|
||||
po += 1
|
||||
wrappers = comic[2]
|
||||
stoc = toc.add_item(href(wrappers[0]),
|
||||
None, comic[0], play_order=po)
|
||||
if not opts.dont_add_comic_pages_to_toc:
|
||||
for i, x in enumerate(wrappers):
|
||||
stoc.add_item(href(x), None,
|
||||
_('Page')+' %d'%(i+1), play_order=po)
|
||||
po += 1
|
||||
opf.set_toc(toc)
|
||||
m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
|
||||
opf.render(m, n, 'toc.ncx')
|
||||
return os.path.abspath('metadata.opf')
|
||||
|
||||
def create_wrappers(self, pages):
    """Write one XHTML wrapper document per comic page image.

    Each wrapper is named ``page_<n>.xhtml`` (1-based), is written into the
    directory containing the first page, and references its image by base
    name.

    :param pages: Sequence of paths to the page image files.
    :return: List of paths to the generated wrapper files, in page order.
             An empty ``pages`` yields an empty list.
    """
    from calibre.ebooks.oeb.base import XHTML_NS
    wrappers = []
    if not pages:
        # Nothing to wrap; avoids indexing into an empty sequence below.
        return wrappers
    WRAPPER = textwrap.dedent('''\
    <html xmlns="%s">
        <head>
            <title>Page #%d</title>
            <style type="text/css">
                @page { margin:0pt; padding: 0pt}
                body { margin: 0pt; padding: 0pt}
                div { text-align: center }
            </style>
        </head>
        <body>
            <div>
                <img src="%s" alt="comic page #%d" />
            </div>
        </body>
    </html>
    ''')
    # All wrappers go next to the first page image.
    wrapper_dir = os.path.dirname(pages[0])
    for i, page in enumerate(pages):
        wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
        wrapper_path = os.path.join(wrapper_dir, 'page_%d.xhtml'%(i+1))
        # Close the handle deterministically instead of relying on garbage
        # collection to flush and release it.
        with open(wrapper_path, 'wb') as f:
            f.write(wrapper)
        wrappers.append(wrapper_path)
    return wrappers
|
||||
|
||||
|
11
src/calibre/ebooks/conversion/plugins/__init__.py
Normal file
11
src/calibre/ebooks/conversion/plugins/__init__.py
Normal file
@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
|
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.azw4.reader import Reader
|
||||
|
||||
class AZW4Input(InputFormatPlugin):
|
||||
|
||||
@ -19,6 +17,9 @@ class AZW4Input(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.azw4.reader import Reader
|
||||
|
||||
header = PdbHeaderReader(stream)
|
||||
reader = Reader(header, stream, log, options)
|
||||
opf = reader.extract_content(os.getcwd())
|
@ -3,9 +3,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||
|
||||
import os, uuid
|
||||
|
||||
from lxml import html
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
@ -77,7 +75,7 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
|
||||
# use HTMLInput plugin to generate book
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
from calibre.customize.builtins import HTMLInput
|
||||
opts.breadth_first = True
|
||||
htmlinput = HTMLInput(None)
|
||||
oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
|
||||
@ -85,6 +83,8 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
|
||||
def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
|
||||
import uuid
|
||||
from lxml import html
|
||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||
from calibre.ebooks.oeb.base import DirContainer
|
||||
oeb = create_oebbook(log, None, opts,
|
||||
@ -142,6 +142,7 @@ class CHMInput(InputFormatPlugin):
|
||||
return oeb
|
||||
|
||||
def _create_html_root(self, hhcpath, log):
|
||||
from lxml import html
|
||||
hhcdata = self._read_file(hhcpath)
|
||||
hhcroot = html.fromstring(hhcdata)
|
||||
chapters = self._process_nodes(hhcroot)
|
259
src/calibre/ebooks/conversion/plugins/comic_input.py
Normal file
259
src/calibre/ebooks/conversion/plugins/comic_input.py
Normal file
@ -0,0 +1,259 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Based on ideas from comiclrf created by FangornUK.
|
||||
'''
|
||||
|
||||
import shutil, textwrap, codecs, os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre import CurrentDir
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
|
||||
class ComicInput(InputFormatPlugin):
|
||||
|
||||
name = 'Comic Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
|
||||
file_types = set(['cbz', 'cbr', 'cbc'])
|
||||
is_image_collection = True
|
||||
core_usage = -1
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='colors', recommended_value=256,
|
||||
help=_('Number of colors for grayscale image conversion. Default: '
|
||||
'%default. Values of less than 256 may result in blurred text '
|
||||
'on your device if you are creating your comics in EPUB format.')),
|
||||
OptionRecommendation(name='dont_normalize', recommended_value=False,
|
||||
help=_('Disable normalize (improve contrast) color range '
|
||||
'for pictures. Default: False')),
|
||||
OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
|
||||
help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
|
||||
OptionRecommendation(name='dont_sharpen', recommended_value=False,
|
||||
help=_('Disable sharpening.')),
|
||||
OptionRecommendation(name='disable_trim', recommended_value=False,
|
||||
help=_('Disable trimming of comic pages. For some comics, '
|
||||
'trimming might remove content as well as borders.')),
|
||||
OptionRecommendation(name='landscape', recommended_value=False,
|
||||
help=_("Don't split landscape images into two portrait images")),
|
||||
OptionRecommendation(name='wide', recommended_value=False,
|
||||
help=_("Keep aspect ratio and scale image using screen height as "
|
||||
"image width for viewing in landscape mode.")),
|
||||
OptionRecommendation(name='right2left', recommended_value=False,
|
||||
help=_('Used for right-to-left publications like manga. '
|
||||
'Causes landscape pages to be split into portrait pages '
|
||||
'from right to left.')),
|
||||
OptionRecommendation(name='despeckle', recommended_value=False,
|
||||
help=_('Enable Despeckle. Reduces speckle noise. '
|
||||
'May greatly increase processing time.')),
|
||||
OptionRecommendation(name='no_sort', recommended_value=False,
|
||||
help=_("Don't sort the files found in the comic "
|
||||
"alphabetically by name. Instead use the order they were "
|
||||
"added to the comic.")),
|
||||
OptionRecommendation(name='output_format', choices=['png', 'jpg'],
|
||||
recommended_value='png', help=_('The format that images in the created ebook '
|
||||
'are converted to. You can experiment to see which format gives '
|
||||
'you optimal size and look on your device.')),
|
||||
OptionRecommendation(name='no_process', recommended_value=False,
|
||||
help=_("Apply no processing to the image")),
|
||||
OptionRecommendation(name='dont_grayscale', recommended_value=False,
|
||||
help=_('Do not convert the image to grayscale (black and white)')),
|
||||
OptionRecommendation(name='comic_image_size', recommended_value=None,
|
||||
help=_('Specify the image size as widthxheight pixels. Normally,'
|
||||
' an image size is automatically calculated from the output '
|
||||
'profile, this option overrides it.')),
|
||||
OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
|
||||
help=_('When converting a CBC do not add links to each page to'
|
||||
' the TOC. Note this only applies if the TOC has more than one'
|
||||
' section')),
|
||||
])
|
||||
|
||||
# Conversion option values this plugin recommends. Each entry is a
# (option name, value, level) tuple.
# NOTE(review): the misspelled duplicate 'page_breaks_brefore' was dropped;
# the correctly spelled 'page_breaks_before' entry below carries the same
# value. Presumably no option with the misspelled name exists - confirm.
recommendations = set([
    ('margin_left', 0, OptionRecommendation.HIGH),
    ('margin_top', 0, OptionRecommendation.HIGH),
    ('margin_right', 0, OptionRecommendation.HIGH),
    ('margin_bottom', 0, OptionRecommendation.HIGH),
    ('insert_blank_line', False, OptionRecommendation.HIGH),
    ('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
    ('change_justification', 'left', OptionRecommendation.HIGH),
    ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
    ('chapter', None, OptionRecommendation.HIGH),
    ('use_auto_toc', False, OptionRecommendation.HIGH),
    ('page_breaks_before', None, OptionRecommendation.HIGH),
    ('disable_font_rescaling', True, OptionRecommendation.HIGH),
    ('linearize_tables', False, OptionRecommendation.HIGH),
    ])
|
||||
|
||||
def get_comics_from_collection(self, stream):
    """Unpack a comic collection and list the comics named in its index.

    The archive is extracted into a directory created with
    ``PersistentTemporaryDirectory`` and its ``comics.txt`` file is read.
    Each non-blank line has the form ``filename:title``; when the title is
    missing, the file name without its extension is used. Entries whose
    file is not readable are silently skipped.

    :param stream: File-like object for the collection archive; its
                   ``name`` attribute is used in error messages.
    :return: List of ``[title, path]`` pairs, in index order.
    :raises ValueError: If the archive has no ``comics.txt`` or none of the
                        listed comics is readable.
    """
    from calibre.libunzip import extract as zipextract
    tdir = PersistentTemporaryDirectory('_comic_collection')
    zipextract(stream, tdir)
    comics = []
    with CurrentDir(tdir):
        if not os.path.exists('comics.txt'):
            raise ValueError((
                '%s is not a valid comic collection'
                ' no comics.txt was found in the file')
                    %stream.name)
        # Close the index file as soon as it has been read.
        with open('comics.txt', 'rb') as f:
            raw = f.read()
        # Decode according to the byte order mark, then drop the BOM
        # character itself; without a BOM the data is decoded as UTF-8.
        if raw.startswith(codecs.BOM_UTF16_BE):
            raw = raw.decode('utf-16-be')[1:]
        elif raw.startswith(codecs.BOM_UTF16_LE):
            raw = raw.decode('utf-16-le')[1:]
        elif raw.startswith(codecs.BOM_UTF8):
            raw = raw.decode('utf-8')[1:]
        else:
            raw = raw.decode('utf-8')
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            # Split once on the first colon: file name before, title after.
            fname, sep, title = line.partition(':')
            # NOTE(review): '#' is replaced with '_' presumably to match how
            # the extractor names files on disk - confirm.
            fname = fname.replace('#', '_')
            fname = os.path.join(tdir, *fname.split('/'))
            if not title:
                title = os.path.basename(fname).rpartition('.')[0]
            if os.access(fname, os.R_OK):
                comics.append([title, fname])
    if not comics:
        raise ValueError('%s has no comics'%stream.name)
    return comics
|
||||
|
||||
def get_pages(self, comic, tdir2):
    """Extract a comic and return the page image files to use for it.

    With ``self.opts.no_process`` set, the extracted pages are copied
    unchanged into ``tdir2``; otherwise they are run through
    ``process_pages`` and pages that failed processing are logged as
    warnings.

    :param comic: Path to the comic file.
    :param tdir2: Directory that receives the resulting page files.
    :return: List of page file paths.
    :raises ValueError: If the comic contains no pages, or no page
                        survives processing.
    """
    from calibre.ebooks.comic.input import (extract_comic, process_pages,
            find_pages)
    tdir = extract_comic(comic)
    new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
            verbose=self.opts.verbose)
    if not new_pages:
        raise ValueError('Could not find any pages in the comic: %s'
                %comic)
    if self.opts.no_process:
        # Copy the pages as they are, keeping their base names.
        copied = []
        for page in new_pages:
            dest = os.path.join(tdir2, os.path.basename(page))
            shutil.copyfile(page, dest)
            copied.append(dest)
        return copied

    new_pages, failures = process_pages(new_pages, self.opts,
            self.report_progress, tdir2)
    if failures:
        self.log.warning('Could not process the following pages '
            '(run with --verbose to see why):')
        for f in failures:
            self.log.warning('\t', f)
    if not new_pages:
        raise ValueError('Could not find any valid pages in comic: %s'
                % comic)
    # The thumbnail path that used to be computed here was never used or
    # returned, so that dead code has been removed.
    return new_pages
|
||||
|
||||
def get_images(self):
    """Return the list of page image paths stored in ``self._images``.

    In the code visible here ``_images`` is only assigned by ``convert``,
    so calling this before a conversion presumably raises
    ``AttributeError`` - confirm against the base class.
    """
    return self._images
|
||||
|
||||
def convert(self, stream, opts, file_ext, log, accelerators):
    """Convert a comic file into wrapper pages plus an OPF and NCX.

    For a ``cbc`` collection every comic listed in it is processed into its
    own ``comic_<n>`` directory; any other input is treated as one comic
    processed into the current directory. For each comic the page images
    are produced by ``get_pages`` and wrapped by ``create_wrappers``. A
    manifest, spine and table of contents are then written to
    ``metadata.opf`` and ``toc.ncx`` in the current directory.

    :param stream: Open input file; it is closed by this method and its
                   ``name`` attribute is used for paths and messages.
    :param opts: Conversion options; stored on ``self.opts``.
    :param file_ext: Input file extension; ``'cbc'`` selects collection
                     handling.
    :param log: Logger; stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Absolute path of the generated ``metadata.opf``.
    :raises ValueError: If no comic yields any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log= opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()
    comics = []
    for i, (title, fname) in enumerate(comics_):
        # A lone comic is unpacked in place; several get a directory each.
        cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages: continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s'%stream.name)

    mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
        [_('Unknown')])
    opf = OPFCreator(os.path.abspath('.'), mi)
    entries = []

    def href(x):
        # One comic: files sit in the current directory, so the base name
        # is enough. Several: keep the containing comic_<n> directory.
        if len(comics) == 1: return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    # Manifest: for each comic, its wrappers followed by its page images.
    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(href(w), None) for w in wrappers] + \
                [(href(p), None) for p in pages]
    opf.create_manifest(entries)
    # Spine: only the wrapper documents, in order.
    spine = []
    for comic in comics:
        spine.extend([href(w) for w in comic[2]])
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)
    toc = TOC()
    if len(comics) == 1:
        # Flat TOC with one entry per page.
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                    play_order=i)
    else:
        # One entry per comic, with per-page children unless disabled.
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]),
                    None, comic[0], play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                            _('Page')+' %d'%(i+1), play_order=po)
                    po += 1
    opf.set_toc(toc)
    # Close both output files deterministically so their contents are
    # flushed before the OPF path is handed back to the caller.
    with open('metadata.opf', 'wb') as m:
        with open('toc.ncx', 'wb') as n:
            opf.render(m, n, 'toc.ncx')
    return os.path.abspath('metadata.opf')
|
||||
|
||||
def create_wrappers(self, pages):
    """Write one XHTML wrapper document per comic page image.

    Each wrapper is named ``page_<n>.xhtml`` (1-based), is written into the
    directory containing the first page, and references its image by base
    name.

    :param pages: Sequence of paths to the page image files.
    :return: List of paths to the generated wrapper files, in page order.
             An empty ``pages`` yields an empty list.
    """
    from calibre.ebooks.oeb.base import XHTML_NS
    wrappers = []
    if not pages:
        # Nothing to wrap; avoids indexing into an empty sequence below.
        return wrappers
    WRAPPER = textwrap.dedent('''\
    <html xmlns="%s">
        <head>
            <title>Page #%d</title>
            <style type="text/css">
                @page { margin:0pt; padding: 0pt}
                body { margin: 0pt; padding: 0pt}
                div { text-align: center }
            </style>
        </head>
        <body>
            <div>
                <img src="%s" alt="comic page #%d" />
            </div>
        </body>
    </html>
    ''')
    # All wrappers go next to the first page image.
    wrapper_dir = os.path.dirname(pages[0])
    for i, page in enumerate(pages):
        wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
        wrapper_path = os.path.join(wrapper_dir, 'page_%d.xhtml'%(i+1))
        # Close the handle deterministically instead of relying on garbage
        # collection to flush and release it.
        with open(wrapper_path, 'wb') as f:
            f.write(wrapper)
        wrappers.append(wrapper_path)
    return wrappers
|
||||
|
@ -12,7 +12,6 @@ from subprocess import Popen, PIPE
|
||||
from cStringIO import StringIO
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.txt.processor import convert_basic
|
||||
|
||||
class DJVUInput(InputFormatPlugin):
|
||||
|
||||
@ -28,6 +27,8 @@ class DJVUInput(InputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from calibre.ebooks.txt.processor import convert_basic
|
||||
|
||||
stdout = StringIO()
|
||||
ppdjvu = True
|
||||
# using djvutxt is MUCH faster, should make it an option
|
@ -3,11 +3,9 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, uuid
|
||||
import os
|
||||
from itertools import cycle
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
|
||||
class EPUBInput(InputFormatPlugin):
|
||||
@ -30,6 +28,8 @@ class EPUBInput(InputFormatPlugin):
|
||||
f.write(raw[1024:])
|
||||
|
||||
def process_encryption(self, encfile, opf, log):
|
||||
from lxml import etree
|
||||
import uuid
|
||||
key = None
|
||||
for item in opf.identifier_iter():
|
||||
scheme = None
|
||||
@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
return False
|
||||
|
||||
def rationalize_cover(self, opf, log):
|
||||
from lxml import etree
|
||||
guide_cover, guide_elem = None, None
|
||||
for guide_elem in opf.iterguide():
|
||||
if guide_elem.get('type', '').lower() == 'cover':
|
||||
@ -110,6 +111,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
renderer)
|
||||
|
||||
def find_opf(self):
|
||||
from lxml import etree
|
||||
def attr(n, attr):
|
||||
for k, v in n.attrib.items():
|
||||
if k.endswith(attr):
|
@ -8,14 +8,12 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, shutil, re
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre import CurrentDir
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.constants import filesystem_encoding
|
||||
|
||||
from lxml import etree
|
||||
|
||||
block_level_tags = (
|
||||
'address',
|
||||
'body',
|
||||
@ -289,6 +287,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
# }}}
|
||||
|
||||
def condense_ncx(self, ncx_path):
|
||||
from lxml import etree
|
||||
if not self.opts.pretty_print:
|
||||
tree = etree.parse(ncx_path)
|
||||
for tag in tree.getroot().iter(tag=etree.Element):
|
@ -6,7 +6,6 @@ Convert .fb2 files to .lrf
|
||||
"""
|
||||
import os, re
|
||||
from base64 import b64decode
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre import guess_type
|
||||
@ -38,6 +37,7 @@ class FB2Input(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.fb2.fb2ml import FB2MLizer
|
||||
|
||||
class FB2Output(OutputFormatPlugin):
|
||||
|
||||
@ -162,6 +161,7 @@ class FB2Output(OutputFormatPlugin):
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from calibre.ebooks.oeb.transforms.jacket import linearize_jacket
|
||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
||||
from calibre.ebooks.fb2.fb2ml import FB2MLizer
|
||||
|
||||
try:
|
||||
rasterizer = SVGRasterizer()
|
283
src/calibre/ebooks/conversion/plugins/html_input.py
Normal file
283
src/calibre/ebooks/conversion/plugins/html_input.py
Normal file
@ -0,0 +1,283 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, tempfile, os
|
||||
from functools import partial
|
||||
from itertools import izip
|
||||
from urllib import quote
|
||||
|
||||
from calibre.constants import islinux, isbsd
|
||||
from calibre.customize.conversion import (InputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
|
||||
|
||||
class HTMLInput(InputFormatPlugin):
|
||||
|
||||
name = 'HTML Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Convert HTML and OPF files to an OEB'
|
||||
file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='breadth_first',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Traverse links in HTML files breadth first. Normally, '
|
||||
'they are traversed depth first.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='max_levels',
|
||||
recommended_value=5, level=OptionRecommendation.LOW,
|
||||
help=_('Maximum levels of recursion when following links in '
|
||||
'HTML files. Must be non-negative. 0 implies that no '
|
||||
'links in the root HTML file are followed. Default is '
|
||||
'%default.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='dont_package',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Normally this input plugin re-arranges all the input '
|
||||
'files into a standard folder hierarchy. Only use this option '
|
||||
'if you know what you are doing as it can result in various '
|
||||
'nasty side effects in the rest of the conversion pipeline.'
|
||||
)
|
||||
),
|
||||
|
||||
])
|
||||
|
||||
def convert(self, stream, opts, file_ext, log,
        accelerators):
    """Build an OEB book from an HTML or OPF input stream.

    OPF input is passed straight to the plumber's ``create_oebbook``. Any
    other input has metadata read from the HTML, merged over metadata
    derived from the file name when the stream has one, and is then handed
    to ``self.create_oebbook``.

    :raises ValueError: If ``opts.dont_package`` is set for non-OPF input.
    """
    # Reset the cached case-sensitivity answer for this conversion.
    self._is_case_sensitive = None
    basedir = os.getcwd()
    self.opts = opts

    fname = None
    if hasattr(stream, 'name'):
        basedir, fname = os.path.split(stream.name)

    if file_ext == 'opf':
        from calibre.ebooks.conversion.plumber import create_oebbook
        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

    if opts.dont_package:
        raise ValueError('The --dont-package option is not supported for an HTML input file')

    from calibre.ebooks.metadata.html import get_metadata
    mi = get_metadata(stream)
    if fname:
        # File-name metadata is the base; HTML metadata is merged onto it.
        from calibre.ebooks.metadata.meta import metadata_from_filename
        from_name = metadata_from_filename(fname)
        from_name.smart_update(mi)
        mi = from_name
    return self.create_oebbook(stream.name, basedir, opts, log, mi)
|
||||
|
||||
def is_case_sensitive(self, path):
    """Report whether the filesystem holding *path* is case sensitive.

    A previously computed answer stored in ``self._is_case_sensitive`` is
    returned as is. When *path* is empty or does not exist, the answer
    falls back to ``islinux or isbsd`` and is not cached. Otherwise the
    filesystem counts as case insensitive only if both the lower-cased and
    upper-cased spellings of *path* exist, and the result is cached.
    """
    cached = getattr(self, '_is_case_sensitive', None)
    if cached is not None:
        return cached
    if path and os.path.exists(path):
        # Sensitive unless both case variants resolve to an existing path.
        sensitive = (not os.path.exists(path.lower())
                or not os.path.exists(path.upper()))
        self._is_case_sensitive = sensitive
        return sensitive
    return islinux or isbsd
|
||||
|
||||
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
    """Build an OEB book from an HTML file and the files it links to.

    Creates an unpopulated book, fills in its metadata from *mi* (adding
    fallbacks for language, creator and title plus a UUID identifier),
    adds every non-binary file returned by ``get_filelist`` to the manifest
    and spine, rewrites links in the HTML and CSS through
    ``resource_adder``, and generates a table of contents from document
    titles or first headings.

    :param htmlpath: Path of the root HTML file.
    :param basedir: Base directory passed to ``get_filelist``.
    :param opts: Conversion options; ``input_encoding`` is read here.
    :param log: Logger; stored on ``self.log``.
    :param mi: Metadata object applied to the book.
    :return: The populated book object.
    """
    import uuid
    from calibre.ebooks.conversion.plumber import create_oebbook
    from calibre.ebooks.oeb.base import (DirContainer,
        rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
        xpath)
    from calibre import guess_type
    from calibre.ebooks.oeb.transforms.metadata import \
        meta_info_to_oeb_metadata
    from calibre.ebooks.html.input import get_filelist
    import cssutils, logging
    cssutils.log.setLevel(logging.WARN)
    # Kept on self so resource_adder can use it without importing.
    self.OEB_STYLES = OEB_STYLES
    oeb = create_oebbook(log, None, opts, self,
            encoding=opts.input_encoding, populate=False)
    self.oeb = oeb

    metadata = oeb.metadata
    meta_info_to_oeb_metadata(mi, metadata, log)
    # Fill in fallbacks for metadata the source did not provide.
    if not metadata.language:
        oeb.logger.warn(u'Language not specified')
        metadata.add('language', get_lang().replace('_', '-'))
    if not metadata.creator:
        oeb.logger.warn('Creator not specified')
        metadata.add('creator', self.oeb.translate(__('Unknown')))
    if not metadata.title:
        oeb.logger.warn('Title not specified')
        metadata.add('title', self.oeb.translate(__('Unknown')))
    bookid = str(uuid.uuid4())
    metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
    for ident in metadata.identifier:
        if 'id' in ident.attrib:
            # NOTE(review): this assigns identifier[0], not the matched
            # `ident` - confirm that is intended.
            self.oeb.uid = metadata.identifier[0]
            break

    filelist = get_filelist(htmlpath, basedir, opts, log)
    filelist = [f for f in filelist if not f.is_binary]
    # Maps each source file path to the manifest href generated for it.
    htmlfile_map = {}
    for f in filelist:
        path = f.path
        # The container is pointed at the file's own directory before the
        # item is added.
        oeb.container = DirContainer(os.path.dirname(path), log,
                ignore_opf=True)
        bname = os.path.basename(path)
        id, href = oeb.manifest.generate(id='html',
                href=ascii_filename(bname))
        htmlfile_map[path] = href
        item = oeb.manifest.add(id, href, 'text/html')
        item.html_input_href = bname
        oeb.spine.add(item, True)

    self.added_resources = {}
    self.log = log
    self.log('Normalizing filename cases')
    for path, href in htmlfile_map.items():
        # Paths are keyed in lower case when the filesystem is not case
        # sensitive.
        if not self.is_case_sensitive(path):
            path = path.lower()
        self.added_resources[path] = href
    # Helpers stashed on self for use by resource_adder.
    self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
    self.urldefrag = urldefrag
    self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

    self.log('Rewriting HTML links')
    for f in filelist:
        path = f.path
        dpath = os.path.dirname(path)
        oeb.container = DirContainer(dpath, log, ignore_opf=True)
        item = oeb.manifest.hrefs[htmlfile_map[path]]
        rewrite_links(item.data, partial(self.resource_adder, base=dpath))

    for item in oeb.manifest.values():
        if item.media_type in self.OEB_STYLES:
            # Find the directory the stylesheet came from so that URLs
            # inside it are resolved against that directory.
            dpath = None
            for path, href in self.added_resources.items():
                if href == item.href:
                    dpath = os.path.dirname(path)
                    break
            cssutils.replaceUrls(item.data,
                    partial(self.resource_adder, base=dpath))

    toc = self.oeb.toc
    self.oeb.auto_generated_toc = True
    titles = []
    headers = []
    for item in self.oeb.spine:
        if not item.linear: continue
        html = item.data
        title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
        title = re.sub(r'\s+', ' ', title.strip())
        if title:
            titles.append(title)
        # Placeholder, replaced below by the first non-empty heading text.
        headers.append('(unlabled)')
        for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
            expr = '/h:html/h:body//h:%s[position()=1]/text()'
            header = ''.join(xpath(html, expr % tag))
            header = re.sub(r'\s+', ' ', header.strip())
            if header:
                headers[-1] = header
                break
    # Use titles unless some of them repeat, in which case use headings.
    use = titles
    if len(titles) > len(set(titles)):
        use = headers
    # NOTE(review): `titles` skips untitled items and both lists skip
    # non-linear items, yet they are zipped against the whole spine, so
    # entries may pair with the wrong item - confirm.
    for title, item in izip(use, self.oeb.spine):
        if not item.linear: continue
        toc.add(title, item.href)

    oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
    return oeb
|
||||
|
||||
def link_to_local_path(self, link_, base=None):
    """Resolve a link to a local filesystem path and fragment.

    :param link_: The link, as unicode or as UTF-8 encoded bytes.
    :param base: Base used to construct the ``Link``; the current working
                 directory is used when it is empty or ``None``.
    :return: ``(path, fragment)``, or ``(None, None)`` when the link cannot
             be decoded or processed, has no local path, or its path is
             empty after stripping.
    """
    from calibre.ebooks.html.input import Link
    if not isinstance(link_, unicode):
        try:
            # 'strict' is the codec error handler that raises on invalid
            # input; the former value 'error' is not a registered handler
            # name.
            link_ = link_.decode('utf-8', 'strict')
        except Exception:
            # Best effort: a link that cannot be decoded is ignored.
            self.log.warn('Failed to decode link %r. Ignoring'%link_)
            return None, None
    try:
        l = Link(link_, base if base else os.getcwdu())
    except Exception:
        self.log.exception('Failed to process link: %r'%link_)
        return None, None
    if l.path is None:
        # Not a local resource
        return None, None
    link = l.path.replace('/', os.sep).strip()
    frag = l.fragment
    if not link:
        return None, None
    return link, frag
|
||||
|
||||
def resource_adder(self, link_, base=None):
    """Link rewriter that adds local resources to the book's manifest.

    When *link_* resolves to a readable local file that is not a
    directory, the file is added to the manifest on first sight and the
    manifest href, plus any fragment, is returned. Links that do not
    resolve to such a file are returned unchanged. Links to files guessed
    to be ``text/plain`` are ignored and ``None`` is returned for them.

    :param link_: The link as found in the document.
    :param base: Directory against which relative links are resolved.
    :return: The rewritten link, the original link, or ``None``.
    """
    link, frag = self.link_to_local_path(link_, base=base)
    if link is None:
        return link_
    try:
        if base and not os.path.isabs(link):
            link = os.path.join(base, link)
        link = os.path.abspath(link)
    except Exception:
        # Best effort: leave links whose path cannot be built untouched,
        # without swallowing KeyboardInterrupt or SystemExit.
        return link_
    if not os.access(link, os.R_OK):
        return link_
    if os.path.isdir(link):
        self.log.warn(link_, 'is a link to a directory. Ignoring.')
        return link_
    if not self.is_case_sensitive(tempfile.gettempdir()):
        link = link.lower()
    if link not in self.added_resources:
        bhref = os.path.basename(link)
        item_id, href = self.oeb.manifest.generate(id='added',
                href=bhref)
        guessed = self.guess_type(href)[0]
        media_type = guessed or self.BINARY_MIME
        if media_type == 'text/plain':
            self.log.warn('Ignoring link to text file %r'%link_)
            return None

        self.oeb.log.debug('Added', link)
        self.oeb.container = self.DirContainer(os.path.dirname(link),
                self.oeb.log, ignore_opf=True)
        # Load into memory
        item = self.oeb.manifest.add(item_id, href, media_type)
        # bhref refers to an already existing file. The read() method of
        # DirContainer will call unquote on it before trying to read the
        # file, therefore we quote it here.
        if isinstance(bhref, unicode):
            bhref = bhref.encode('utf-8')
        item.html_input_href = quote(bhref).decode('utf-8')
        if guessed in self.OEB_STYLES:
            item.override_css_fetch = partial(
                    self.css_import_handler, os.path.dirname(link))
        # Accessing the attribute loads the item's data (see the
        # "Load into memory" note above); the value itself is not needed.
        item.data
        self.added_resources[link] = href

    nlink = self.added_resources[link]
    if frag:
        nlink = '#'.join((nlink, frag))
    return nlink
|
||||
|
||||
def css_import_handler(self, base, href):
    """Read an imported stylesheet from the local filesystem.

    :param base: Directory against which *href* is resolved.
    :param href: The imported stylesheet's link.
    :return: ``(None, text)`` with the file decoded as UTF-8 (invalid bytes
             replaced) and passed through ``self.oeb.css_preprocessor``, or
             ``(None, None)`` when the link is not a readable local file or
             reading or preprocessing fails.
    """
    link, frag = self.link_to_local_path(href, base=base)
    if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
        return (None, None)
    try:
        # Close the handle deterministically once the file has been read.
        with open(link, 'rb') as f:
            raw = f.read().decode('utf-8', 'replace')
        raw = self.oeb.css_preprocessor(raw, add_namespace=True)
    except Exception:
        # Best effort: log the failure and report the import as missing.
        self.log.exception('Failed to read CSS file: %r'%link)
        return (None, None)
    return (None, raw)
|
@ -4,22 +4,11 @@ __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, shutil
|
||||
|
||||
from calibre.utils import zipfile
|
||||
|
||||
from os.path import dirname, abspath, relpath, exists, basename
|
||||
|
||||
from lxml import etree
|
||||
from templite import Templite
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre import CurrentDir
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
from urllib import unquote
|
||||
|
||||
from calibre.ebooks.html.meta import EasyMeta
|
||||
|
||||
class HTMLOutput(OutputFormatPlugin):
|
||||
|
||||
@ -50,6 +39,9 @@ class HTMLOutput(OutputFormatPlugin):
|
||||
'''
|
||||
Generate table of contents
|
||||
'''
|
||||
from lxml import etree
|
||||
from urllib import unquote
|
||||
|
||||
from calibre.ebooks.oeb.base import element
|
||||
with CurrentDir(output_dir):
|
||||
def build_node(current_node, parent=None):
|
||||
@ -72,11 +64,18 @@ class HTMLOutput(OutputFormatPlugin):
|
||||
return wrap
|
||||
|
||||
def generate_html_toc(self, oeb_book, ref_url, output_dir):
|
||||
from lxml import etree
|
||||
|
||||
root = self.generate_toc(oeb_book, ref_url, output_dir)
|
||||
return etree.tostring(root, pretty_print=True, encoding='utf-8',
|
||||
xml_declaration=False)
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from lxml import etree
|
||||
from calibre.utils import zipfile
|
||||
from templite import Templite
|
||||
from urllib import unquote
|
||||
from calibre.ebooks.html.meta import EasyMeta
|
||||
|
||||
# read template files
|
||||
if opts.template_html_index is not None:
|
||||
@ -192,7 +191,7 @@ class HTMLOutput(OutputFormatPlugin):
|
||||
f.write(t)
|
||||
item.unload_data_from_memory(memory=path)
|
||||
|
||||
zfile = ZipFile(output_path, "w")
|
||||
zfile = zipfile.ZipFile(output_path, "w")
|
||||
zfile.add_dir(output_dir, basename(output_dir))
|
||||
zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)
|
||||
|
@ -10,9 +10,6 @@ import os
|
||||
|
||||
from calibre import guess_type
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
class HTMLZInput(InputFormatPlugin):
|
||||
|
||||
@ -23,6 +20,10 @@ class HTMLZInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
self.log = log
|
||||
html = u''
|
||||
top_levels = []
|
@ -9,13 +9,10 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
class HTMLZOutput(OutputFormatPlugin):
|
||||
|
||||
@ -43,7 +40,10 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
|
||||
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
# HTML
|
||||
if opts.htmlz_css_type == 'inline':
|
||||
@ -81,7 +81,7 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
fname = os.path.join(tdir, 'images', images[item.href])
|
||||
with open(fname, 'wb') as img:
|
||||
img.write(data)
|
||||
|
||||
|
||||
# Cover
|
||||
cover_path = None
|
||||
try:
|
87
src/calibre/ebooks/conversion/plugins/lrf_input.py
Normal file
87
src/calibre/ebooks/conversion/plugins/lrf_input.py
Normal file
@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, sys
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class LRFInput(InputFormatPlugin):
    '''
    Input plugin for LRF files.

    The LRF document is first dumped to XML by ``LRFDocument``, then that
    XML is transformed with the ``templates/lrf.xsl`` stylesheet and the
    result is written to ``content.opf`` in the current directory.
    '''

    name        = 'LRF Input'
    author      = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types  = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the LRF data in ``stream``.

        :param stream: handed directly to ``LRFDocument``
        :param options: only ``options.verbose`` is read here; above 2 the
                        intermediate XML is dumped to ``lrs.xml``
        :param log: logger, also stored as ``self.log``
        :return: absolute path of the generated ``content.opf``

        ``file_ext`` and ``accelerators`` are not used by this method.
        '''
        from lxml import etree
        from calibre.ebooks.lrf.input import (MediaType, Styles, TextBlock,
                Canvas, ImageBlock, RuledLine)
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            # Debug dump; use a context manager so the handle is closed
            # deterministically instead of being left to the GC.
            with open('lrs.xml', 'wb') as dump:
                dump.write(xml.encode('utf-8'))
        parser = etree.XMLParser(no_network=True, huge_tree=True)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except Exception:
            # Strict parsing failed: retry with a recovering parser.
            # ``Exception`` (not a bare except) so that Ctrl-C and
            # SystemExit still propagate.
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                                     recover=True)
            doc = etree.fromstring(xml, parser=parser)

        # Map the refobj of every CharButton to the "<page>.xhtml#<obj>"
        # target of the JumpTo found under the object it references.
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
            if jump_button:
                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
                if jump_to:
                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
                            jump_to[0].get('refobj'))

        # Map the refobj of every Plot to the file attribute of the
        # ImageStream reached through the referenced Image.
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
            if image:
                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                    image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        # Extension objects made available to the stylesheet under the
        # 'calibre' namespace.
        extensions = {
                ('calibre', 'media-type') : media_type,
                ('calibre', 'text-block') : text_block,
                ('calibre', 'ruled-line') : ruled_line,
                ('calibre', 'styles')     : styles,
                ('calibre', 'canvas')     : canvas,
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            # Presumably a recursion-depth failure inside the transform;
            # raise the limit and try once more.
            sys.setrecursionlimit(5000)
            result = transform(doc)

        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')
|
25
src/calibre/ebooks/conversion/plugins/odt_input.py
Normal file
25
src/calibre/ebooks/conversion/plugins/odt_input.py
Normal file
@ -0,0 +1,25 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Convert an ODT file into a Open Ebook
|
||||
'''
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class ODTInput(InputFormatPlugin):
    '''
    Input plugin for ODT files; the actual work is delegated to
    ``calibre.ebooks.odt.input.Extract``.
    '''

    name        = 'ODT Input'
    author      = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types  = set(['odt'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Run ``Extract`` on ``stream`` with '.' as the second argument and
        return whatever it returns. ``options``, ``file_ext`` and
        ``accelerators`` are not used here.
        '''
        # Imported lazily so that loading the plugin stays cheap
        from calibre.ebooks.odt.input import Extract
        extractor = Extract()
        return extractor(stream, '.', log)
|
||||
|
||||
|
@ -5,13 +5,10 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre import CurrentDir
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
from urllib import unquote
|
||||
|
||||
class OEBOutput(OutputFormatPlugin):
|
||||
|
||||
@ -23,6 +20,9 @@ class OEBOutput(OutputFormatPlugin):
|
||||
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from urllib import unquote
|
||||
from lxml import etree
|
||||
|
||||
self.log, self.opts = log, opts
|
||||
if not os.path.exists(output_path):
|
||||
os.makedirs(output_path)
|
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
|
||||
|
||||
class PDBInput(InputFormatPlugin):
|
||||
|
||||
@ -19,6 +17,9 @@ class PDBInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
|
||||
|
||||
header = PdbHeaderReader(stream)
|
||||
Reader = get_reader(header.ident)
|
||||
|
@ -8,7 +8,7 @@ import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.pdb import PDBError, get_writer, FORMAT_WRITERS
|
||||
from calibre.ebooks.pdb import PDBError, get_writer, ALL_FORMAT_WRITERS
|
||||
|
||||
class PDBOutput(OutputFormatPlugin):
|
||||
|
||||
@ -19,9 +19,9 @@ class PDBOutput(OutputFormatPlugin):
|
||||
options = set([
|
||||
OptionRecommendation(name='format', recommended_value='doc',
|
||||
level=OptionRecommendation.LOW,
|
||||
short_switch='f', choices=FORMAT_WRITERS.keys(),
|
||||
short_switch='f', choices=list(ALL_FORMAT_WRITERS),
|
||||
help=(_('Format to use inside the pdb container. Choices are:')+\
|
||||
' %s' % FORMAT_WRITERS.keys())),
|
||||
' %s' % list(ALL_FORMAT_WRITERS))),
|
||||
OptionRecommendation(name='pdb_output_encoding', recommended_value='cp1252',
|
||||
level=OptionRecommendation.LOW,
|
||||
help=_('Specify the character encoding of the output document. ' \
|
@ -7,10 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.pdf.pdftohtml import pdftohtml
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.constants import plugins
|
||||
pdfreflow, pdfreflow_err = plugins['pdfreflow']
|
||||
|
||||
class PDFInput(InputFormatPlugin):
|
||||
|
||||
@ -31,6 +27,9 @@ class PDFInput(InputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert_new(self, stream, accelerators):
|
||||
from calibre.constants import plugins
|
||||
pdfreflow, pdfreflow_err = plugins['pdfreflow']
|
||||
|
||||
from calibre.ebooks.pdf.reflow import PDFDocument
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
if pdfreflow_err:
|
||||
@ -43,6 +42,9 @@ class PDFInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.pdf.pdftohtml import pdftohtml
|
||||
|
||||
log.debug('Converting file to html...')
|
||||
# The main html file will be named index.html
|
||||
self.opts, self.log = options, log
|
@ -13,10 +13,50 @@ import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.pdf.pageoptions import UNITS, PAPER_SIZES, \
|
||||
ORIENTATIONS
|
||||
|
||||
# Choices for the 'unit' option
UNITS = [
        'millimeter', 'point', 'inch', 'pica', 'didot', 'cicero',
        'devicepixel',
        ]

# Choices for the 'paper_size' option. The order is significant only in
# that the list is interpolated verbatim into the option's help text.
PAPER_SIZES = [
        'b2', 'a9', 'executive', 'tabloid', 'b4', 'b5', 'b6', 'b7', 'b0',
        'b1', 'letter', 'b3', 'a7', 'a8', 'b8', 'b9', 'a3', 'a1', 'folio',
        'c5e', 'dle', 'a0', 'ledger', 'legal', 'a6', 'a2', 'b10', 'a5',
        'comm10e', 'a4',
        ]

# Choices for the 'orientation' option
ORIENTATIONS = ['portrait', 'landscape']
|
||||
|
||||
class PDFOutput(OutputFormatPlugin):
|
||||
|
||||
@ -26,23 +66,23 @@ class PDFOutput(OutputFormatPlugin):
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='unit', recommended_value='inch',
|
||||
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(),
|
||||
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS,
|
||||
help=_('The unit of measure. Default is inch. Choices '
|
||||
'are %s '
|
||||
'Note: This does not override the unit for margins!') % UNITS.keys()),
|
||||
'Note: This does not override the unit for margins!') % UNITS),
|
||||
OptionRecommendation(name='paper_size', recommended_value='letter',
|
||||
level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(),
|
||||
level=OptionRecommendation.LOW, choices=PAPER_SIZES,
|
||||
help=_('The size of the paper. This size will be overridden when a '
|
||||
'non default output profile is used. Default is letter. Choices '
|
||||
'are %s') % PAPER_SIZES.keys()),
|
||||
'are %s') % PAPER_SIZES),
|
||||
OptionRecommendation(name='custom_size', recommended_value=None,
|
||||
help=_('Custom size of the document. Use the form widthxheight '
|
||||
'EG. `123x321` to specify the width and height. '
|
||||
'This overrides any specified paper-size.')),
|
||||
OptionRecommendation(name='orientation', recommended_value='portrait',
|
||||
level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(),
|
||||
level=OptionRecommendation.LOW, choices=ORIENTATIONS,
|
||||
help=_('The orientation of the page. Default is portrait. Choices '
|
||||
'are %s') % ORIENTATIONS.keys()),
|
||||
'are %s') % ORIENTATIONS),
|
||||
OptionRecommendation(name='preserve_cover_aspect_ratio',
|
||||
recommended_value=False,
|
||||
help=_('Preserve the aspect ratio of the cover, instead'
|
||||
@ -105,6 +145,8 @@ class PDFOutput(OutputFormatPlugin):
|
||||
|
||||
def convert_text(self, oeb_book):
|
||||
from calibre.ebooks.pdf.writer import PDFWriter
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
|
||||
self.log.debug('Serializing oeb input to disk for processing...')
|
||||
self.get_cover_data()
|
||||
|
@ -11,9 +11,6 @@ import shutil
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
|
||||
class PMLInput(InputFormatPlugin):
|
||||
|
||||
@ -24,6 +21,8 @@ class PMLInput(InputFormatPlugin):
|
||||
file_types = set(['pml', 'pmlz'])
|
||||
|
||||
def process_pml(self, pml_path, html_path, close_all=False):
|
||||
from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
|
||||
|
||||
pclose = False
|
||||
hclose = False
|
||||
|
||||
@ -85,6 +84,9 @@ class PMLInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
|
||||
self.options = options
|
||||
self.log = log
|
||||
pages, images = [], []
|
@ -4,21 +4,11 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
import os, cStringIO
|
||||
|
||||
try:
|
||||
from PIL import Image
|
||||
Image
|
||||
except ImportError:
|
||||
import Image
|
||||
|
||||
import cStringIO
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre.ebooks.pml.pmlml import PMLMLizer
|
||||
|
||||
class PMLOutput(OutputFormatPlugin):
|
||||
|
||||
@ -43,6 +33,9 @@ class PMLOutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from calibre.ebooks.pml.pmlml import PMLMLizer
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
with TemporaryDirectory('_pmlz_output') as tdir:
|
||||
pmlmlizer = PMLMLizer(log)
|
||||
pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
|
||||
@ -59,6 +52,13 @@ class PMLOutput(OutputFormatPlugin):
|
||||
pmlz.add_dir(tdir)
|
||||
|
||||
def write_images(self, manifest, image_hrefs, out_dir, opts):
|
||||
try:
|
||||
from PIL import Image
|
||||
Image
|
||||
except ImportError:
|
||||
import Image
|
||||
|
||||
|
||||
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
||||
for item in manifest:
|
||||
if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
|
@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
from calibre.ebooks.rb.reader import Reader
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class RBInput(InputFormatPlugin):
|
||||
@ -18,6 +17,8 @@ class RBInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.rb.reader import Reader
|
||||
|
||||
reader = Reader(stream, log, options.input_encoding)
|
||||
opf = reader.extract_content(os.getcwd())
|
||||
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.rb.writer import RBWriter
|
||||
|
||||
class RBOutput(OutputFormatPlugin):
|
||||
|
||||
@ -22,6 +21,8 @@ class RBOutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from calibre.ebooks.rb.writer import RBWriter
|
||||
|
||||
close = False
|
||||
if not hasattr(output_path, 'write'):
|
||||
close = True
|
298
src/calibre/ebooks/conversion/plugins/rtf_input.py
Normal file
298
src/calibre/ebooks/conversion/plugins/rtf_input.py
Normal file
@ -0,0 +1,298 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, glob, re, textwrap
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
# Translation table from the border style names found on the
# border-cell-*-style attributes of the intermediate XML to CSS
# border-style keywords. Names missing from this table fall back to
# 'solid' at the lookup site.
border_style_map = dict([
    ('single', 'solid'),
    ('double-thickness-border', 'double'),
    ('shadowed-border', 'outset'),
    ('double-border', 'double'),
    ('dotted-border', 'dotted'),
    ('dashed', 'dashed'),
    ('hairline', 'solid'),
    ('inset', 'inset'),
    ('dash-small', 'dashed'),
    ('dot-dash', 'dotted'),
    ('dot-dot-dash', 'dotted'),
    ('outset', 'outset'),
    ('tripple', 'double'),
    ('triple', 'double'),
    ('thick-thin-small', 'solid'),
    ('thin-thick-small', 'solid'),
    ('thin-thick-thin-small', 'solid'),
    ('thick-thin-medium', 'solid'),
    ('thin-thick-medium', 'solid'),
    ('thin-thick-thin-medium', 'solid'),
    ('thick-thin-large', 'solid'),
    ('thin-thick-thin-large', 'solid'),
    ('wavy', 'ridge'),
    ('double-wavy', 'ridge'),
    ('striped', 'ridge'),
    ('emboss', 'inset'),
    ('engrave', 'inset'),
    ('frame', 'ridge'),
])
|
||||
|
||||
|
||||
class RTFInput(InputFormatPlugin):
    '''
    Input plugin for RTF files.

    The RTF is converted to XML with rtf2xml's ``ParseRtf``, embedded
    pictures are written out as image files, the XML is transformed with
    ``templates/rtf.xsl`` into ``index.xhtml`` and finally a
    ``metadata.opf`` describing the result is written. All files are
    created relative to the current working directory.
    '''

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        '''
        Run ``ParseRtf`` and return the XML it produced.

        :param stream: passed to ``ParseRtf`` as ``in_file``
                       (:meth:`convert` passes the input file's path)
        :return: raw contents of ``dataxml.xml`` as written by the parser

        When ``self.opts.debug_pipeline`` is set, the parser is run with
        run level 4, indented output and ``rtfdebug`` as its debug
        directory, provided that directory can be created.
        '''
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'dataxml.xml'
        run_lev, debug_dir, indent_out = 1, None, 0
        if getattr(self.opts, 'debug_pipeline', None) is not None:
            try:
                os.mkdir('rtfdebug')
                debug_dir = 'rtfdebug'
                run_lev = 4
                indent_out = 1
                self.log('Running RTFParser in debug mode')
            except Exception:
                # Best effort: fall back to whatever settings were
                # reached before the failure
                self.log.warn('Impossible to run RTFParser in debug mode')
        parser = ParseRtf(
            in_file    = stream,
            out_file   = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = indent_out,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 1,

            #debug
            deb_dir = debug_dir,
            run_level = run_lev,
        )
        parser.parse_rtf()
        with open(ofile, 'rb') as f:
            return f.read()

    def extract_images(self, picts):
        '''
        Extract the hex encoded ``{\\pict ...}`` groups from the file at
        path ``picts`` and write each one out as ``NNNN.<fmt>``.

        The format is sniffed with ``imghdr``; unrecognised data is
        assumed to be WMF. Returns the result of :meth:`convert_images`
        applied to the ``{number: file name}`` map (numbers start at 1).
        '''
        import imghdr
        self.log('Extracting images...')

        with open(picts, 'rb') as f:
            raw = f.read()
        blobs = [b for b in re.findall(r'\{\\pict([^}]+)\}', raw) if b]
        # Everything that is not a hex digit (control words' punctuation,
        # whitespace, ...) is stripped before decoding
        non_hex = re.compile(r'[^a-fA-F0-9]')
        encs = [non_hex.sub('', blob) for blob in blobs]

        count = 0
        imap = {}
        for enc in encs:
            if len(enc) % 2 == 1:
                # Hex decoding needs an even number of digits
                enc = enc[:-1]
            data = enc.decode('hex')
            fmt = imghdr.what(None, data)
            if fmt is None:
                fmt = 'wmf'
            count += 1
            name = '%04d.%s' % (count, fmt)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
            # with open(name+'.hex', 'wb') as f:
            #     f.write(enc)
        return self.convert_images(imap)

    def convert_images(self, imap):
        '''
        Replace every file name in ``imap`` with the name returned by
        :meth:`convert_image`; entries whose conversion fails keep their
        original name. Resets ``self.default_img``. Returns ``imap``.
        '''
        self.default_img = None
        # Only values of existing keys are reassigned, so iterating while
        # assigning is safe
        for count, val in imap.iteritems():
            try:
                imap[count] = self.convert_image(val)
            except Exception:
                self.log.exception('Failed to convert', val)
        return imap

    def convert_image(self, name):
        '''
        Return ``name`` unchanged unless it ends in ``.wmf``; WMF files
        are rasterized, falling back to a placeholder image on failure.
        '''
        if not name.endswith('.wmf'):
            return name
        try:
            return self.rasterize_wmf(name)
        except Exception:
            self.log.exception('Failed to convert WMF image %r'%name)
        return self.replace_wmf(name)

    def replace_wmf(self, name):
        '''
        Write a placeholder image (generated once and cached in
        ``self.default_img``) next to the WMF file and return the new
        ``.jpg`` file name.
        '''
        from calibre.ebooks import calibre_cover
        if self.default_img is None:
            self.default_img = calibre_cover('Conversion of WMF images is not supported',
            'Use Microsoft Word or OpenOffice to save this RTF file'
            ' as HTML and convert that in calibre.', title_size=36,
            author_size=20)
        name = name.replace('.wmf', '.jpg')
        with open(name, 'wb') as f:
            f.write(self.default_img)
        return name

    def rasterize_wmf(self, name):
        '''
        Pass the contents of the WMF file through ``wmf_unwrap``, write
        the result to a ``.png`` file and return that file's name.
        '''
        from calibre.utils.wmf.parse import wmf_unwrap
        with open(name, 'rb') as f:
            data = f.read()
        data = wmf_unwrap(data)
        name = name.replace('.wmf', '.png')
        with open(name, 'wb') as f:
            f.write(data)
        return name

    def write_inline_css(self, ic, border_styles):
        '''
        Append the inline formatting classes to ``styles.css``.

        :param ic: object providing ``font_sizes`` and ``colors``
                   sequences; one ``span.fsN`` / ``span.colN`` rule is
                   emitted per entry
        :param border_styles: mapping of class name to CSS declarations,
                              as returned by :meth:`convert_borders`
        '''
        font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
                enumerate(ic.font_sizes)]
        color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
                enumerate(ic.colors)]
        css = textwrap.dedent('''
        span.none {
            text-decoration: none; font-weight: normal;
            font-style: normal; font-variant: normal
        }

        span.italics { font-style: italic }

        span.bold { font-weight: bold }

        span.small-caps { font-variant: small-caps }

        span.underlined { text-decoration: underline }

        span.strike-through { text-decoration: line-through }

        ''')
        css += '\n'+'\n'.join(font_size_classes)
        css += '\n' +'\n'.join(color_classes)

        for cls, val in border_styles.iteritems():
            css += '\n\n.%s {\n%s\n}'%(cls, val)

        with open('styles.css', 'ab') as f:
            f.write(css)

    def convert_borders(self, doc):
        '''
        Give every ``cell`` element in ``doc`` a ``class`` attribute
        naming a CSS class that reproduces its border attributes.

        Cells with identical border declarations share a class; classes
        are named ``border_styleN`` in order of first appearance.

        :return: dict mapping class name to its CSS declarations
        '''
        # style text -> index, so that lookup is O(1) instead of scanning
        # a list for every cell
        style_index = {}
        style_map = {}
        for elem in doc.xpath(r'//*[local-name()="cell"]'):
            style = ['border-style: hidden', 'border-width: 1px',
                    'border-color: black']
            for x in ('bottom', 'top', 'left', 'right'):
                bs = elem.get('border-cell-%s-style'%x, None)
                if bs:
                    cbs = border_style_map.get(bs, 'solid')
                    style.append('border-%s-style: %s'%(x, cbs))
                bw = elem.get('border-cell-%s-line-width'%x, None)
                if bw:
                    style.append('border-%s-width: %spt'%(x, bw))
                bc = elem.get('border-cell-%s-color'%x, None)
                if bc:
                    style.append('border-%s-color: %s'%(x, bc))
            style = ';\n'.join(style)
            # A new style gets the next free index, an already seen one
            # keeps the index it was first given
            idx = style_index.setdefault(style, len(style_index))
            cls = 'border_style%d'%idx
            style_map[cls] = style
            elem.set('class', cls)
        return style_map

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the RTF file behind ``stream``.

        :param stream: open file; its ``name`` is given to the RTF parser
                       and the stream itself is re-read for metadata
        :param options: stored as ``self.opts``
        :param log: stored as ``self.log``
        :return: absolute path of the generated ``metadata.opf``
        :raises ValueError: if the parser raises
                            ``RtfInvalidCodeException``

        ``file_ext`` and ``accelerators`` are not used by this method.
        '''
        from lxml import etree
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        from calibre.ebooks.rtf.input import InlineClass
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)

        # Always bound, so the lookup below cannot fail with a NameError
        # when no picture directory was produced
        imap = {}
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            try:
                imap = self.extract_images(d[0])
            except Exception:
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        border_styles = self.convert_borders(doc)
        # Replace picture numbers with the names of the extracted files
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = { ('calibre', 'inline-class') : inline_class }
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            #clean multiple \n
            res = re.sub('\n+', '\n', res)
            # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
            # res = re.sub('\s*<body>', '<body>', res)
            # res = re.sub('(?<=\n)\n{2}',
            #         u'<p>\u00a0</p>\n'.encode('utf-8'), res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        # Close the OPF file deterministically rather than leaking the
        # handle
        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.abspath('metadata.opf')
|
||||
|
||||
|
@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
from calibre.ebooks.rtf.rtfml import RTFMLizer
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
|
||||
class RTFOutput(OutputFormatPlugin):
|
||||
@ -16,6 +15,8 @@ class RTFOutput(OutputFormatPlugin):
|
||||
file_type = 'rtf'
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from calibre.ebooks.rtf.rtfml import RTFMLizer
|
||||
|
||||
rtfmlitzer = RTFMLizer(log)
|
||||
content = rtfmlitzer.extract_content(oeb_book, opts)
|
||||
|
@ -4,13 +4,11 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, uuid
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.snb.snbfile import SNBFile
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from lxml import etree
|
||||
|
||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||
|
||||
@ -29,7 +27,12 @@ class SNBInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
import uuid
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.oeb.base import DirContainer
|
||||
from calibre.ebooks.snb.snbfile import SNBFile
|
||||
|
||||
log.debug("Parsing SNB file...")
|
||||
snbFile = SNBFile()
|
||||
try:
|
@ -6,12 +6,9 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, string
|
||||
|
||||
from lxml import etree
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.constants import __appname__, __version__
|
||||
from calibre.ebooks.snb.snbfile import SNBFile
|
||||
from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
|
||||
|
||||
class SNBOutput(OutputFormatPlugin):
|
||||
|
||||
@ -49,6 +46,11 @@ class SNBOutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.snb.snbfile import SNBFile
|
||||
from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
|
||||
|
||||
|
||||
self.opts = opts
|
||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
||||
try:
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
from cStringIO import StringIO
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.compression.tcr import decompress
|
||||
|
||||
class TCRInput(InputFormatPlugin):
|
||||
|
||||
@ -17,6 +16,8 @@ class TCRInput(InputFormatPlugin):
|
||||
file_types = set(['tcr'])
|
||||
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from calibre.ebooks.compression.tcr import decompress
|
||||
|
||||
log.info('Decompressing text...')
|
||||
raw_txt = decompress(stream)
|
||||
|
||||
@ -28,7 +29,7 @@ class TCRInput(InputFormatPlugin):
|
||||
txt_plugin = plugin_for_input_format('txt')
|
||||
for opt in txt_plugin.options:
|
||||
if not hasattr(self.options, opt.option.name):
|
||||
setattr(self.options, opt.option.name, opt.recommended_value)
|
||||
setattr(options, opt.option.name, opt.recommended_value)
|
||||
|
||||
stream.seek(0)
|
||||
return txt_plugin.convert(stream, options,
|
@ -8,8 +8,6 @@ import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.txt.txtml import TXTMLizer
|
||||
from calibre.ebooks.compression.tcr import compress
|
||||
|
||||
class TCROutput(OutputFormatPlugin):
|
||||
|
||||
@ -25,6 +23,9 @@ class TCROutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from calibre.ebooks.txt.txtml import TXTMLizer
|
||||
from calibre.ebooks.compression.tcr import compress
|
||||
|
||||
close = False
|
||||
if not hasattr(output_path, 'write'):
|
||||
close = True
|
@ -8,14 +8,6 @@ import os
|
||||
|
||||
from calibre import _ent_pat, walk, xml_entity_to_unicode
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
|
||||
from calibre.ebooks.chardet import detect
|
||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
||||
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
|
||||
normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
|
||||
separate_hard_scene_breaks
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
class TXTInput(InputFormatPlugin):
|
||||
|
||||
@ -61,6 +53,17 @@ class TXTInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
|
||||
from calibre.ebooks.chardet import detect
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre.ebooks.txt.processor import (convert_basic,
|
||||
convert_markdown, separate_paragraphs_single_line,
|
||||
separate_paragraphs_print_formatted, preserve_spaces,
|
||||
detect_paragraph_type, detect_formatting_type,
|
||||
normalize_line_endings, convert_textile, remove_indents,
|
||||
block_to_single_line, separate_hard_scene_breaks)
|
||||
|
||||
|
||||
self.log = log
|
||||
txt = ''
|
||||
log.debug('Reading text from file...')
|
@ -7,15 +7,12 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.txt.txtml import TXTMLizer
|
||||
from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
|
||||
from calibre.ptempfile import TemporaryDirectory, TemporaryFile
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
NEWLINE_TYPES = ['system', 'unix', 'old_mac', 'windows']
|
||||
|
||||
class TXTOutput(OutputFormatPlugin):
|
||||
|
||||
@ -26,11 +23,11 @@ class TXTOutput(OutputFormatPlugin):
|
||||
options = set([
|
||||
OptionRecommendation(name='newline', recommended_value='system',
|
||||
level=OptionRecommendation.LOW,
|
||||
short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
|
||||
short_switch='n', choices=NEWLINE_TYPES,
|
||||
help=_('Type of newline to use. Options are %s. Default is \'system\'. '
|
||||
'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
|
||||
'For Mac OS X use \'unix\'. \'system\' will default to the newline '
|
||||
'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
|
||||
'type used by this OS.') % sorted(NEWLINE_TYPES)),
|
||||
OptionRecommendation(name='txt_output_encoding', recommended_value='utf-8',
|
||||
level=OptionRecommendation.LOW,
|
||||
help=_('Specify the character encoding of the output document. ' \
|
||||
@ -76,6 +73,11 @@ class TXTOutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from calibre.ebooks.txt.txtml import TXTMLizer
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.ebooks.txt.newlines import specified_newlines, TxtNewlines
|
||||
|
||||
|
||||
if opts.txt_output_formatting.lower() == 'markdown':
|
||||
from calibre.ebooks.txt.markdownml import MarkdownMLizer
|
||||
self.writer = MarkdownMLizer(log)
|
||||
@ -116,6 +118,9 @@ class TXTZOutput(TXTOutput):
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from lxml import etree
|
||||
|
||||
with TemporaryDirectory('_txtz_output') as tdir:
|
||||
# TXT
|
||||
txt_name = 'index.txt'
|
@ -6,7 +6,6 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
|
||||
from calibre.utils.date import parse_date, strptime
|
||||
|
||||
|
||||
class Epubcheck(ePubFixer):
|
||||
@ -35,6 +34,8 @@ class Epubcheck(ePubFixer):
|
||||
return 'epubcheck'
|
||||
|
||||
def fix_pubdates(self):
|
||||
from calibre.utils.date import parse_date, strptime
|
||||
|
||||
dirtied = False
|
||||
opf = self.container.opf
|
||||
for dcdate in opf.xpath('//dc:date',
|
||||
|
@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
|
||||
from lxml.etree import tostring as _tostring
|
||||
|
||||
def tostring(root, strip_comments=False, pretty_print=False):
|
||||
'''
|
||||
Serialize processed XHTML.
|
||||
'''
|
||||
from lxml.etree import tostring as _tostring
|
||||
|
||||
root.set('xmlns', 'http://www.w3.org/1999/xhtml')
|
||||
root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
|
||||
for x in root.iter():
|
||||
|
@ -11,19 +11,13 @@ __docformat__ = 'restructuredtext en'
|
||||
Input plugin for HTML or OPF ebooks.
|
||||
'''
|
||||
|
||||
import os, re, sys, uuid, tempfile, errno as gerrno
|
||||
import os, re, sys, errno as gerrno
|
||||
from urlparse import urlparse, urlunparse
|
||||
from urllib import unquote, quote
|
||||
from functools import partial
|
||||
from itertools import izip
|
||||
from urllib import unquote
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.chardet import detect_xml_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.constants import islinux, isbsd, iswindows
|
||||
from calibre.constants import iswindows
|
||||
from calibre import unicode_path, as_unicode
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
|
||||
class Link(object):
|
||||
'''
|
||||
@ -241,262 +235,4 @@ def get_filelist(htmlfile, dir, opts, log):
|
||||
return filelist
|
||||
|
||||
|
||||
class HTMLInput(InputFormatPlugin):
    '''
    Input plugin that turns an HTML file (together with the local files it
    links to) or an OPF file into an OEB book.
    '''

    name        = 'HTML Input'
    author      = 'Kovid Goyal'
    description = 'Convert HTML and OPF files to an OEB'
    file_types  = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])

    options = set([
        OptionRecommendation(name='breadth_first',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Traverse links in HTML files breadth first. Normally, '
                   'they are traversed depth first.'
                   )
        ),

        OptionRecommendation(name='max_levels',
            recommended_value=5, level=OptionRecommendation.LOW,
            help=_('Maximum levels of recursion when following links in '
                   'HTML files. Must be non-negative. 0 implies that no '
                   'links in the root HTML file are followed. Default is '
                   '%default.'
                   )
        ),

        OptionRecommendation(name='dont_package',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally this input plugin re-arranges all the input '
                   'files into a standard folder hierarchy. Only use this option '
                   'if you know what you are doing as it can result in various '
                   'nasty side effects in the rest of the conversion pipeline.'
                   )
        ),

    ])

    def convert(self, stream, opts, file_ext, log,
                accelerators):
        '''
        Convert ``stream`` to an OEB book.

        For any extension other than ``opf`` the metadata is read from the
        HTML (merged into metadata guessed from the file name, when the
        stream has a name) and the book is assembled by
        :meth:`create_oebbook`. OPF input is handed straight to the plumber's
        ``create_oebbook``.

        Raises ValueError if ``opts.dont_package`` is set for non-OPF input.
        '''
        # Reset the cached answer so every conversion re-probes the filesystem
        self._is_case_sensitive = None
        basedir = os.getcwd()
        self.opts = opts

        fname = None
        if hasattr(stream, 'name'):
            basedir = os.path.dirname(stream.name)
            fname = os.path.basename(stream.name)

        if file_ext != 'opf':
            if opts.dont_package:
                raise ValueError('The --dont-package option is not supported for an HTML input file')
            from calibre.ebooks.metadata.html import get_metadata
            mi = get_metadata(stream)
            if fname:
                from calibre.ebooks.metadata.meta import metadata_from_filename
                fmi = metadata_from_filename(fname)
                # Metadata found inside the HTML overrides the file name guess
                fmi.smart_update(mi)
                mi = fmi
            oeb = self.create_oebbook(stream.name, basedir, opts, log, mi)
            return oeb

        from calibre.ebooks.conversion.plumber import create_oebbook
        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

    def is_case_sensitive(self, path):
        '''
        Return whether the filesystem holding ``path`` is case sensitive.

        The first answer obtained by probing an existing path is cached on
        the instance. If ``path`` is empty or does not exist, the answer
        falls back to ``islinux or isbsd`` and is not cached.
        '''
        if getattr(self, '_is_case_sensitive', None) is not None:
            return self._is_case_sensitive
        if not path or not os.path.exists(path):
            return islinux or isbsd
        # If both the lower and upper cased spellings exist, the filesystem
        # is treated as case insensitive
        self._is_case_sensitive = not (os.path.exists(path.lower())
                and os.path.exists(path.upper()))
        return self._is_case_sensitive

    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''
        Build an OEB book from the HTML file at ``htmlpath``.

        Fills in the book metadata from ``mi``, adds every non-binary file
        returned by ``get_filelist`` to the manifest and spine, rewrites the
        links in the HTML and CSS so that they point at manifest items
        (adding linked resources on the way, see :meth:`resource_adder`) and
        generates a table of contents from the document titles or headings.

        Returns the populated OEB book.
        '''
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
            xpath)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang().replace('_', '-'))
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', self.oeb.translate(__('Unknown')))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            item_id, href = oeb.manifest.generate(id='html',
                    href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(item_id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Keep references to the lazily imported helpers for resource_adder
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                # Resolve URLs in a stylesheet relative to the directory the
                # stylesheet was originally loaded from
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data,
                        partial(self.resource_adder, base=dpath))

        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        # Fall back to the headings when the <title>s are not unique
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        # NOTE(review): ``titles`` only has entries for linear items with a
        # non-empty title and ``headers`` only for linear items, yet both are
        # paired with the full spine here, so labels can shift when the spine
        # has non-linear or untitled items -- confirm whether that is intended
        for title, item in izip(use, self.oeb.spine):
            if not item.linear:
                continue
            toc.add(title, item.href)

        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb

    def link_to_local_path(self, link_, base=None):
        '''
        Resolve ``link_`` to a path on the local filesystem.

        Returns a ``(path, fragment)`` tuple, or ``(None, None)`` when the
        link cannot be decoded or parsed, is not a local resource, or has an
        empty path. ``base`` defaults to the current working directory.
        '''
        if not isinstance(link_, unicode):
            try:
                # 'strict' is the valid name of the error handler; the
                # previous value 'error' is not a registered handler
                link_ = link_.decode('utf-8', 'strict')
            except Exception:
                self.log.warn('Failed to decode link %r. Ignoring'%link_)
                return None, None
        try:
            l = Link(link_, base if base else os.getcwdu())
        except Exception:
            self.log.exception('Failed to process link: %r'%link_)
            return None, None
        if l.path is None:
            # Not a local resource
            return None, None
        link = l.path.replace('/', os.sep).strip()
        frag = l.fragment
        if not link:
            return None, None
        return link, frag

    def resource_adder(self, link_, base=None):
        '''
        Link rewriting callback: add the local file ``link_`` points to to
        the manifest (once) and return the href it should be replaced with.

        ``link_`` is returned unchanged when it does not resolve to a
        readable local file or points to a directory. ``None`` is returned
        for links to plain text files.
        '''
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except Exception:
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
            item_id, href = self.oeb.manifest.generate(id='added',
                    href=bhref)
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r'%link_)
                return None

            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(os.path.dirname(link),
                    self.oeb.log, ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(item_id, href, media_type)
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
            if isinstance(bhref, unicode):
                bhref = bhref.encode('utf-8')
            item.html_input_href = quote(bhref).decode('utf-8')
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, os.path.dirname(link))
            # Accessing .data forces the item to be loaded while the
            # container still points at its directory
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        if frag:
            nlink = '#'.join((nlink, frag))
        return nlink

    def css_import_handler(self, base, href):
        '''
        Fetch callback for CSS imports: read the stylesheet ``href`` (relative
        to ``base``) from disk.

        Returns ``(None, css_text)`` on success and ``(None, None)`` when the
        file is missing, unreadable, a directory, or fails to be processed.
        '''
        link, frag = self.link_to_local_path(href, base=base)
        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
            return (None, None)
        try:
            # Use a context manager so the file handle is always closed
            with open(link, 'rb') as f:
                raw = f.read().decode('utf-8', 'replace')
            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
        except Exception:
            self.log.exception('Failed to read CSS file: %r'%link)
            return (None, None)
        return (None, raw)
|
||||
|
@ -4,7 +4,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
This package contains logic to read and write LRF files.
|
||||
The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
|
||||
"""
|
||||
from uuid import uuid4
|
||||
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book
|
||||
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, \
|
||||
@ -60,6 +59,7 @@ def find_custom_fonts(options, logger):
|
||||
|
||||
def Book(options, logger, font_delta=0, header=None,
|
||||
profile=PRS500_PROFILE, **settings):
|
||||
from uuid import uuid4
|
||||
ps = {}
|
||||
ps['topmargin'] = options.top_margin
|
||||
ps['evensidemargin'] = options.left_margin
|
||||
|
@ -6,12 +6,11 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, textwrap, sys, operator
|
||||
import textwrap, operator
|
||||
from copy import deepcopy, copy
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre import guess_type
|
||||
|
||||
class Canvas(etree.XSLTExtension):
|
||||
@ -406,76 +405,4 @@ class Styles(etree.XSLTExtension):
|
||||
|
||||
|
||||
|
||||
class LRFInput(InputFormatPlugin):
    '''
    Input plugin that converts LRF files to HTML by rendering the LRF
    document to XML and running it through the ``templates/lrf.xsl``
    stylesheet.
    '''

    name        = 'LRF Input'
    author      = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types  = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the LRF document in ``stream``.

        Writes ``content.opf`` (and the stylesheet produced by
        ``styles.write()``) into the current working directory and returns
        the absolute path of ``content.opf``. When ``options.verbose`` is
        greater than 2 the intermediate XML is also dumped to ``lrs.xml``.
        '''
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            # Debug dump; use a context manager so the handle is closed
            with open('lrs.xml', 'wb') as dump:
                dump.write(xml.encode('utf-8'))
        parser = etree.XMLParser(no_network=True, huge_tree=True)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except Exception:
            # Retry with a parser that tolerates malformed XML
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                    recover=True)
            doc = etree.fromstring(xml, parser=parser)

        # Map the refobj of every CharButton to the "page.xhtml#object"
        # target of the JumpTo found under the object it references
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
            if jump_button:
                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
                if jump_to:
                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
                            jump_to[0].get('refobj'))

        # Map the refobj of every Plot to the file of the ImageStream used
        # by the Image it references
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
            if image:
                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                        image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        extensions = {
                ('calibre', 'media-type') : media_type,
                ('calibre', 'text-block') : text_block,
                ('calibre', 'ruled-line') : ruled_line,
                ('calibre', 'styles')     : styles,
                ('calibre', 'canvas')     : canvas,
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            # Retry once with a higher recursion limit
            sys.setrecursionlimit(5000)
            result = transform(doc)

        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')
|
||||
|
@ -710,7 +710,7 @@ class Metadata(object):
|
||||
fmt('Title sort', self.title_sort)
|
||||
if self.authors:
|
||||
fmt('Author(s)', authors_to_string(self.authors) + \
|
||||
((' [' + self.author_sort + ']')
|
||||
((' [' + self.author_sort + ']')
|
||||
if self.author_sort and self.author_sort != _('Unknown') else ''))
|
||||
if self.publisher:
|
||||
fmt('Publisher', self.publisher)
|
||||
|
@ -12,7 +12,6 @@ from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
|
||||
from calibre.constants import filesystem_encoding, preferred_encoding
|
||||
from calibre.library.field_metadata import FieldMetadata
|
||||
from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE, local_tz
|
||||
from calibre.utils.magick import Image
|
||||
from calibre import isbytestring
|
||||
|
||||
# Translate datetimes to and from strings. The string form is the datetime in
|
||||
@ -37,6 +36,8 @@ def encode_thumbnail(thumbnail):
|
||||
'''
|
||||
Encode the image part of a thumbnail, then return the 3 part tuple
|
||||
'''
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
if thumbnail is None:
|
||||
return None
|
||||
if not isinstance(thumbnail, (tuple, list)):
|
||||
|
@ -129,9 +129,57 @@ class OCFDirReader(OCFReader):
|
||||
def open(self, path, *args, **kwargs):
|
||||
return open(os.path.join(self.root, path), *args, **kwargs)
|
||||
|
||||
def get_cover(opf, opf_path, stream, reader=None):
|
||||
def _strip_font_faces(path, ffpat):
    '''
    Remove every match of the compiled bytes pattern ``ffpat`` from the file
    at ``path``, rewriting the file in place. Returns the stripped bytes.
    '''
    with open(path, 'r+b') as f:
        raw = ffpat.sub(b'', f.read())
        # truncate() does not move the file position, so rewind first;
        # otherwise the write would land at the old end of file and leave a
        # block of NUL bytes in front of the content
        f.seek(0)
        f.truncate()
        f.write(raw)
    return raw


def render_cover(opf, opf_path, zf, reader=None):
    '''
    Render the first spine item of an EPUB as its cover.

    The archive ``zf`` is extracted into a temporary directory and the first
    spine item of ``opf`` (located relative to ``opf_path``) is passed to
    ``render_html_svg_workaround``, whose result is returned.

    Returns None when there is no first spine item, when ``reader`` reports
    that item as encrypted, or when the item does not exist after extraction.
    '''
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log

    cpage = opf.first_spine_item()
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        return

    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            zf.extractall()
            opf_path = opf_path.replace('/', os.sep)
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return

            if isosx:
                # On OS X trying to render a HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts as well as any @font-face
                # rules
                for f in walk('.'):
                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
                        os.remove(f)
                ffpat = re.compile(br'@font-face.*?{.*?}',
                        re.DOTALL|re.IGNORECASE)
                raw = _strip_font_faces(cpage, ffpat)
                from calibre.ebooks.chardet import xml_to_unicode
                raw = xml_to_unicode(raw,
                        strip_encoding_pats=True, resolve_entities=True)[0]
                from lxml import html
                # Also strip the rules from every file the cover page links to
                for link in html.fromstring(raw).xpath('//link'):
                    href = link.get('href', '')
                    if href:
                        path = os.path.join(os.path.dirname(cpage), href)
                        if os.path.exists(path):
                            _strip_font_faces(path, ffpat)

            return render_html_svg_workaround(cpage, default_log)
|
||||
|
||||
def get_cover(opf, opf_path, stream, reader=None):
|
||||
raster_cover = opf.raster_cover
|
||||
stream.seek(0)
|
||||
zf = ZipFile(stream)
|
||||
@ -152,27 +200,7 @@ def get_cover(opf, opf_path, stream, reader=None):
|
||||
zf.close()
|
||||
return data
|
||||
|
||||
cpage = opf.first_spine_item()
|
||||
if not cpage:
|
||||
return
|
||||
if reader is not None and reader.encryption_meta.is_encrypted(cpage):
|
||||
return
|
||||
|
||||
with TemporaryDirectory('_epub_meta') as tdir:
|
||||
with CurrentDir(tdir):
|
||||
zf.extractall()
|
||||
if isosx:
|
||||
# On OS X trying to render an HTML cover which uses embedded
|
||||
# fonts more than once in the same process causes a crash in Qt
|
||||
# so be safe and remove the fonts.
|
||||
for f in walk('.'):
|
||||
if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
|
||||
os.remove(f)
|
||||
opf_path = opf_path.replace('/', os.sep)
|
||||
cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
|
||||
if not os.path.exists(cpage):
|
||||
return
|
||||
return render_html_svg_workaround(cpage, default_log)
|
||||
return render_cover(opf, opf_path, zf, reader=reader)
|
||||
|
||||
def get_metadata(stream, extract_cover=True):
|
||||
""" Return metadata as a :class:`Metadata` object """
|
||||
|
@ -222,6 +222,11 @@ def forked_read_metadata(path, tdir):
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
with open(path, 'rb') as f:
|
||||
fmt = os.path.splitext(path)[1][1:].lower()
|
||||
f.seek(0, 2)
|
||||
sz = f.tell()
|
||||
with open(os.path.join(tdir, 'size.txt'), 'wb') as s:
|
||||
s.write(str(sz).encode('ascii'))
|
||||
f.seek(0)
|
||||
mi = get_metadata(f, fmt)
|
||||
if mi.cover_data and mi.cover_data[1]:
|
||||
with open(os.path.join(tdir, 'cover.jpg'), 'wb') as f:
|
||||
|
@ -1019,6 +1019,11 @@ class OPF(object): # {{{
|
||||
mt = item.get('media-type', '')
|
||||
if 'xml' not in mt:
|
||||
return item.get('href', None)
|
||||
for item in self.itermanifest():
|
||||
if item.get('href', None) == cover_id:
|
||||
mt = item.get('media-type', '')
|
||||
if mt.startswith('image/'):
|
||||
return item.get('href', None)
|
||||
|
||||
@dynamic_property
|
||||
def cover(self):
|
||||
|
@ -12,19 +12,14 @@ from urllib import urlencode
|
||||
from threading import Thread
|
||||
from Queue import Queue, Empty
|
||||
|
||||
from lxml.html import tostring
|
||||
|
||||
from calibre import as_unicode
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
|
||||
fixauthors)
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.library.comments import sanitize_comments_html
|
||||
from calibre.utils.date import parse_date
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
from calibre.utils.soupparser import fromstring
|
||||
|
||||
class Worker(Thread): # Get details {{{
|
||||
|
||||
@ -43,6 +38,8 @@ class Worker(Thread): # Get details {{{
|
||||
self.browser = browser.clone_browser()
|
||||
self.cover_url = self.amazon_id = self.isbn = None
|
||||
self.domain = domain
|
||||
from lxml.html import tostring
|
||||
self.tostring = tostring
|
||||
|
||||
months = {
|
||||
'de': {
|
||||
@ -176,6 +173,10 @@ class Worker(Thread): # Get details {{{
|
||||
self.log.exception('get_details failed for url: %r'%self.url)
|
||||
|
||||
def get_details(self):
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.soupparser import fromstring
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
||||
try:
|
||||
raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
|
||||
except Exception as e:
|
||||
@ -210,7 +211,7 @@ class Worker(Thread): # Get details {{{
|
||||
errmsg = root.xpath('//*[@id="errorMessage"]')
|
||||
if errmsg:
|
||||
msg = 'Failed to parse amazon details page: %r'%self.url
|
||||
msg += tostring(errmsg, method='text', encoding=unicode).strip()
|
||||
msg += self.tostring(errmsg, method='text', encoding=unicode).strip()
|
||||
self.log.error(msg)
|
||||
return
|
||||
|
||||
@ -322,10 +323,10 @@ class Worker(Thread): # Get details {{{
|
||||
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
|
||||
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
|
||||
if actual_title:
|
||||
title = tostring(actual_title[0], encoding=unicode,
|
||||
title = self.tostring(actual_title[0], encoding=unicode,
|
||||
method='text').strip()
|
||||
else:
|
||||
title = tostring(tdiv, encoding=unicode, method='text').strip()
|
||||
title = self.tostring(tdiv, encoding=unicode, method='text').strip()
|
||||
return re.sub(r'[(\[].*[)\]]', '', title).strip()
|
||||
|
||||
def parse_authors(self, root):
|
||||
@ -337,7 +338,7 @@ class Worker(Thread): # Get details {{{
|
||||
''')
|
||||
for x in aname:
|
||||
x.tail = ''
|
||||
authors = [tostring(x, encoding=unicode, method='text').strip() for x
|
||||
authors = [self.tostring(x, encoding=unicode, method='text').strip() for x
|
||||
in aname]
|
||||
authors = [a for a in authors if a]
|
||||
return authors
|
||||
@ -356,6 +357,8 @@ class Worker(Thread): # Get details {{{
|
||||
return float(m.group(1))/float(m.group(3)) * 5
|
||||
|
||||
def parse_comments(self, root):
|
||||
from calibre.library.comments import sanitize_comments_html
|
||||
|
||||
desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
|
||||
if desc:
|
||||
desc = desc[0]
|
||||
@ -365,7 +368,7 @@ class Worker(Thread): # Get details {{{
|
||||
for a in desc.xpath('descendant::a[@href]'):
|
||||
del a.attrib['href']
|
||||
a.tag = 'span'
|
||||
desc = tostring(desc, method='html', encoding=unicode).strip()
|
||||
desc = self.tostring(desc, method='html', encoding=unicode).strip()
|
||||
|
||||
# Encoding bug in Amazon data U+fffd (replacement char)
|
||||
# in some examples it is present in place of '
|
||||
@ -602,6 +605,11 @@ class Amazon(Source):
|
||||
Note this method will retry without identifiers automatically if no
|
||||
match is found with identifiers.
|
||||
'''
|
||||
from lxml.html import tostring
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.soupparser import fromstring
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
||||
query, domain = self.create_query(log, title=title, authors=authors,
|
||||
identifiers=identifiers)
|
||||
if query is None:
|
||||
|
@ -12,7 +12,6 @@ from future_builtins import map
|
||||
|
||||
from calibre import browser, random_user_agent
|
||||
from calibre.customize import Plugin
|
||||
from calibre.utils.logging import ThreadSafeLog, FileStream
|
||||
from calibre.utils.config import JSONConfig
|
||||
from calibre.utils.titlecase import titlecase
|
||||
from calibre.utils.icu import capitalize, lower, upper
|
||||
@ -34,6 +33,7 @@ msprefs.defaults['fewer_tags'] = True
|
||||
msprefs.defaults['cover_priorities'] = {'Google':2}
|
||||
|
||||
def create_log(ostream=None):
    '''
    Return a thread safe log at DEBUG level whose only output is a
    ``FileStream`` wrapping ``ostream``.
    '''
    from calibre.utils.logging import FileStream, ThreadSafeLog
    debug_log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
    stream_output = FileStream(ostream)
    debug_log.outputs = [stream_output]
    return debug_log
|
||||
|
@ -12,14 +12,10 @@ from urllib import urlencode
|
||||
from functools import partial
|
||||
from Queue import Queue, Empty
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Source
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.date import parse_date, utcnow
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre import as_unicode
|
||||
|
||||
NAMESPACES = {
|
||||
@ -28,22 +24,6 @@ NAMESPACES = {
|
||||
'db': 'http://www.douban.com/xmlns/',
|
||||
'gd': 'http://schemas.google.com/g/2005'
|
||||
}
|
||||
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||
total_results = XPath('//openSearch:totalResults')
|
||||
start_index = XPath('//openSearch:startIndex')
|
||||
items_per_page = XPath('//openSearch:itemsPerPage')
|
||||
entry = XPath('//atom:entry')
|
||||
entry_id = XPath('descendant::atom:id')
|
||||
title = XPath('descendant::atom:title')
|
||||
description = XPath('descendant::atom:summary')
|
||||
publisher = XPath("descendant::db:attribute[@name='publisher']")
|
||||
isbn = XPath("descendant::db:attribute[@name='isbn13']")
|
||||
date = XPath("descendant::db:attribute[@name='pubdate']")
|
||||
creator = XPath("descendant::db:attribute[@name='author']")
|
||||
booktag = XPath("descendant::db:tag/attribute::name")
|
||||
rating = XPath("descendant::gd:rating/attribute::average")
|
||||
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
|
||||
|
||||
def get_details(browser, url, timeout): # {{{
|
||||
try:
|
||||
if Douban.DOUBAN_API_KEY and Douban.DOUBAN_API_KEY != '':
|
||||
@ -61,6 +41,25 @@ def get_details(browser, url, timeout): # {{{
|
||||
# }}}
|
||||
|
||||
def to_metadata(browser, log, entry_, timeout): # {{{
|
||||
from lxml import etree
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.date import parse_date, utcnow
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
|
||||
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||
entry = XPath('//atom:entry')
|
||||
entry_id = XPath('descendant::atom:id')
|
||||
title = XPath('descendant::atom:title')
|
||||
description = XPath('descendant::atom:summary')
|
||||
publisher = XPath("descendant::db:attribute[@name='publisher']")
|
||||
isbn = XPath("descendant::db:attribute[@name='isbn13']")
|
||||
date = XPath("descendant::db:attribute[@name='pubdate']")
|
||||
creator = XPath("descendant::db:attribute[@name='author']")
|
||||
booktag = XPath("descendant::db:tag/attribute::name")
|
||||
rating = XPath("descendant::gd:rating/attribute::average")
|
||||
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
|
||||
|
||||
|
||||
def get_text(extra, x):
|
||||
try:
|
||||
ans = x(extra)
|
||||
@ -275,6 +274,7 @@ class Douban(Source):
|
||||
|
||||
def get_all_details(self, br, log, entries, abort, # {{{
|
||||
result_queue, timeout):
|
||||
from lxml import etree
|
||||
for relevance, i in enumerate(entries):
|
||||
try:
|
||||
ans = to_metadata(br, log, i, timeout)
|
||||
@ -298,6 +298,13 @@ class Douban(Source):
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=30):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
|
||||
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||
entry = XPath('//atom:entry')
|
||||
|
||||
query = self.create_query(log, title=title, authors=authors,
|
||||
identifiers=identifiers)
|
||||
if not query:
|
||||
|
@ -12,8 +12,6 @@ from urllib import urlencode
|
||||
from functools import partial
|
||||
from Queue import Queue, Empty
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Source
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
@ -29,23 +27,6 @@ NAMESPACES = {
|
||||
'dc' : 'http://purl.org/dc/terms',
|
||||
'gd' : 'http://schemas.google.com/g/2005'
|
||||
}
|
||||
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||
|
||||
total_results = XPath('//openSearch:totalResults')
|
||||
start_index = XPath('//openSearch:startIndex')
|
||||
items_per_page = XPath('//openSearch:itemsPerPage')
|
||||
entry = XPath('//atom:entry')
|
||||
entry_id = XPath('descendant::atom:id')
|
||||
creator = XPath('descendant::dc:creator')
|
||||
identifier = XPath('descendant::dc:identifier')
|
||||
title = XPath('descendant::dc:title')
|
||||
date = XPath('descendant::dc:date')
|
||||
publisher = XPath('descendant::dc:publisher')
|
||||
subject = XPath('descendant::dc:subject')
|
||||
description = XPath('descendant::dc:description')
|
||||
language = XPath('descendant::dc:language')
|
||||
rating = XPath('descendant::gd:rating[@average]')
|
||||
|
||||
def get_details(browser, url, timeout): # {{{
|
||||
try:
|
||||
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||
@ -61,6 +42,24 @@ def get_details(browser, url, timeout): # {{{
|
||||
# }}}
|
||||
|
||||
def to_metadata(browser, log, entry_, timeout): # {{{
|
||||
from lxml import etree
|
||||
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||
|
||||
# total_results = XPath('//openSearch:totalResults')
|
||||
# start_index = XPath('//openSearch:startIndex')
|
||||
# items_per_page = XPath('//openSearch:itemsPerPage')
|
||||
entry = XPath('//atom:entry')
|
||||
entry_id = XPath('descendant::atom:id')
|
||||
creator = XPath('descendant::dc:creator')
|
||||
identifier = XPath('descendant::dc:identifier')
|
||||
title = XPath('descendant::dc:title')
|
||||
date = XPath('descendant::dc:date')
|
||||
publisher = XPath('descendant::dc:publisher')
|
||||
subject = XPath('descendant::dc:subject')
|
||||
description = XPath('descendant::dc:description')
|
||||
language = XPath('descendant::dc:language')
|
||||
rating = XPath('descendant::gd:rating[@average]')
|
||||
|
||||
|
||||
def get_text(extra, x):
|
||||
try:
|
||||
@ -266,6 +265,7 @@ class GoogleBooks(Source):
|
||||
|
||||
def get_all_details(self, br, log, entries, abort, # {{{
|
||||
result_queue, timeout):
|
||||
from lxml import etree
|
||||
for relevance, i in enumerate(entries):
|
||||
try:
|
||||
ans = to_metadata(br, log, i, timeout)
|
||||
@ -289,6 +289,10 @@ class GoogleBooks(Source):
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=30):
|
||||
from lxml import etree
|
||||
XPath = partial(etree.XPath, namespaces=NAMESPACES)
|
||||
entry = XPath('//atom:entry')
|
||||
|
||||
query = self.create_query(log, title=title, authors=authors,
|
||||
identifiers=identifiers)
|
||||
if not query:
|
||||
|
@ -9,12 +9,9 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
from urllib import quote
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Source, Option
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.icu import lower
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
|
||||
@ -122,6 +119,7 @@ class ISBNDB(Source):
|
||||
result_queue.put(result)
|
||||
|
||||
def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
|
||||
from lxml import etree
|
||||
|
||||
def tostring(x):
|
||||
if x is None:
|
||||
@ -198,6 +196,10 @@ class ISBNDB(Source):
|
||||
|
||||
def make_query(self, q, abort, title=None, authors=None, identifiers={},
|
||||
max_pages=10, timeout=30):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
|
||||
page_num = 1
|
||||
parser = etree.XMLParser(recover=True, no_network=True)
|
||||
br = self.browser
|
||||
|
@ -9,18 +9,14 @@ __docformat__ = 'restructuredtext en'
|
||||
'''
|
||||
Fetch metadata using Overdrive Content Reserve
|
||||
'''
|
||||
import re, random, mechanize, copy, json
|
||||
import re, random, copy, json
|
||||
from threading import RLock
|
||||
from Queue import Queue, Empty
|
||||
|
||||
from lxml import html
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Source, Option
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.library.comments import sanitize_comments_html
|
||||
from calibre.utils.soupparser import fromstring
|
||||
|
||||
ovrdrv_data_cache = {}
|
||||
cache_lock = RLock()
|
||||
@ -80,6 +76,7 @@ class OverDrive(Source):
|
||||
|
||||
def download_cover(self, log, result_queue, abort, # {{{
|
||||
title=None, authors=None, identifiers={}, timeout=30):
|
||||
import mechanize
|
||||
cached_url = self.get_cached_cover_url(identifiers)
|
||||
if cached_url is None:
|
||||
log.info('No cached cover found, running identify')
|
||||
@ -170,6 +167,7 @@ class OverDrive(Source):
|
||||
this page attempts to set a cookie that Mechanize doesn't like
|
||||
copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
|
||||
'''
|
||||
import mechanize
|
||||
goodcookies = br._ua_handlers['_cookies'].cookiejar
|
||||
clean_cj = mechanize.CookieJar()
|
||||
cookies_to_copy = []
|
||||
@ -187,6 +185,7 @@ class OverDrive(Source):
|
||||
br.set_cookiejar(clean_cj)
|
||||
|
||||
def overdrive_search(self, br, log, q, title, author):
|
||||
import mechanize
|
||||
# re-initialize the cookiejar so that it's clean
|
||||
clean_cj = mechanize.CookieJar()
|
||||
br.set_cookiejar(clean_cj)
|
||||
@ -303,6 +302,7 @@ class OverDrive(Source):
|
||||
return ''
|
||||
|
||||
def overdrive_get_record(self, br, log, q, ovrdrv_id):
|
||||
import mechanize
|
||||
search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
|
||||
results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
|
||||
|
||||
@ -393,6 +393,11 @@ class OverDrive(Source):
|
||||
|
||||
|
||||
def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log):
|
||||
from lxml import html
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.soupparser import fromstring
|
||||
from calibre.library.comments import sanitize_comments_html
|
||||
|
||||
try:
|
||||
raw = br.open_novisit(metadata_url).read()
|
||||
except Exception, e:
|
||||
|
@ -6,15 +6,11 @@ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
import urllib2
|
||||
import datetime
|
||||
from urllib import quote_plus
|
||||
from Queue import Queue, Empty
|
||||
from lxml import etree, html
|
||||
|
||||
from calibre import as_unicode
|
||||
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Source
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
@ -43,6 +39,7 @@ class Ozon(Source):
|
||||
isbnRegex = re.compile(isbnPattern)
|
||||
|
||||
def get_book_url(self, identifiers): # {{{
|
||||
import urllib2
|
||||
ozon_id = identifiers.get('ozon', None)
|
||||
res = None
|
||||
if ozon_id:
|
||||
@ -81,6 +78,9 @@ class Ozon(Source):
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None,
|
||||
identifiers={}, timeout=30): # {{{
|
||||
from lxml import etree
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
||||
if not self.is_configured():
|
||||
return
|
||||
query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
|
||||
@ -283,6 +283,9 @@ class Ozon(Source):
|
||||
# }}}
|
||||
|
||||
def get_book_details(self, log, metadata, timeout): # {{{
|
||||
from lxml import html, etree
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
||||
url = self.get_book_url(metadata.get_identifiers())[2]
|
||||
|
||||
raw = self.browser.open_novisit(url, timeout=timeout).read()
|
||||
|
@ -12,7 +12,6 @@ from lxml import etree
|
||||
from odf.odf2xhtml import ODF2XHTML
|
||||
|
||||
from calibre import CurrentDir, walk
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class Extract(ODF2XHTML):
|
||||
|
||||
@ -178,16 +177,4 @@ class Extract(ODF2XHTML):
|
||||
return os.path.abspath('metadata.opf')
|
||||
|
||||
|
||||
class ODTInput(InputFormatPlugin):
    '''Input plugin that registers the ``odt`` file type and delegates
    the actual conversion to :class:`Extract`.'''

    name = 'ODT Input'
    author = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types = {'odt'}

    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Run an :class:`Extract` instance over ``stream`` and return whatever
        it returns. ``options``, ``file_ext`` and ``accelerators`` are
        accepted but not used here.
        '''
        # '.' is passed as the second argument -- presumably the output
        # directory, i.e. the current working directory (confirm in Extract).
        extractor = Extract()
        return extractor(stream, '.', log)
|
||||
|
||||
|
||||
|
@ -7,31 +7,38 @@ __docformat__ = 'restructuredtext en'
|
||||
class PDBError(Exception):
|
||||
pass
|
||||
|
||||
FORMAT_READERS = None
|
||||
|
||||
from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
|
||||
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
|
||||
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
|
||||
from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
|
||||
from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
|
||||
def _import_readers():
    '''
    Import the PDB reader classes and store the identity -> Reader class
    table in the module-level ``FORMAT_READERS``.

    The imports are done inside the function so that they only happen when
    this function is actually called.
    '''
    global FORMAT_READERS
    from calibre.ebooks.pdb.ereader.reader import Reader as EReader
    from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
    from calibre.ebooks.pdb.ztxt.reader import Reader as ZTxt
    from calibre.ebooks.pdb.pdf.reader import Reader as Pdf
    from calibre.ebooks.pdb.plucker.reader import Reader as Plucker

    # Keys are the identity strings looked up by get_reader().
    identity_reader_pairs = (
        ('PNPdPPrs', EReader),
        ('PNRdPPrs', EReader),
        ('zTXTGPlm', ZTxt),
        ('TEXtREAd', PalmDoc),
        ('.pdfADBE', Pdf),
        ('DataPlkr', Plucker),
    )
    FORMAT_READERS = dict(identity_reader_pairs)
|
||||
FORMAT_READERS = {
|
||||
'PNPdPPrs': ereader_reader,
|
||||
'PNRdPPrs': ereader_reader,
|
||||
'zTXTGPlm': ztxt_reader,
|
||||
'TEXtREAd': palmdoc_reader,
|
||||
'.pdfADBE': pdf_reader,
|
||||
'DataPlkr': plucker_reader,
|
||||
}
|
||||
|
||||
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
|
||||
from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
|
||||
from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer
|
||||
ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}
|
||||
FORMAT_WRITERS = None
|
||||
def _import_writers():
    '''
    Import the PDB writer classes and store the extension -> Writer class
    table in the module-level ``FORMAT_WRITERS``.

    The imports are done inside the function so that they only happen when
    this function is actually called.
    '''
    global FORMAT_WRITERS
    from calibre.ebooks.pdb.palmdoc.writer import Writer as PalmDoc
    from calibre.ebooks.pdb.ztxt.writer import Writer as ZTxt
    from calibre.ebooks.pdb.ereader.writer import Writer as EReader

    # Keys are the extensions looked up by get_writer().
    extension_writer_pairs = (
        ('doc', PalmDoc),
        ('ztxt', ZTxt),
        ('ereader', EReader),
    )
    FORMAT_WRITERS = dict(extension_writer_pairs)
|
||||
FORMAT_WRITERS = {
|
||||
'doc': palmdoc_writer,
|
||||
'ztxt': ztxt_writer,
|
||||
'ereader': ereader_writer,
|
||||
}
|
||||
|
||||
IDENTITY_TO_NAME = {
|
||||
'PNPdPPrs': 'eReader',
|
||||
@ -69,11 +76,17 @@ def get_reader(identity):
|
||||
'''
|
||||
Returns None if no reader is found for the identity.
|
||||
'''
|
||||
global FORMAT_READERS
|
||||
if FORMAT_READERS is None:
|
||||
_import_readers()
|
||||
return FORMAT_READERS.get(identity, None)
|
||||
|
||||
def get_writer(extension):
    '''
    Look up the writer class registered for ``extension``.

    The writer table is built on the first call, via _import_writers().
    Returns None if no writer is found for extension.
    '''
    # Only read here, never rebound, so no ``global`` statement is needed;
    # _import_writers() is what assigns the module-level name.
    if FORMAT_WRITERS is None:
        _import_writers()
    writers = FORMAT_WRITERS
    return writers.get(extension)
|
||||
|
||||
|
@ -65,7 +65,7 @@ class Reader(object):
|
||||
name = urlunquote(self.stream.read(32).strip('\x00'))
|
||||
size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32()
|
||||
toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))
|
||||
|
||||
|
||||
return toc
|
||||
|
||||
def get_text(self, toc_item, output_dir):
|
||||
@ -89,7 +89,7 @@ class Reader(object):
|
||||
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
|
||||
html.write(output.encode('utf-8'))
|
||||
html.write(output.replace('<TITLE>', '<TITLE> ').encode('utf-8'))
|
||||
|
||||
def get_image(self, toc_item, output_dir):
|
||||
if toc_item.flags != 0:
|
||||
@ -105,7 +105,7 @@ class Reader(object):
|
||||
self.log.debug('Extracting content from file...')
|
||||
html = []
|
||||
images = []
|
||||
|
||||
|
||||
for item in self.toc:
|
||||
if item.name.lower().endswith('html'):
|
||||
self.log.debug('HTML item %s found...' % item.name)
|
||||
|
@ -2,42 +2,9 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, glob, re, textwrap
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
border_style_map = {
|
||||
'single' : 'solid',
|
||||
'double-thickness-border' : 'double',
|
||||
'shadowed-border': 'outset',
|
||||
'double-border': 'double',
|
||||
'dotted-border': 'dotted',
|
||||
'dashed': 'dashed',
|
||||
'hairline': 'solid',
|
||||
'inset': 'inset',
|
||||
'dash-small': 'dashed',
|
||||
'dot-dash': 'dotted',
|
||||
'dot-dot-dash': 'dotted',
|
||||
'outset': 'outset',
|
||||
'tripple': 'double',
|
||||
'triple': 'double',
|
||||
'thick-thin-small': 'solid',
|
||||
'thin-thick-small': 'solid',
|
||||
'thin-thick-thin-small': 'solid',
|
||||
'thick-thin-medium': 'solid',
|
||||
'thin-thick-medium': 'solid',
|
||||
'thin-thick-thin-medium': 'solid',
|
||||
'thick-thin-large': 'solid',
|
||||
'thin-thick-thin-large': 'solid',
|
||||
'wavy': 'ridge',
|
||||
'double-wavy': 'ridge',
|
||||
'striped': 'ridge',
|
||||
'emboss': 'inset',
|
||||
'engrave': 'inset',
|
||||
'frame': 'ridge',
|
||||
}
|
||||
|
||||
class InlineClass(etree.XSLTExtension):
|
||||
|
||||
@ -71,261 +38,3 @@ class InlineClass(etree.XSLTExtension):
|
||||
output_parent.text = ' '.join(classes)
|
||||
|
||||
|
||||
class RTFInput(InputFormatPlugin):
    '''
    Input plugin that converts RTF files to XHTML.

    The conversion runs in the current working directory: the RTF file is
    turned into XML by rtf2xml, embedded pictures are written out as image
    files, the XML is transformed to ``index.xhtml`` with the ``rtf.xsl``
    stylesheet, generated CSS is appended to ``styles.css`` and finally a
    ``metadata.opf`` is written.
    '''

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        '''
        Run rtf2xml's ParseRtf on ``stream`` and return the contents of the
        XML file it produces (``dataxml.xml``) as a byte string.

        ``stream`` is handed to ParseRtf as ``in_file``; convert() passes a
        file name here. If ``self.opts.debug_pipeline`` is set, an
        ``rtfdebug`` directory is created and the parser is run at a higher
        run level with indented output.
        '''
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'dataxml.xml'
        run_lev, debug_dir, indent_out = 1, None, 0
        if getattr(self.opts, 'debug_pipeline', None) is not None:
            try:
                os.mkdir('rtfdebug')
                debug_dir = 'rtfdebug'
                run_lev = 4
                indent_out = 1
                self.log('Running RTFParser in debug mode')
            except Exception:
                # Best effort: fall back to a normal (non debug) run, e.g.
                # when the debug directory cannot be created.
                self.log.warn('Impossible to run RTFParser in debug mode')
        parser = ParseRtf(
            in_file    = stream,
            out_file   = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = indent_out,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 1,

            # debug
            deb_dir = debug_dir,
            run_level = run_lev,
        )
        parser.parse_rtf()
        with open(ofile, 'rb') as f:
            return f.read()

    def extract_images(self, picts):
        '''
        Extract every ``{\\pict ...}`` group from the file ``picts`` and
        write it to an image file in the current directory.

        Returns a dict mapping the 1-based picture number to the file name
        of the image, after the images have been passed through
        convert_images(). Data whose type imghdr cannot identify is assumed
        to be WMF.
        '''
        import imghdr
        self.log('Extracting images...')

        with open(picts, 'rb') as f:
            raw = f.read()
        picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
        # Everything that is not a hex digit is stripped before decoding
        non_hex = re.compile(r'[^a-fA-F0-9]')
        encs = [non_hex.sub('', pict) for pict in picts]

        count = 0
        imap = {}
        for enc in encs:
            if len(enc) % 2 == 1:
                # An odd number of hex digits cannot be decoded, drop the
                # trailing one
                enc = enc[:-1]
            data = enc.decode('hex')
            fmt = imghdr.what(None, data)
            if fmt is None:
                fmt = 'wmf'
            count += 1
            name = '%04d.%s' % (count, fmt)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
        return self.convert_images(imap)

    def convert_images(self, imap):
        '''
        Run convert_image() on every file name in ``imap`` (modified in
        place and returned). Entries that fail to convert are logged and
        left unchanged.
        '''
        self.default_img = None
        for count, val in imap.iteritems():
            try:
                imap[count] = self.convert_image(val)
            except Exception:
                self.log.exception('Failed to convert', val)
        return imap

    def convert_image(self, name):
        '''
        Return the name of a usable image file for ``name``. Files that do
        not end in ``.wmf`` are returned unchanged. WMF files are
        rasterized; if that fails a placeholder image is substituted.
        '''
        if not name.endswith('.wmf'):
            return name
        try:
            return self.rasterize_wmf(name)
        except Exception:
            self.log.exception('Failed to convert WMF image %r'%name)
        return self.replace_wmf(name)

    def replace_wmf(self, name):
        '''
        Write a placeholder image explaining that WMF conversion is not
        supported next to ``name`` (with a ``.jpg`` extension) and return
        the new file name. The placeholder data is generated once and
        cached in ``self.default_img``.
        '''
        from calibre.ebooks import calibre_cover
        if self.default_img is None:
            self.default_img = calibre_cover('Conversion of WMF images is not supported',
            'Use Microsoft Word or OpenOffice to save this RTF file'
            ' as HTML and convert that in calibre.', title_size=36,
            author_size=20)
        name = name.replace('.wmf', '.jpg')
        with open(name, 'wb') as f:
            f.write(self.default_img)
        return name

    def rasterize_wmf(self, name):
        '''
        Pass the contents of the WMF file ``name`` through wmf_unwrap() and
        write the result to a file with a ``.png`` extension. Returns the
        new file name.
        '''
        from calibre.utils.wmf.parse import wmf_unwrap
        with open(name, 'rb') as f:
            data = f.read()
        data = wmf_unwrap(data)
        name = name.replace('.wmf', '.png')
        with open(name, 'wb') as f:
            f.write(data)
        return name

    def write_inline_css(self, ic, border_styles):
        '''
        Append the CSS needed by the generated markup to ``styles.css``.

        :param ic: Object with ``font_sizes`` and ``colors`` sequences; one
                   ``span.fsN`` / ``span.colN`` rule is written per entry.
        :param border_styles: Dict mapping a CSS class name to the
                   declarations for that class, as returned by
                   convert_borders().
        '''
        font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
                enumerate(ic.font_sizes)]
        color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
                enumerate(ic.colors)]
        css = textwrap.dedent('''
        span.none {
            text-decoration: none; font-weight: normal;
            font-style: normal; font-variant: normal
        }

        span.italics { font-style: italic }

        span.bold { font-weight: bold }

        span.small-caps { font-variant: small-caps }

        span.underlined { text-decoration: underline }

        span.strike-through { text-decoration: line-through }

        ''')
        css += '\n'+'\n'.join(font_size_classes)
        css += '\n' +'\n'.join(color_classes)

        for cls, val in border_styles.iteritems():
            css += '\n\n.%s {\n%s\n}'%(cls, val)

        # Opened in append mode, so any existing styles.css is kept
        with open('styles.css', 'ab') as f:
            f.write(css)

    def convert_borders(self, doc):
        '''
        Translate the ``border-cell-*`` attributes of every ``cell`` element
        in ``doc`` into CSS. Each cell gets a ``class`` attribute of the
        form ``border_styleN``; cells with identical styling share a class.

        Returns a dict mapping each class name to its CSS declarations.
        '''
        border_styles = []
        style_map = {}
        for elem in doc.xpath(r'//*[local-name()="cell"]'):
            style = ['border-style: hidden', 'border-width: 1px',
                    'border-color: black']
            for x in ('bottom', 'top', 'left', 'right'):
                bs = elem.get('border-cell-%s-style'%x, None)
                if bs:
                    # Unknown RTF border styles are rendered as solid
                    cbs = border_style_map.get(bs, 'solid')
                    style.append('border-%s-style: %s'%(x, cbs))
                bw = elem.get('border-cell-%s-line-width'%x, None)
                if bw:
                    style.append('border-%s-width: %spt'%(x, bw))
                bc = elem.get('border-cell-%s-color'%x, None)
                if bc:
                    style.append('border-%s-color: %s'%(x, bc))
            style = ';\n'.join(style)
            if style not in border_styles:
                border_styles.append(style)
            idx = border_styles.index(style)
            cls = 'border_style%d'%idx
            style_map[cls] = style
            elem.set('class', cls)
        return style_map

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the RTF file ``stream`` to ``index.xhtml`` in the current
        directory and return the absolute path of the generated
        ``metadata.opf``.

        ``stream`` must have a ``name`` attribute (it is passed to the RTF
        parser) and be seekable (it is re-read for metadata).

        :raises ValueError: If rtf2xml raises RtfInvalidCodeException.
        '''
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)

        # Must be defined even when no picture directory was produced,
        # because the rtf:pict loop below always consults it.
        imap = {}
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            try:
                imap = self.extract_images(d[0])
            except Exception:
                # Best effort: carry on without images
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        border_styles = self.convert_borders(doc)
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                # Replace the picture number with the extracted file name
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = { ('calibre', 'inline-class') : inline_class }
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # clean multiple \n
            res = re.sub('\n+', '\n', res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        # Use a context manager so the OPF file is flushed and closed
        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.abspath('metadata.opf')
|
||||
|
||||
#ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
|
||||
# os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
|
||||
# debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
|
||||
|
@ -16,7 +16,7 @@ from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.conversion.preprocess import DocAnalysis
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
|
||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
|
||||
|
||||
def clean_txt(txt):
|
||||
'''
|
||||
@ -28,7 +28,7 @@ def clean_txt(txt):
|
||||
# Strip whitespace from the end of the line. Also replace
|
||||
# all line breaks with \n.
|
||||
txt = '\n'.join([line.rstrip() for line in txt.splitlines()])
|
||||
|
||||
|
||||
# Replace whitespace at the beginning of the line with
|
||||
txt = re.sub('(?m)(?<=^)([ ]{2,}|\t+)(?=.)', ' ' * 4, txt)
|
||||
|
||||
@ -75,7 +75,7 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
|
||||
'''
|
||||
Converts plain text to html by putting all paragraphs in
|
||||
<p> tags. It condenses and retains blank lines when necessary.
|
||||
|
||||
|
||||
Requires paragraphs to be in single line format.
|
||||
'''
|
||||
txt = clean_txt(txt)
|
||||
@ -215,7 +215,7 @@ def detect_paragraph_type(txt):
|
||||
def detect_formatting_type(txt):
|
||||
'''
|
||||
Tries to determine the formatting of the document.
|
||||
|
||||
|
||||
markdown: Markdown formatting is used.
|
||||
textile: Textile formatting is used.
|
||||
heuristic: When none of the above formatting types are
|
||||
|
@ -102,6 +102,7 @@ gprefs.defaults['cb_fullscreen'] = False
|
||||
gprefs.defaults['worker_max_time'] = 0
|
||||
gprefs.defaults['show_files_after_save'] = True
|
||||
gprefs.defaults['auto_add_path'] = None
|
||||
gprefs.defaults['auto_add_check_for_duplicates'] = False
|
||||
# }}}
|
||||
|
||||
NONE = QVariant() #: Null value to return from the data function of item models
|
||||
|
@ -382,7 +382,8 @@ class Adder(QObject): # {{{
|
||||
if not duplicates:
|
||||
return self.duplicates_processed()
|
||||
self.pd.hide()
|
||||
files = [x[0].title for x in duplicates]
|
||||
files = [_('%s by %s')%(x[0].title, x[0].format_field('authors')[1])
|
||||
for x in duplicates]
|
||||
if question_dialog(self._parent, _('Duplicates found!'),
|
||||
_('Books with the same title as the following already '
|
||||
'exist in the database. Add them anyway?'),
|
||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, tempfile, shutil
|
||||
import os, tempfile, shutil, time
|
||||
from threading import Thread, Event
|
||||
|
||||
from PyQt4.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer)
|
||||
@ -15,6 +15,7 @@ from PyQt4.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer)
|
||||
from calibre import prints
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.ebooks import BOOK_EXTENSIONS
|
||||
from calibre.gui2 import question_dialog, gprefs
|
||||
|
||||
class Worker(Thread):
|
||||
|
||||
@ -41,25 +42,58 @@ class Worker(Thread):
|
||||
traceback.print_exc()
|
||||
|
||||
def auto_add(self):
|
||||
from calibre.utils.ipc.simple_worker import fork_job
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
from calibre.ebooks.metadata.meta import metadata_from_filename
|
||||
|
||||
files = [x for x in os.listdir(self.path) if x not in self.staging
|
||||
and os.path.isfile(os.path.join(self.path, x)) and
|
||||
os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) and
|
||||
os.path.splitext(x)[1][1:].lower() in self.be]
|
||||
files = [x for x in os.listdir(self.path) if
|
||||
# Must not be in the process of being added to the db
|
||||
x not in self.staging
|
||||
# Firefox creates 0 byte placeholder files when downloading
|
||||
and os.stat(os.path.join(self.path, x)).st_size > 0
|
||||
# Must be a file
|
||||
and os.path.isfile(os.path.join(self.path, x))
|
||||
# Must have read and write permissions
|
||||
and os.access(os.path.join(self.path, x), os.R_OK|os.W_OK)
|
||||
# Must be a known ebook file type
|
||||
and os.path.splitext(x)[1][1:].lower() in self.be
|
||||
]
|
||||
data = {}
|
||||
# Give any in progress copies time to complete
|
||||
time.sleep(2)
|
||||
|
||||
for fname in files:
|
||||
f = os.path.join(self.path, fname)
|
||||
|
||||
# Try opening the file for reading, if the OS prevents us, then at
|
||||
# least on windows, it means the file is open in another
|
||||
# application for writing. We will get notified by
|
||||
# QFileSystemWatcher when writing is completed, so ignore for now.
|
||||
try:
|
||||
open(f, 'rb').close()
|
||||
except:
|
||||
continue
|
||||
tdir = tempfile.mkdtemp(dir=self.tdir)
|
||||
try:
|
||||
fork_job('calibre.ebooks.metadata.meta',
|
||||
'forked_read_metadata', (f, tdir), no_output=True)
|
||||
except WorkerError as e:
|
||||
prints('Failed to read metadata from:', fname)
|
||||
prints(e.orig_tb)
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
# Ensure that the pre-metadata file size is present. If it isn't,
|
||||
# write 0 so that the file is rescanned
|
||||
szpath = os.path.join(tdir, 'size.txt')
|
||||
try:
|
||||
with open(szpath, 'rb') as f:
|
||||
int(f.read())
|
||||
except:
|
||||
with open(szpath, 'wb') as f:
|
||||
f.write(b'0')
|
||||
|
||||
opfpath = os.path.join(tdir, 'metadata.opf')
|
||||
try:
|
||||
if os.stat(opfpath).st_size < 30:
|
||||
@ -125,25 +159,71 @@ class AutoAdder(QObject):
|
||||
m = gui.library_view.model()
|
||||
count = 0
|
||||
|
||||
needs_rescan = False
|
||||
duplicates = []
|
||||
|
||||
for fname, tdir in data.iteritems():
|
||||
paths = [os.path.join(self.worker.path, fname)]
|
||||
sz = os.path.join(tdir, 'size.txt')
|
||||
try:
|
||||
with open(sz, 'rb') as f:
|
||||
sz = int(f.read())
|
||||
if sz != os.stat(paths[0]).st_size:
|
||||
raise Exception('Looks like the file was written to after'
|
||||
' we tried to read metadata')
|
||||
except:
|
||||
needs_rescan = True
|
||||
try:
|
||||
self.worker.staging.remove(fname)
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
continue
|
||||
|
||||
mi = os.path.join(tdir, 'metadata.opf')
|
||||
if not os.access(mi, os.R_OK):
|
||||
continue
|
||||
mi = [OPF(open(mi, 'rb'), tdir,
|
||||
populate_spine=False).to_book_metadata()]
|
||||
m.add_books(paths, [os.path.splitext(fname)[1][1:].upper()], mi,
|
||||
add_duplicates=True)
|
||||
dups, num = m.add_books(paths,
|
||||
[os.path.splitext(fname)[1][1:].upper()], mi,
|
||||
add_duplicates=not gprefs['auto_add_check_for_duplicates'])
|
||||
if dups:
|
||||
path = dups[0][0]
|
||||
with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
|
||||
'wb') as dest, open(path, 'rb') as src:
|
||||
shutil.copyfileobj(src, dest)
|
||||
dups[0][0] = dest.name
|
||||
duplicates.append(dups)
|
||||
|
||||
try:
|
||||
os.remove(paths[0])
|
||||
self.worker.staging.remove(fname)
|
||||
except:
|
||||
pass
|
||||
count += num
|
||||
|
||||
if duplicates:
|
||||
paths, formats, metadata = [], [], []
|
||||
for p, f, mis in duplicates:
|
||||
paths.extend(p)
|
||||
formats.extend(f)
|
||||
metadata.extend(mis)
|
||||
files = [_('%s by %s')%(mi.title, mi.format_field('authors')[1])
|
||||
for mi in metadata]
|
||||
if question_dialog(self.parent(), _('Duplicates found!'),
|
||||
_('Books with the same title as the following already '
|
||||
'exist in the database. Add them anyway?'),
|
||||
'\n'.join(files)):
|
||||
dups, num = m.add_books(paths, formats, metadata,
|
||||
add_duplicates=True)
|
||||
count += num
|
||||
|
||||
for tdir in data.itervalues():
|
||||
try:
|
||||
os.remove(os.path.join(self.worker.path, fname))
|
||||
try:
|
||||
self.worker.staging.remove(fname)
|
||||
except KeyError:
|
||||
pass
|
||||
shutil.rmtree(tdir)
|
||||
except:
|
||||
pass
|
||||
count += 1
|
||||
|
||||
if count > 0:
|
||||
m.books_added(count)
|
||||
@ -153,4 +233,7 @@ class AutoAdder(QObject):
|
||||
if hasattr(gui, 'db_images'):
|
||||
gui.db_images.reset()
|
||||
|
||||
if needs_rescan:
|
||||
QTimer.singleShot(2000, self.dir_changed)
|
||||
|
||||
|
||||
|
@ -38,14 +38,24 @@ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
|
||||
ans = unicode(col.name())
|
||||
return ans
|
||||
|
||||
f = QFontInfo(QApplication.font(widget)).pixelSize()
|
||||
fi = QFontInfo(QApplication.font(widget))
|
||||
f = fi.pixelSize()+1
|
||||
fam = unicode(fi.family()).strip().replace('"', '')
|
||||
if not fam:
|
||||
fam = 'sans-serif'
|
||||
|
||||
c = color_to_string(QApplication.palette().color(QPalette.Normal,
|
||||
QPalette.WindowText))
|
||||
templ = u'''\
|
||||
<html>
|
||||
<head>
|
||||
<style type="text/css">
|
||||
body, td {background-color: transparent; font-size: %dpx; color: %s }
|
||||
body, td {
|
||||
background-color: transparent;
|
||||
font-size: %dpx;
|
||||
font-family: "%s",sans-serif;
|
||||
color: %s
|
||||
}
|
||||
</style>
|
||||
<style type="text/css">
|
||||
%s
|
||||
@ -55,7 +65,7 @@ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
|
||||
%%s
|
||||
</body>
|
||||
<html>
|
||||
'''%(f, c, css)
|
||||
'''%(f, fam, c, css)
|
||||
fm = getattr(mi, 'field_metadata', field_metadata)
|
||||
fl = dict(get_field_list(fm))
|
||||
show_comments = (all_fields or fl.get('comments', True))
|
||||
|
@ -31,7 +31,7 @@ class PluginWidget(QWidget, Ui_Form):
|
||||
self.setupUi(self)
|
||||
|
||||
def initialize(self, name, db): #not working properly to update
|
||||
from calibre.library.catalog import FIELDS
|
||||
from calibre.library.catalogs import FIELDS
|
||||
|
||||
self.all_fields = [x for x in FIELDS if x != 'all']
|
||||
#add custom columns
|
||||
|
@ -21,7 +21,7 @@ class PluginWidget(QWidget, Ui_Form):
|
||||
def __init__(self, parent=None):
|
||||
QWidget.__init__(self, parent)
|
||||
self.setupUi(self)
|
||||
from calibre.library.catalog import FIELDS
|
||||
from calibre.library.catalogs import FIELDS
|
||||
self.all_fields = []
|
||||
for x in FIELDS:
|
||||
if x != 'all':
|
||||
|
@ -251,8 +251,12 @@ class EditorWidget(QWebView): # {{{
|
||||
|
||||
def fset(self, val):
|
||||
self.setHtml(val)
|
||||
f = QFontInfo(QApplication.font(self)).pixelSize()
|
||||
style = 'font-size: %dpx;' % (f,)
|
||||
fi = QFontInfo(QApplication.font(self))
|
||||
f = fi.pixelSize()+1
|
||||
fam = unicode(fi.family()).strip().replace('"', '')
|
||||
if not fam:
|
||||
fam = 'sans-serif'
|
||||
style = 'font-size: %fpx; font-family:"%s",sans-serif;' % (f, fam)
|
||||
|
||||
# toList() is needed because PyQt on Debian is old/broken
|
||||
for body in self.page().mainFrame().documentElement().findAll('body').toList():
|
||||
|
@ -7,7 +7,8 @@ import os, traceback, Queue, time, cStringIO, re, sys
|
||||
from threading import Thread
|
||||
|
||||
from PyQt4.Qt import (QMenu, QAction, QActionGroup, QIcon, SIGNAL,
|
||||
Qt, pyqtSignal, QDialog, QObject)
|
||||
Qt, pyqtSignal, QDialog, QObject, QVBoxLayout,
|
||||
QDialogButtonBox)
|
||||
|
||||
from calibre.customize.ui import (available_input_formats, available_output_formats,
|
||||
device_plugins)
|
||||
@ -718,6 +719,31 @@ class DeviceMixin(object): # {{{
|
||||
def disconnect_mounted_device(self):
    '''Delegate to the device manager's ``umount_device()``.'''
    manager = self.device_manager
    manager.umount_device()
|
||||
|
||||
def configure_connected_device(self):
    '''
    Let the user edit the settings of the currently connected device.

    Returns immediately when no device is connected. When the job manager
    reports device jobs (running or queued) an error dialog is shown and
    its result returned. Otherwise the driver's config widget is placed in
    a dialog with OK/Cancel buttons; if the dialog is accepted the driver
    saves the settings and the user is told to disconnect and re-connect
    the device.
    '''
    if not self.device_manager.is_device_connected:
        return
    if self.job_manager.has_device_jobs(queued_also=True):
        return error_dialog(self, _('Running jobs'),
                _('Cannot configure the device while there are running'
                    ' device jobs.'), show=True)

    device = self.device_manager.connected_device
    config_widget = device.config_widget()

    dialog = QDialog(self)
    dialog.setWindowTitle(_('Configure %s')%device.get_gui_name())
    dialog.setWindowIcon(QIcon(I('config.png')))

    layout = QVBoxLayout(dialog)
    dialog.setLayout(layout)
    buttons = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel)
    buttons.accepted.connect(dialog.accept)
    buttons.rejected.connect(dialog.reject)
    # The driver's widget goes above the button row.
    layout.addWidget(config_widget)
    layout.addWidget(buttons)

    if dialog.exec_() != dialog.Accepted:
        # Cancelled: leave the stored settings untouched.
        return

    device.save_settings(config_widget)
    warning_dialog(self, _('Disconnect device'),
            _('Disconnect and re-connect the %s for your changes to'
                ' be applied.')%device.get_gui_name(), show=True,
            show_copy_button=False)
|
||||
|
||||
def _sync_action_triggered(self, *args):
|
||||
m = getattr(self, '_sync_menu', None)
|
||||
if m is not None:
|
||||
|
@ -97,6 +97,7 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
|
||||
l.setWordWrap(True)
|
||||
if settings.extra_customization:
|
||||
self.opt_extra_customization.setText(settings.extra_customization)
|
||||
self.opt_extra_customization.setCursorPosition(0)
|
||||
self.extra_layout.addWidget(l, 0, 0)
|
||||
self.extra_layout.addWidget(self.opt_extra_customization, 1, 0)
|
||||
self.opt_save_template.setText(settings.save_template)
|
||||
|
@ -25,6 +25,7 @@ class LocationManager(QObject): # {{{
|
||||
locations_changed = pyqtSignal()
|
||||
unmount_device = pyqtSignal()
|
||||
location_selected = pyqtSignal(object)
|
||||
configure_device = pyqtSignal()
|
||||
|
||||
def __init__(self, parent=None):
|
||||
QObject.__init__(self, parent)
|
||||
@ -57,6 +58,10 @@ class LocationManager(QObject): # {{{
|
||||
a = m.addAction(QIcon(I('eject.png')), _('Eject this device'))
|
||||
a.triggered.connect(self._eject_requested)
|
||||
self._mem.append(a)
|
||||
a = m.addAction(QIcon(I('config.png')), _('Configure this device'))
|
||||
a.triggered.connect(self._configure_requested)
|
||||
self._mem.append(a)
|
||||
|
||||
else:
|
||||
ac.setToolTip(tooltip)
|
||||
ac.setMenu(m)
|
||||
@ -109,6 +114,9 @@ class LocationManager(QObject): # {{{
|
||||
def _eject_requested(self, *args):
    '''Emit ``unmount_device``; any arguments passed in are ignored.'''
    signal = self.unmount_device
    signal.emit()
|
||||
|
||||
def _configure_requested(self, *args):
    '''
    Emit ``configure_device``; any arguments passed in are ignored.

    Extra positional arguments are accepted (and discarded) so that the
    signature matches ``_eject_requested`` and the slot can be called with
    the ``checked`` flag that ``QAction.triggered`` carries.
    '''
    self.configure_device.emit()
|
||||
|
||||
def update_devices(self, cp=(None, None), fs=[-1, -1, -1], icon=None):
|
||||
if icon is None:
|
||||
icon = I('reader.png')
|
||||
|
@ -138,8 +138,8 @@ class GuiRunner(QObject):
|
||||
if self.splash_screen is not None:
|
||||
self.splash_screen.finish(main)
|
||||
if DEBUG:
|
||||
prints('Started up in', time.time() - self.startup_time, 'with',
|
||||
len(db.data), 'books')
|
||||
prints('Started up in %.2f seconds'%(time.time() -
|
||||
self.startup_time), 'with', len(db.data), 'books')
|
||||
add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False)
|
||||
sys.excepthook = main.unhandled_exception
|
||||
if len(self.args) > 1:
|
||||
|
@ -323,14 +323,19 @@ class Comments(QWebView): # {{{
|
||||
ans = unicode(col.name())
|
||||
return ans
|
||||
|
||||
f = QFontInfo(QApplication.font(self.parent())).pixelSize()
|
||||
fi = QFontInfo(QApplication.font(self.parent()))
|
||||
f = fi.pixelSize()+1
|
||||
fam = unicode(fi.family()).strip().replace('"', '')
|
||||
if not fam:
|
||||
fam = 'sans-serif'
|
||||
|
||||
c = color_to_string(QApplication.palette().color(QPalette.Normal,
|
||||
QPalette.WindowText))
|
||||
templ = '''\
|
||||
<html>
|
||||
<head>
|
||||
<style type="text/css">
|
||||
body, td {background-color: transparent; font-size: %dpx; color: %s }
|
||||
body, td {background-color: transparent; font-family: %s; font-size: %dpx; color: %s }
|
||||
a { text-decoration: none; color: blue }
|
||||
div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
|
||||
table { margin-bottom: 0; padding-bottom: 0; }
|
||||
@ -342,7 +347,7 @@ class Comments(QWebView): # {{{
|
||||
</div>
|
||||
</body>
|
||||
<html>
|
||||
'''%(f, c)
|
||||
'''%(fam, f, c)
|
||||
self.setHtml(templ%html)
|
||||
# }}}
|
||||
|
||||
|
@ -32,6 +32,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
|
||||
r('automerge', gprefs, choices=choices)
|
||||
r('new_book_tags', prefs, setting=CommaSeparatedList)
|
||||
r('auto_add_path', gprefs, restart_required=True)
|
||||
r('auto_add_check_for_duplicates', gprefs)
|
||||
|
||||
self.filename_pattern = FilenamePattern(self)
|
||||
self.metadata_box.layout().insertWidget(0, self.filename_pattern)
|
||||
|
@ -193,6 +193,19 @@ Author matching is exact.</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="opt_auto_add_check_for_duplicates">
|
||||
<property name="toolTip">
|
||||
<string>If set, this option will cause calibre to check if a file
|
||||
being auto-added is already in the calibre library.
|
||||
If it is, a message will pop up asking you whether
|
||||
you want to add it anyway.</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Check for &duplicates when auto-adding files</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="verticalSpacer_2">
|
||||
<property name="orientation">
|
||||
|
@ -171,7 +171,7 @@ class Preferences(QMainWindow):
|
||||
self.committed = False
|
||||
self.close_after_initial = close_after_initial
|
||||
|
||||
self.resize(900, 720)
|
||||
self.resize(930, 720)
|
||||
nh, nw = min_available_height()-25, available_width()-10
|
||||
if nh < 0:
|
||||
nh = 800
|
||||
|
@ -45,6 +45,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
self.cache_pool = CacheUpdateThreadPool(self.cache_thread_count)
|
||||
self.results_view.model().cover_pool.set_thread_count(self.cover_thread_count)
|
||||
self.results_view.model().details_pool.set_thread_count(self.details_thread_count)
|
||||
self.results_view.setCursor(Qt.PointingHandCursor)
|
||||
|
||||
# Check for results and hung threads.
|
||||
self.checker = QTimer()
|
||||
@ -90,7 +91,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
|
||||
def setup_store_checks(self):
|
||||
first_run = self.config.get('first_run', True)
|
||||
|
||||
|
||||
# Add check boxes for each store so the user
|
||||
# can disable searching specific stores on a
|
||||
# per search basis.
|
||||
@ -117,7 +118,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
self.store_checks[x] = cbox
|
||||
store_list_layout.setRowStretch(store_list_layout.rowCount(), 10)
|
||||
self.store_list.setWidget(stores_check_widget)
|
||||
|
||||
|
||||
self.config['first_run'] = False
|
||||
|
||||
def build_adv_search(self):
|
||||
@ -340,7 +341,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
|
||||
def result_item_activated(self, index):
|
||||
result = self.results_view.model().get_result(index)
|
||||
|
||||
|
||||
if result.downloads:
|
||||
self.download_book(result)
|
||||
else:
|
||||
@ -353,7 +354,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
fname = result.title[:60] + '.' + ext.lower()
|
||||
fname = ascii_filename(fname)
|
||||
self.gui.download_ebook(result.downloads[ext], filename=fname)
|
||||
|
||||
|
||||
def open_store(self, result):
|
||||
self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked())
|
||||
|
||||
|
@ -265,6 +265,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
|
||||
####################### Location Manager ########################
|
||||
self.location_manager.location_selected.connect(self.location_selected)
|
||||
self.location_manager.unmount_device.connect(self.device_manager.umount_device)
|
||||
self.location_manager.configure_device.connect(self.configure_connected_device)
|
||||
self.eject_action.triggered.connect(self.device_manager.umount_device)
|
||||
|
||||
#################### Update notification ###################
|
||||
|
@ -28,6 +28,7 @@ from calibre.gui2 import min_available_height, available_width
|
||||
from calibre.utils.config import dynamic, prefs
|
||||
from calibre.gui2 import NONE, choose_dir, error_dialog
|
||||
from calibre.gui2.dialogs.progress import ProgressDialog
|
||||
from calibre.customize.ui import device_plugins
|
||||
|
||||
# Devices {{{
|
||||
|
||||
@ -251,15 +252,39 @@ class Android(Device):
|
||||
id = 'android'
|
||||
supports_color = True
|
||||
|
||||
class AndroidTablet(Device):
|
||||
@classmethod
def commit(cls):
    '''
    Run the parent class's ``commit()`` and then call
    ``configure_for_generic_epub_app()`` on every device plugin (disabled
    ones included) whose name is 'Android driver'.
    '''
    super(Android, cls).commit()
    wanted = 'Android driver'
    for driver in device_plugins(include_disabled=True):
        if driver.name != wanted:
            continue
        driver.configure_for_generic_epub_app()
|
||||
|
||||
# Device entry for Android tablets. Derives from Android and overrides the
# descriptive attributes below; anything not set here comes from Android.
class AndroidTablet(Android):

    name = 'Android tablet'
    output_format = 'EPUB'
    manufacturer = 'Android'
    id = 'android_tablet'
    supports_color = True
    # Differs from the base entry by selecting the 'tablet' output profile.
    output_profile = 'tablet'
|
||||
|
||||
class AndroidPhoneWithKindle(Android):
    '''
    Device entry for an Android phone used with the Kindle reader app:
    MOBI output with the 'kindle' output profile.
    '''

    name = 'Android phone with Kindle reader'
    output_format = 'MOBI'
    id = 'android_phone_with_kindle'
    output_profile = 'kindle'

    @classmethod
    def commit(cls):
        '''
        Run the ``commit()`` found after Android in the MRO, then call
        ``configure_for_kindle_app()`` on every device plugin (disabled
        ones included) whose name is 'Android driver'.
        '''
        # super(Android, cls) starts the lookup *after* Android, so
        # Android's own commit() is not run here.
        # NOTE(review): presumably deliberate, so the generic EPUB app
        # configuration is not applied before the Kindle one -- confirm.
        super(Android, cls).commit()
        wanted = 'Android driver'
        for driver in device_plugins(include_disabled=True):
            if driver.name != wanted:
                continue
            driver.configure_for_kindle_app()
|
||||
|
||||
# Tablet variant of AndroidPhoneWithKindle: only the display name, id and
# output profile are overridden; everything else is inherited.
class AndroidTabletWithKindle(AndroidPhoneWithKindle):

    name = 'Android tablet with Kindle reader'
    id = 'android_tablet_with_kindle'
    output_profile = 'kindle_fire'
|
||||
|
||||
class HanlinV3(Device):
|
||||
|
||||
name = 'Hanlin V3'
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user