Merge from trunk

This commit is contained in:
Charles Haley 2011-05-05 20:05:58 +01:00
commit 022925988a
21 changed files with 296 additions and 450 deletions

View File

@ -18,7 +18,7 @@ class TelepolisNews(BasicNewsRecipe):
recursion = 0 recursion = 0
no_stylesheets = True no_stylesheets = True
encoding = "utf-8" encoding = "utf-8"
language = 'de_AT' language = 'de'
use_embedded_content =False use_embedded_content =False
remove_empty_feeds = True remove_empty_feeds = True

View File

@ -7,13 +7,11 @@ usatoday.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
import re
class USAToday(BasicNewsRecipe): class USAToday(BasicNewsRecipe):
title = 'USA Today' title = 'USA Today'
__author__ = 'GRiker' __author__ = 'Kovid Goyal'
oldest_article = 1 oldest_article = 1
timefmt = '' timefmt = ''
max_articles_per_feed = 20 max_articles_per_feed = 20
@ -31,7 +29,6 @@ class USAToday(BasicNewsRecipe):
margin-bottom: 0em; \ margin-bottom: 0em; \
font-size: smaller;}\n \ font-size: smaller;}\n \
.articleBody {text-align: left;}\n ' .articleBody {text-align: left;}\n '
conversion_options = { 'linearize_tables' : True }
#simultaneous_downloads = 1 #simultaneous_downloads = 1
feeds = [ feeds = [
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'), ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
@ -47,63 +44,26 @@ class USAToday(BasicNewsRecipe):
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'), ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'), ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
] ]
keep_only_tags = [dict(attrs={'class':[ keep_only_tags = [dict(attrs={'class':'story'})]
'byLine', remove_tags = [
'inside-copy', dict(attrs={'class':[
'inside-head', 'share',
'inside-head2', 'reprints',
'item', 'inline-h3',
'item-block', 'info-extras',
'photo-container', 'ppy-outer',
]}), 'ppy-caption',
dict(id=[ 'comments',
'applyMainStoryPhoto', 'jump',
'permalink', 'pagetools',
])] 'post-attributes',
'tags',
'bottom-tools',
'sponsoredlinks',
]}),
dict(id=['pluck']),
]
remove_tags = [dict(attrs={'class':[
'comments',
'jump',
'pagetools',
'post-attributes',
'tags',
]}),
dict(id=[])]
#feeds = [('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles')]
def dump_hex(self, src, length=16):
''' Diagnostic '''
FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
N=0; result=''
while src:
s,src = src[:length],src[length:]
hexa = ' '.join(["%02X"%ord(x) for x in s])
s = s.translate(FILTER)
result += "%04X %-*s %s\n" % (N, length*3, hexa, s)
N+=length
print result
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","‘",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","’",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","–",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
return fixed
def get_masthead_url(self): def get_masthead_url(self):
masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif' masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
@ -115,321 +75,4 @@ class USAToday(BasicNewsRecipe):
masthead = None masthead = None
return masthead return masthead
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def parse_feeds(self, *args, **kwargs):
parsed_feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs)
# Count articles for progress dialog
article_count = 0
for feed in parsed_feeds:
article_count += len(feed)
self.log( "Queued %d articles" % article_count)
return parsed_feeds
def preprocess_html(self, soup):
soup = self.strip_anchors(soup)
return soup
def postprocess_html(self, soup, first_fetch):
# Remove navLinks <div class="inside-copy" style="padding-bottom:3px">
navLinks = soup.find(True,{'style':'padding-bottom:3px'})
if navLinks:
navLinks.extract()
# Remove <div class="inside-copy" style="margin-bottom:10px">
gibberish = soup.find(True,{'style':'margin-bottom:10px'})
if gibberish:
gibberish.extract()
# Change <inside-head> to <h2>
headline = soup.find(True, {'class':['inside-head','inside-head2']})
if not headline:
headline = soup.find('h3')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, headline.contents[0])
headline.replaceWith(tag)
else:
print "unable to find headline:\n%s\n" % soup
# Change byLine to byline, change commas to middot
# Kindle renders commas in byline as '&'
byline = soup.find(True, {'class':'byLine'})
if byline:
byline['class'] = 'byline'
# Replace comma with middot
byline.contents[0].replaceWith(re.sub(","," &middot;", byline.renderContents()))
jumpout_punc_list = [':','?']
# Remove the inline jumpouts in <div class="inside-copy">
paras = soup.findAll(True, {'class':'inside-copy'})
for para in paras:
if re.match("<b>[\w\W]+ ",para.renderContents()):
p = para.find('b')
for punc in jumpout_punc_list:
punc_offset = p.contents[0].find(punc)
if punc_offset == -1:
continue
if punc_offset > 1:
if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
#print "extracting \n%s\n" % para.prettify()
para.extract()
# Reset class for remaining
paras = soup.findAll(True, {'class':'inside-copy'})
for para in paras:
para['class'] = 'articleBody'
# Remove inline jumpouts in <p>
paras = soup.findAll(['p'])
for p in paras:
if hasattr(p,'contents') and len(p.contents):
for punc in jumpout_punc_list:
punc_offset = p.contents[0].find(punc)
if punc_offset == -1:
continue
if punc_offset > 2 and hasattr(p,'a') and len(p.contents):
#print "evaluating %s\n" % p.contents[0][:punc_offset+1]
if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
#print "extracting \n%s\n" % p.prettify()
p.extract()
# Capture the first img, insert after headline
imgs = soup.findAll('img')
print "postprocess_html(): %d images" % len(imgs)
if imgs:
divTag = Tag(soup, 'div')
divTag['class'] = 'image'
body = soup.find('body')
img = imgs[0]
#print "img: \n%s\n" % img.prettify()
# Table for photo and credit
tableTag = Tag(soup,'table')
# Photo
trimgTag = Tag(soup, 'tr')
tdimgTag = Tag(soup, 'td')
tdimgTag.insert(0,img)
trimgTag.insert(0,tdimgTag)
tableTag.insert(0,trimgTag)
# Credit
trcreditTag = Tag(soup, 'tr')
tdcreditTag = Tag(soup, 'td')
tdcreditTag['class'] = 'credit'
credit = soup.find('td',{'class':'photoCredit'})
if credit:
tdcreditTag.insert(0,NavigableString(credit.renderContents()))
else:
credit = img['credit']
if credit:
tdcreditTag.insert(0,NavigableString(credit))
else:
tdcreditTag.insert(0,NavigableString(''))
trcreditTag.insert(0,tdcreditTag)
tableTag.insert(1,trcreditTag)
dtc = 0
divTag.insert(dtc,tableTag)
dtc += 1
if False:
# Add the caption in the table
tableCaptionTag = Tag(soup,'caption')
tableCaptionTag.insert(0,soup.find('td',{'class':'photoCredit'}).renderContents())
tableTag.insert(1,tableCaptionTag)
divTag.insert(dtc,tableTag)
dtc += 1
body.insert(1,divTag)
else:
# Add the caption below the table
#print "Looking for caption in this soup:\n%s" % img.prettify()
captionTag = Tag(soup,'p')
captionTag['class'] = 'caption'
if hasattr(img,'alt') and img['alt']:
captionTag.insert(0,NavigableString('<blockquote>%s</blockquote>' % img['alt']))
divTag.insert(dtc, captionTag)
dtc += 1
else:
try:
captionTag.insert(0,NavigableString('<blockquote>%s</blockquote>' % img['cutline']))
divTag.insert(dtc, captionTag)
dtc += 1
except:
pass
hrTag = Tag(soup, 'hr')
divTag.insert(dtc, hrTag)
dtc += 1
# Delete <div id="applyMainStoryPhoto"
photoJunk = soup.find('div',{'id':'applyMainStoryPhoto'})
if photoJunk:
photoJunk.extract()
# Insert img after headline
tag = body.find(True)
insertLoc = 0
headline_found = False
while True:
# Scan the top-level tags
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'headline':
headline_found = True
body.insert(insertLoc,divTag)
break
tag = tag.nextSibling
if not tag:
break
if not headline_found:
# Monolithic <div> - restructure
tag = body.find(True)
while True:
insertLoc += 1
try:
if hasattr(tag,'class') and tag['class'] == 'headline':
headline_found = True
tag.insert(insertLoc,divTag)
break
except:
pass
tag = tag.next
if not tag:
break
# Yank out headline, img and caption
headline = body.find('h2','headline')
img = body.find('div','image')
caption = body.find('p''class')
# body(0) is calibre_navbar
# body(1) is <div class="item">
btc = 1
headline.extract()
body.insert(1, headline)
btc += 1
if img:
img.extract()
body.insert(btc, img)
btc += 1
if caption:
caption.extract()
body.insert(btc, caption)
btc += 1
if len(imgs) > 1:
if True:
[img.extract() for img in imgs[1:]]
else:
# Format the remaining images
# This doesn't work yet
for img in imgs[1:]:
print "img:\n%s\n" % img.prettify()
divTag = Tag(soup, 'div')
divTag['class'] = 'image'
# Table for photo and credit
tableTag = Tag(soup,'table')
# Photo
trimgTag = Tag(soup, 'tr')
tdimgTag = Tag(soup, 'td')
tdimgTag.insert(0,img)
trimgTag.insert(0,tdimgTag)
tableTag.insert(0,trimgTag)
# Credit
trcreditTag = Tag(soup, 'tr')
tdcreditTag = Tag(soup, 'td')
tdcreditTag['class'] = 'credit'
try:
tdcreditTag.insert(0,NavigableString(img['credit']))
except:
tdcreditTag.insert(0,NavigableString(''))
trcreditTag.insert(0,tdcreditTag)
tableTag.insert(1,trcreditTag)
divTag.insert(0,tableTag)
soup.img.replaceWith(divTag)
return soup
def postprocess_book(self, oeb, opts, log) :
def extract_byline(href) :
# <meta name="byline" content=
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
byline = soup.find('div',attrs={'class':'byline'})
if byline:
byline['class'] = 'byline'
# Replace comma with middot
byline.contents[0].replaceWith(re.sub(u",", u" &middot;",
byline.renderContents(encoding=None)))
return byline.renderContents(encoding=None)
else :
paras = soup.findAll(text=True)
for para in paras:
if para.startswith("Copyright"):
return para[len('Copyright xxxx '):para.find('.')]
return None
def extract_description(href) :
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
description = soup.find('meta',attrs={'name':'description'})
if description :
return self.massageNCXText(description['content'])
else:
# Take first paragraph of article
articleBody = soup.find('div',attrs={'id':['articleBody','item']})
if articleBody:
paras = articleBody.findAll('p')
for p in paras:
if p.renderContents() > '' :
return self.massageNCXText(self.tag_to_string(p,use_alt=False))
else:
print "Didn't find <div id='articleBody'> in this soup:\n%s" % soup.prettify()
return None
# Method entry point here
# Single section toc looks different than multi-section tocs
if oeb.toc.depth() == 2 :
for article in oeb.toc :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href)
elif oeb.toc.depth() == 3 :
for section in oeb.toc :
for article in section :
article.author = extract_byline(article.href)
'''
if article.author is None :
article.author = self.massageNCXText(extract_byline(article.href))
else:
article.author = self.massageNCXText(article.author)
'''
if article.description is None :
article.description = extract_description(article.href)
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup

View File

@ -164,7 +164,7 @@ class APNXBuilder(object):
if c == '/': if c == '/':
closing = True closing = True
continue continue
elif c in ('d', 'p'): elif c == 'p':
if closing: if closing:
in_p = False in_p = False
else: else:

View File

@ -7,10 +7,12 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os
import posixpath
from calibre import walk from calibre import guess_type, walk
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
class HTMLZInput(InputFormatPlugin): class HTMLZInput(InputFormatPlugin):
@ -27,7 +29,7 @@ class HTMLZInput(InputFormatPlugin):
# Extract content from zip archive. # Extract content from zip archive.
zf = ZipFile(stream) zf = ZipFile(stream)
zf.extractall('.') zf.extractall()
for x in walk('.'): for x in walk('.'):
if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'): if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
@ -70,5 +72,24 @@ class HTMLZInput(InputFormatPlugin):
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
mi = get_file_type_metadata(stream, file_ext) mi = get_file_type_metadata(stream, file_ext)
meta_info_to_oeb_metadata(mi, oeb.metadata, log) meta_info_to_oeb_metadata(mi, oeb.metadata, log)
# Get the cover path from the OPF.
cover_href = None
opf = None
for x in walk('.'):
if os.path.splitext(x)[1].lower() in ('.opf'):
opf = x
break
if opf:
opf = OPF(opf)
cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
# Set the cover.
if cover_href:
cdata = None
with open(cover_href, 'rb') as cf:
cdata = cf.read()
id, href = oeb.manifest.generate('cover', cover_href)
oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
oeb.guide.add('cover', 'Cover', href)
return oeb return oeb

View File

@ -7,11 +7,13 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os
from cStringIO import StringIO
from lxml import etree from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \ from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation OptionRecommendation
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
@ -79,10 +81,31 @@ class HTMLZOutput(OutputFormatPlugin):
fname = os.path.join(tdir, 'images', images[item.href]) fname = os.path.join(tdir, 'images', images[item.href])
with open(fname, 'wb') as img: with open(fname, 'wb') as img:
img.write(data) img.write(data)
# Cover
cover_path = None
try:
cover_data = None
if oeb_book.metadata.cover:
term = oeb_book.metadata.cover[0].term
cover_data = oeb_book.guide[term].item.data
if cover_data:
from calibre.utils.magick.draw import save_cover_data_to
cover_path = os.path.join(tdir, 'cover.jpg')
with open(cover_path, 'w') as cf:
cf.write('')
save_cover_data_to(cover_data, cover_path)
except:
import traceback
traceback.print_exc()
# Metadata # Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf: with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
mdataf.write(etree.tostring(oeb_book.metadata.to_opf1())) opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
mi = opf.to_book_metadata()
if cover_path:
mi.cover = 'cover.jpg'
mdataf.write(metadata_to_opf(mi))
htmlz = ZipFile(output_path, 'w') htmlz = ZipFile(output_path, 'w')
htmlz.add_dir(tdir) htmlz.add_dir(tdir)

View File

@ -13,7 +13,7 @@ import posixpath
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile, safe_replace from calibre.utils.zipfile import ZipFile, safe_replace
@ -31,9 +31,9 @@ def get_metadata(stream, extract_cover=True):
opf = OPF(opf_stream) opf = OPF(opf_stream)
mi = opf.to_book_metadata() mi = opf.to_book_metadata()
if extract_cover: if extract_cover:
cover_name = opf.raster_cover cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
if cover_name: if cover_href:
mi.cover_data = ('jpg', zf.read(cover_name)) mi.cover_data = ('jpg', zf.read(cover_href))
except: except:
return mi return mi
return mi return mi
@ -59,17 +59,20 @@ def set_metadata(stream, mi):
except: except:
pass pass
if new_cdata: if new_cdata:
raster_cover = opf.raster_cover cover = opf.cover
if not raster_cover: if not cover:
raster_cover = 'cover.jpg' cover = 'cover.jpg'
cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover) cpath = posixpath.join(posixpath.dirname(opf_path), cover)
new_cover = _write_new_cover(new_cdata, cpath) new_cover = _write_new_cover(new_cdata, cpath)
replacements[cpath] = open(new_cover.name, 'rb') replacements[cpath] = open(new_cover.name, 'rb')
mi.cover = cover
# Update the metadata. # Update the metadata.
opf.smart_update(mi, replace_metadata=True) old_mi = opf.to_book_metadata()
old_mi.smart_update(mi)
opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True)
newopf = StringIO(opf.render()) newopf = StringIO(opf.render())
safe_replace(stream, opf_path, newopf, extra_replacements=replacements) safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
# Cleanup temporary files. # Cleanup temporary files.
try: try:

View File

@ -966,7 +966,9 @@ class OPF(object): # {{{
cover_id = covers[0].get('content') cover_id = covers[0].get('content')
for item in self.itermanifest(): for item in self.itermanifest():
if item.get('id', None) == cover_id: if item.get('id', None) == cover_id:
return item.get('href', None) mt = item.get('media-type', '')
if 'xml' not in mt:
return item.get('href', None)
@dynamic_property @dynamic_property
def cover(self): def cover(self):

View File

@ -13,6 +13,7 @@ from Queue import Queue, Empty
from threading import Thread from threading import Thread
from io import BytesIO from io import BytesIO
from operator import attrgetter from operator import attrgetter
from urlparse import urlparse
from calibre.customize.ui import metadata_plugins, all_metadata_plugins from calibre.customize.ui import metadata_plugins, all_metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log, msprefs from calibre.ebooks.metadata.sources.base import create_log, msprefs
@ -458,6 +459,14 @@ def urls_from_identifiers(identifiers): # {{{
if oclc: if oclc:
ans.append(('OCLC', 'oclc', oclc, ans.append(('OCLC', 'oclc', oclc,
'http://www.worldcat.org/oclc/'+oclc)) 'http://www.worldcat.org/oclc/'+oclc))
url = identifiers.get('uri', None)
if url is None:
url = identifiers.get('url', None)
if url and url.startswith('http'):
url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
parts = urlparse(url)
name = parts.netloc
ans.append((name, 'url', url, url))
return ans return ans
# }}} # }}}

View File

@ -7,6 +7,8 @@ __docformat__ = 'restructuredtext en'
Convert an ODT file into a Open Ebook Convert an ODT file into a Open Ebook
''' '''
import os import os
from lxml import etree
from odf.odf2xhtml import ODF2XHTML from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk from calibre import CurrentDir, walk
@ -23,7 +25,51 @@ class Extract(ODF2XHTML):
with open(name, 'wb') as f: with open(name, 'wb') as f:
f.write(data) f.write(data)
def __call__(self, stream, odir): def filter_css(self, html, log):
root = etree.fromstring(html)
style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
if style:
style = style[0]
css = style.text
if css:
style.text, sel_map = self.do_filter_css(css)
for x in root.xpath('//*[@class]'):
extra = []
orig = x.get('class')
for cls in orig.split():
extra.extend(sel_map.get(cls, []))
if extra:
x.set('class', orig + ' ' + ' '.join(extra))
html = etree.tostring(root, encoding='utf-8',
xml_declaration=True)
return html
def do_filter_css(self, css):
from cssutils import parseString
from cssutils.css import CSSRule
sheet = parseString(css)
rules = list(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
sel_map = {}
count = 0
for r in rules:
# Check if we have only class selectors for this rule
nc = [x for x in r.selectorList if not
x.selectorText.startswith('.')]
if len(r.selectorList) > 1 and not nc:
# Replace all the class selectors with a single class selector
# This will be added to the class attribute of all elements
# that have one of these selectors.
replace_name = 'c_odt%d'%count
count += 1
for sel in r.selectorList:
s = sel.selectorText[1:]
if s not in sel_map:
sel_map[s] = []
sel_map[s].append(replace_name)
r.selectorText = '.'+replace_name
return sheet.cssText, sel_map
def __call__(self, stream, odir, log):
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
@ -32,13 +78,17 @@ class Extract(ODF2XHTML):
if not os.path.exists(odir): if not os.path.exists(odir):
os.makedirs(odir) os.makedirs(odir)
with CurrentDir(odir): with CurrentDir(odir):
print 'Extracting ODT file...' log('Extracting ODT file...')
html = self.odf2xhtml(stream) html = self.odf2xhtml(stream)
# A blanket img specification like this causes problems # A blanket img specification like this causes problems
# with EPUB output as the contaiing element often has # with EPUB output as the containing element often has
# an absolute height and width set that is larger than # an absolute height and width set that is larger than
# the available screen real estate # the available screen real estate
html = html.replace('img { width: 100%; height: 100%; }', '') html = html.replace('img { width: 100%; height: 100%; }', '')
try:
html = self.filter_css(html, log)
except:
log.exception('Failed to filter CSS, conversion may be slow')
with open('index.xhtml', 'wb') as f: with open('index.xhtml', 'wb') as f:
f.write(html.encode('utf-8')) f.write(html.encode('utf-8'))
zf = ZipFile(stream, 'r') zf = ZipFile(stream, 'r')
@ -67,7 +117,7 @@ class ODTInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
return Extract()(stream, '.') return Extract()(stream, '.', log)
def postprocess_book(self, oeb, opts, log): def postprocess_book(self, oeb, opts, log):
# Fix <p><div> constructs as the asinine epubchecker complains # Fix <p><div> constructs as the asinine epubchecker complains

View File

@ -36,7 +36,7 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
m.clear('description') m.clear('description')
m.add('description', mi.comments) m.add('description', mi.comments)
elif override_input_metadata: elif override_input_metadata:
m.clear('description') m.clear('description')
if not mi.is_null('publisher'): if not mi.is_null('publisher'):
m.clear('publisher') m.clear('publisher')
m.add('publisher', mi.publisher) m.add('publisher', mi.publisher)

View File

@ -15,7 +15,6 @@ import cStringIO
from lxml import etree from lxml import etree
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from calibre.utils.filenames import ascii_text
from calibre.utils.magick.draw import save_cover_data_to, identify_data from calibre.utils.magick.draw import save_cover_data_to, identify_data
TAGS = { TAGS = {
@ -79,8 +78,7 @@ def txt2rtf(text):
elif val <= 127: elif val <= 127:
buf.write(x) buf.write(x)
else: else:
repl = ascii_text(x) c = r'\u{0:d}?'.format(val)
c = r'\uc{2}\u{0:d}{1}'.format(val, repl, len(repl))
buf.write(c) buf.write(c)
return buf.getvalue() return buf.getvalue()

View File

@ -246,7 +246,7 @@ class ChooseLibraryAction(InterfaceAction):
def delete_requested(self, name, location): def delete_requested(self, name, location):
loc = location.replace('/', os.sep) loc = location.replace('/', os.sep)
if not question_dialog(self.gui, _('Are you sure?'), '<p>'+ if not question_dialog(self.gui, _('Are you sure?'), '<p>'+
_('All files from %s will be ' _('<b style="color: red">All files</b> from <br><br><b>%s</b><br><br> will be '
'<b>permanently deleted</b>. Are you sure?') % loc, '<b>permanently deleted</b>. Are you sure?') % loc,
show_copy_button=False): show_copy_button=False):
return return

View File

@ -7,16 +7,16 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, shutil import os, shutil
from contextlib import closing
from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED
from PyQt4.Qt import QDialog from PyQt4.Qt import QDialog
from calibre.constants import isosx from calibre.constants import isosx
from calibre.gui2 import open_local_file from calibre.gui2 import open_local_file, error_dialog
from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog
from calibre.libunzip import extract as zipextract from calibre.libunzip import extract as zipextract
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import (PersistentTemporaryDirectory,
PersistentTemporaryFile)
class TweakEpub(QDialog, Ui_Dialog): class TweakEpub(QDialog, Ui_Dialog):
''' '''
@ -37,11 +37,15 @@ class TweakEpub(QDialog, Ui_Dialog):
self.cancel_button.clicked.connect(self.reject) self.cancel_button.clicked.connect(self.reject)
self.explode_button.clicked.connect(self.explode) self.explode_button.clicked.connect(self.explode)
self.rebuild_button.clicked.connect(self.rebuild) self.rebuild_button.clicked.connect(self.rebuild)
self.preview_button.clicked.connect(self.preview)
# Position update dialog overlaying top left of app window # Position update dialog overlaying top left of app window
parent_loc = parent.pos() parent_loc = parent.pos()
self.move(parent_loc.x(),parent_loc.y()) self.move(parent_loc.x(),parent_loc.y())
self.gui = parent
self._preview_files = []
def cleanup(self): def cleanup(self):
if isosx: if isosx:
try: try:
@ -55,6 +59,11 @@ class TweakEpub(QDialog, Ui_Dialog):
# Delete directory containing exploded ePub # Delete directory containing exploded ePub
if self._exploded is not None: if self._exploded is not None:
shutil.rmtree(self._exploded, ignore_errors=True) shutil.rmtree(self._exploded, ignore_errors=True)
for x in self._preview_files:
try:
os.remove(x)
except:
pass
def display_exploded(self): def display_exploded(self):
''' '''
@ -71,9 +80,8 @@ class TweakEpub(QDialog, Ui_Dialog):
self.rebuild_button.setEnabled(True) self.rebuild_button.setEnabled(True)
self.explode_button.setEnabled(False) self.explode_button.setEnabled(False)
def rebuild(self, *args): def do_rebuild(self, src):
self._output = os.path.join(self._exploded, 'rebuilt.epub') with ZipFile(src, 'w', compression=ZIP_DEFLATED) as zf:
with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as zf:
# Write mimetype # Write mimetype
zf.write(os.path.join(self._exploded,'mimetype'), 'mimetype', compress_type=ZIP_STORED) zf.write(os.path.join(self._exploded,'mimetype'), 'mimetype', compress_type=ZIP_STORED)
# Write everything else # Write everything else
@ -86,5 +94,23 @@ class TweakEpub(QDialog, Ui_Dialog):
zfn = os.path.relpath(absfn, zfn = os.path.relpath(absfn,
self._exploded).replace(os.sep, '/') self._exploded).replace(os.sep, '/')
zf.write(absfn, zfn) zf.write(absfn, zfn)
def preview(self):
if not self._exploded:
return error_dialog(self, _('Cannot preview'),
_('You must first explode the epub before previewing.'),
show=True)
tf = PersistentTemporaryFile('.epub')
tf.close()
self._preview_files.append(tf.name)
self.do_rebuild(tf.name)
self.gui.iactions['View']._view_file(tf.name)
def rebuild(self, *args):
self._output = os.path.join(self._exploded, 'rebuilt.epub')
self.do_rebuild(self._output)
return QDialog.accept(self) return QDialog.accept(self)

View File

@ -23,6 +23,16 @@
<bool>false</bool> <bool>false</bool>
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="0" column="0" colspan="2">
<widget class="QLabel" name="label">
<property name="text">
<string>&lt;p&gt;Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window &lt;b&gt;and the editor windows you used to edit files in the epub&lt;/b&gt;.&lt;/p&gt;&lt;p&gt;Rebuild the ePub, updating your calibre library.&lt;/p&gt;</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0"> <item row="1" column="0">
<widget class="QPushButton" name="explode_button"> <widget class="QPushButton" name="explode_button">
<property name="statusTip"> <property name="statusTip">
@ -37,23 +47,6 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0">
<widget class="QPushButton" name="rebuild_button">
<property name="enabled">
<bool>false</bool>
</property>
<property name="statusTip">
<string>Rebuild ePub from exploded contents</string>
</property>
<property name="text">
<string>&amp;Rebuild ePub</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset>
</property>
</widget>
</item>
<item row="3" column="0"> <item row="3" column="0">
<widget class="QPushButton" name="cancel_button"> <widget class="QPushButton" name="cancel_button">
<property name="statusTip"> <property name="statusTip">
@ -68,13 +61,31 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="0" column="0"> <item row="3" column="1">
<widget class="QLabel" name="label"> <widget class="QPushButton" name="rebuild_button">
<property name="text"> <property name="enabled">
<string>&lt;p&gt;Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window &lt;b&gt;and the editor windows you used to edit files in the epub&lt;/b&gt;.&lt;/p&gt;&lt;p&gt;Rebuild the ePub, updating your calibre library.&lt;/p&gt;</string> <bool>false</bool>
</property> </property>
<property name="wordWrap"> <property name="statusTip">
<bool>true</bool> <string>Rebuild ePub from exploded contents</string>
</property>
<property name="text">
<string>&amp;Rebuild ePub</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QPushButton" name="preview_button">
<property name="text">
<string>&amp;Preview ePub</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/view.png</normaloff>:/images/view.png</iconset>
</property> </property>
</widget> </widget>
</item> </item>

View File

@ -44,18 +44,19 @@ class LocationManager(QObject): # {{{
receiver = partial(self._location_selected, name) receiver = partial(self._location_selected, name)
ac.triggered.connect(receiver) ac.triggered.connect(receiver)
self.tooltips[name] = tooltip self.tooltips[name] = tooltip
m = QMenu(parent)
self._mem.append(m)
a = m.addAction(icon, tooltip)
a.triggered.connect(receiver)
if name != 'library': if name != 'library':
m = QMenu(parent)
self._mem.append(m)
a = m.addAction(icon, tooltip)
a.triggered.connect(receiver)
self._mem.append(a) self._mem.append(a)
a = m.addAction(QIcon(I('eject.png')), _('Eject this device')) a = m.addAction(QIcon(I('eject.png')), _('Eject this device'))
a.triggered.connect(self._eject_requested) a.triggered.connect(self._eject_requested)
ac.setMenu(m)
self._mem.append(a) self._mem.append(a)
else: else:
ac.setToolTip(tooltip) ac.setToolTip(tooltip)
ac.setMenu(m)
ac.calibre_name = name ac.calibre_name = name
return ac return ac
@ -71,7 +72,12 @@ class LocationManager(QObject): # {{{
def set_switch_actions(self, quick_actions, rename_actions, delete_actions, def set_switch_actions(self, quick_actions, rename_actions, delete_actions,
switch_actions, choose_action): switch_actions, choose_action):
self.switch_menu = QMenu() self.switch_menu = self.library_action.menu()
if self.switch_menu:
self.switch_menu.addSeparator()
else:
self.switch_menu = QMenu()
self.switch_menu.addAction(choose_action) self.switch_menu.addAction(choose_action)
self.cs_menus = [] self.cs_menus = []
for t, acs in [(_('Quick switch'), quick_actions), for t, acs in [(_('Quick switch'), quick_actions),
@ -85,7 +91,9 @@ class LocationManager(QObject): # {{{
self.switch_menu.addSeparator() self.switch_menu.addSeparator()
for ac in switch_actions: for ac in switch_actions:
self.switch_menu.addAction(ac) self.switch_menu.addAction(ac)
self.library_action.setMenu(self.switch_menu)
if self.switch_menu != self.library_action.menu():
self.library_action.setMenu(self.switch_menu)
def _location_selected(self, location, *args): def _location_selected(self, location, *args):
if location != self.current_location and hasattr(self, if location != self.current_location and hasattr(self,

View File

@ -439,10 +439,16 @@ class BooksView(QTableView): # {{{
if tweaks['sort_columns_at_startup'] is not None: if tweaks['sort_columns_at_startup'] is not None:
sh = [] sh = []
for c,d in tweaks['sort_columns_at_startup']: try:
if not isinstance(d, bool): for c,d in tweaks['sort_columns_at_startup']:
d = True if d == 0 else False if not isinstance(d, bool):
sh.append((c, d)) d = True if d == 0 else False
sh.append((c, d))
except:
# Ignore invalid tweak values as users seem to often get them
# wrong
import traceback
traceback.print_exc()
old_state['sort_history'] = sh old_state['sort_history'] = sh
self.apply_state(old_state) self.apply_state(old_state)

View File

@ -190,7 +190,15 @@ class FieldsModel(QAbstractListModel): # {{{
return ans | Qt.ItemIsUserCheckable return ans | Qt.ItemIsUserCheckable
def restore_defaults(self): def restore_defaults(self):
self.overrides = dict([(f, self.state(f, True)) for f in self.fields]) self.overrides = dict([(f, self.state(f, Qt.Checked)) for f in self.fields])
self.reset()
def select_all(self):
self.overrides = dict([(f, Qt.Checked) for f in self.fields])
self.reset()
def clear_all(self):
self.overrides = dict([(f, Qt.Unchecked) for f in self.fields])
self.reset() self.reset()
def setData(self, index, val, role): def setData(self, index, val, role):
@ -273,6 +281,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.fields_view.setModel(self.fields_model) self.fields_view.setModel(self.fields_model)
self.fields_model.dataChanged.connect(self.changed_signal) self.fields_model.dataChanged.connect(self.changed_signal)
self.select_all_button.clicked.connect(self.fields_model.select_all)
self.clear_all_button.clicked.connect(self.fields_model.clear_all)
def configure_plugin(self): def configure_plugin(self):
for index in self.sources_view.selectionModel().selectedRows(): for index in self.sources_view.selectionModel().selectedRows():
plugin = self.sources_model.data(index, Qt.UserRole) plugin = self.sources_model.data(index, Qt.UserRole)

View File

@ -77,8 +77,8 @@
<property name="title"> <property name="title">
<string>Downloaded metadata fields</string> <string>Downloaded metadata fields</string>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout_2"> <layout class="QGridLayout" name="gridLayout_2">
<item> <item row="0" column="0" colspan="2">
<widget class="QListView" name="fields_view"> <widget class="QListView" name="fields_view">
<property name="toolTip"> <property name="toolTip">
<string>If you uncheck any fields, metadata for those fields will not be downloaded</string> <string>If you uncheck any fields, metadata for those fields will not be downloaded</string>
@ -88,6 +88,20 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="0">
<widget class="QPushButton" name="select_all_button">
<property name="text">
<string>&amp;Select all</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QPushButton" name="clear_all_button">
<property name="text">
<string>&amp;Clear all</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>

View File

@ -155,6 +155,7 @@ class SearchDialog(QDialog, Ui_Dialog):
self.config['results_view_column_width'] = [self.results_view.columnWidth(i) for i in range(self.results_view.model().columnCount())] self.config['results_view_column_width'] = [self.results_view.columnWidth(i) for i in range(self.results_view.model().columnCount())]
self.config['sort_col'] = self.results_view.model().sort_col self.config['sort_col'] = self.results_view.model().sort_col
self.config['sort_order'] = self.results_view.model().sort_order self.config['sort_order'] = self.results_view.model().sort_order
self.config['open_external'] = self.open_external.isChecked()
store_check = {} store_check = {}
for n in self.store_plugins: for n in self.store_plugins:
@ -179,6 +180,8 @@ class SearchDialog(QDialog, Ui_Dialog):
else: else:
self.resize_columns() self.resize_columns()
self.open_external.setChecked(self.config.get('open_external', False))
store_check = self.config.get('store_checked', None) store_check = self.config.get('store_checked', None)
if store_check: if store_check:
for n in store_check: for n in store_check:
@ -212,7 +215,7 @@ class SearchDialog(QDialog, Ui_Dialog):
def open_store(self, index): def open_store(self, index):
result = self.results_view.model().get_result(index) result = self.results_view.model().get_result(index)
self.store_plugins[result.store_name].open(self, result.detail_item) self.store_plugins[result.store_name].open(self, result.detail_item, self.open_external.isChecked())
def check_progress(self): def check_progress(self):
if not self.search_pool.threads_running() and not self.results_view.model().cover_pool.threads_running() and not self.results_view.model().details_pool.threads_running(): if not self.search_pool.threads_running() and not self.results_view.model().cover_pool.threads_running() and not self.results_view.model().details_pool.threads_running():

View File

@ -70,7 +70,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>215</width> <width>215</width>
<height>116</height> <height>93</height>
</rect> </rect>
</property> </property>
</widget> </widget>
@ -101,6 +101,16 @@
</item> </item>
</layout> </layout>
</item> </item>
<item>
<widget class="QCheckBox" name="open_external">
<property name="toolTip">
<string>Open a selected book in the system's web browser</string>
</property>
<property name="text">
<string>Open in &amp;external browser</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<widget class="QSplitter" name="splitter_2"> <widget class="QSplitter" name="splitter_2">

View File

@ -841,11 +841,19 @@ ol, ul { padding-left: 2em; }
self.styledict[name] = styles self.styledict[name] = styles
# Write the styles to HTML # Write the styles to HTML
self.writeout(self.default_styles) self.writeout(self.default_styles)
# Changed by Kovid to not write out endless copies of the same style
css_styles = {}
for name in self.stylestack: for name in self.stylestack:
styles = self.styledict.get(name) styles = self.styledict.get(name)
css2 = self.cs.convert_styles(styles) css2 = tuple(self.cs.convert_styles(styles).iteritems())
self.writeout("%s {\n" % name) if css2 in css_styles:
for style, val in css2.items(): css_styles[css2].append(name)
else:
css_styles[css2] = [name]
for css2, names in css_styles.iteritems():
self.writeout("%s {\n" % ', '.join(names))
for style, val in css2:
self.writeout("\t%s: %s;\n" % (style, val) ) self.writeout("\t%s: %s;\n" % (style, val) )
self.writeout("}\n") self.writeout("}\n")