Merge from trunk

This commit is contained in:
Kovid Goyal 2013-04-12 10:26:18 +05:30
commit fd50d44cc2
12 changed files with 137 additions and 66 deletions

View File

@ -1,4 +1,4 @@
# vim:fileencoding=UTF-8:ts=2:sw=2:sta:et:sts=2:ai
# vim:fileencoding=utf-8:ts=2:sw=2:sta:et:sts=2:ai
# Each release can have new features and bug fixes. Each of which
# must have a title and can optionally have linked tickets and a description.
# In addition they can have a type field which defaults to minor, but should be major
@ -20,6 +20,66 @@
# new recipes:
# - title:
- version: 0.9.27
date: 2013-04-12
new features:
- title: "Metadata download: Add two new sources for covers: Google Image Search and bigbooksearch.com."
description: "To enable them go to Preferences->Metadata download and enable the 'Google Image' and 'Big Book Search' sources. Google Images is useful for finding larger covers as well as alternate versions of the cover. Big Book Search searches for alternate covers from amazon.com. It can occasionally find nicer covers than the direct Amazon source. Note that both these sources download multiple covers for a single book. Some of these covers can be wrong (i.e. they may be of a different book or not covers at all, so you should inspect the results and manually pick the best match). When bulk downloading, these sources are only used if the other sources find no covers."
type: major
- title: "Content server: Allow specifying a reestriction to use for the server when embedding it as a WSGI app."
tickets: [1167951]
- title: "Get Books: Add a plugin for the Koobe Polish book store"
- title: "calibredb add_format: Add an option to not replace existing formats. Also pep8 compliance."
- title: "Allow restoring of the ORIGINAL_XXX format by right-clicking it in the book details panel"
bug fixes:
- title: "AZW3 Input: Do not fail to identify JPEG images with 8BIM headers created with Adobe Photoshop."
tickets: [1167985]
- title: "Amazon metadata download: Ignore Spanish edition entries when searching for a book on amazon.com"
- title: "TXT Input: When converting a txt file with a Byte Order Mark, remove the Byte Order Mark before further processing as it can cause the first line of the text to be mis-interpreted."
- title: "Get Books: Fix searching for current book/title/author by right clicking the get books icon"
- title: "Get Books: Update nexto, gutenberg, and virtualo store plugins for website changes"
- title: "Amazon metadata download: When downloading from amazon.co.jp handle the 'Black curtain redirect' for adult titles."
tickets: [1165628]
- title: "When extracting zip files do not allow maliciously created zip files to overwrite other files on the system"
- title: "RTF Input: Handle RTF files with invalid border style specifications"
tickets: [1021270]
improved recipes:
- The Escapist
- San Francisco Chronicle
- The Onion
- Fronda
- Tom's Hardware
- New Yorker
- Financial Times UK
- Business Week Magazine
- Victoria Times
- tvxs
- The Independent
new recipes:
- title: Economia
author: Manish Bhattarai
- title: Universe Today
author: seird
- title: The Galaxy's Edge
author: Krittika Goyal
- version: 0.9.26
date: 2013-04-05

View File

@ -802,6 +802,12 @@ Downloading from the Internet can sometimes result in a corrupted download. If t
* Try temporarily disabling your antivirus program (Microsoft Security Essentials, or Kaspersky or Norton or McAfee or whatever). This is most likely the culprit if the upgrade process is hanging in the middle.
* Try rebooting your computer and running a registry cleaner like `Wise registry cleaner <http://www.wisecleaner.com>`_.
* Try downloading the installer with an alternate browser. For example if you are using Internet Explorer, try using Firefox or Chrome instead.
* If you get an error about a missing DLL on windows, then most likely, the
permissions on your temporary folder are incorrect. Go to the folder
:file:`C:\\Users\\USERNAME\\AppData\\Local` in Windows explorer and then
right click on the :file:`Temp` folder and select :guilabel:`Properties` and go to
the :guilabel:`Security` tab. Make sure that your user account has full control
for this folder.
If you still cannot get the installer to work and you are on windows, you can use the `calibre portable install <http://calibre-ebook.com/download_portable>`_, which does not need an installer (it is just a zip file).

View File

@ -41,6 +41,7 @@ class TheIndependentNew(BasicNewsRecipe):
publication_type = 'newspaper'
masthead_url = 'http://www.independent.co.uk/independent.co.uk/editorial/logo/independent_Masthead.png'
encoding = 'utf-8'
compress_news_images = True
remove_tags =[
dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
dict(attrs={'class' : ['autoplay','openBiogPopup']}),

View File

@ -1,5 +1,6 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class TVXS(BasicNewsRecipe):
@ -8,19 +9,30 @@ class TVXS(BasicNewsRecipe):
description = 'News from Greece'
max_articles_per_feed = 100
oldest_article = 3
simultaneous_downloads = 1
publisher = 'TVXS'
category = 'news, GR'
category = 'news, sport, greece'
language = 'el'
encoding = None
use_embedded_content = False
remove_empty_feeds = True
#conversion_options = { 'linearize_tables': True}
conversion_options = {'smarten_punctuation': True}
no_stylesheets = True
publication_type = 'newspaper'
remove_tags_before = dict(name='h1',attrs={'class':'print-title'})
remove_tags_after = dict(name='div',attrs={'class':'field field-type-relevant-content field-field-relevant-articles'})
remove_attributes = ['width', 'src', 'header', 'footer']
remove_tags = [dict(name='div',attrs={'class':'field field-type-relevant-content field-field-relevant-articles'}),
dict(name='div',attrs={'class':'field field-type-filefield field-field-image-gallery'}),
dict(name='div',attrs={'class':'filefield-file'})]
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
table { width: 100%; } \
td img { display: block; margin: 5px auto; } \
ul { padding-top: 10px; } \
ol { padding-top: 10px; } \
li { padding-top: 5px; padding-bottom: 5px; } \
h1 { text-align: center; font-size: 125%; font-weight: bold; } \
h2, h3, h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''), (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: '')]
feeds = [(u'Ελλάδα', 'http://tvxs.gr/feeds/2/feed.xml'),
(u'Κόσμος', 'http://tvxs.gr/feeds/5/feed.xml'),
@ -35,17 +47,10 @@ class TVXS(BasicNewsRecipe):
(u'Ιστορία', 'http://tvxs.gr/feeds/1573/feed.xml'),
(u'Χιούμορ', 'http://tvxs.gr/feeds/692/feed.xml')]
def print_version(self, url):
import urllib2, urlparse, StringIO, gzip
fp = urllib2.urlopen(url)
data = fp.read()
if fp.info()['content-encoding'] == 'gzip':
gzip_data = StringIO.StringIO(data)
gzipper = gzip.GzipFile(fileobj=gzip_data)
data = gzipper.read()
fp.close()
br = self.get_browser()
response = br.open(url)
data = response.read()
pos_1 = data.find('<a href="/print/')
if pos_1 == -1:
@ -57,5 +62,5 @@ class TVXS(BasicNewsRecipe):
pos_1 += len('<a href="')
new_url = data[pos_1:pos_2]
print_url = urlparse.urljoin(url, new_url)
print_url = "http://tvxs.gr" + new_url
return print_url

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 26)
numeric_version = (0, 9, 27)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -68,7 +68,6 @@ class Resource(object): # {{{
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
self.fragment = url[-1]
def href(self, basedir=None):
'''
Return a URL pointing to this resource. If it is a file on the filesystem
@ -180,7 +179,6 @@ class ManifestItem(Resource): # {{{
self.mime_type = val
return property(fget=fget, fset=fset)
def __unicode__(self):
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
@ -190,7 +188,6 @@ class ManifestItem(Resource): # {{{
def __repr__(self):
return unicode(self)
def __getitem__(self, index):
if index == 0:
return self.href()
@ -245,7 +242,6 @@ class Manifest(ResourceCollection): # {{{
ResourceCollection.__init__(self)
self.next_id = 1
def item(self, id):
for i in self:
if i.id == id:
@ -309,13 +305,10 @@ class Spine(ResourceCollection): # {{{
continue
return s
def __init__(self, manifest):
ResourceCollection.__init__(self)
self.manifest = manifest
def replace(self, start, end, ids):
'''
Replace the items between start (inclusive) and end (not inclusive) with
@ -363,7 +356,6 @@ class Guide(ResourceCollection): # {{{
ans += 'title="%s" '%self.title
return ans + '/>'
@staticmethod
def from_opf_guide(references, base_dir=os.getcwdu()):
coll = Guide()
@ -489,9 +481,9 @@ class OPF(object): # {{{
MIMETYPE = 'application/oebps-package+xml'
PARSER = etree.XMLParser(recover=True)
NAMESPACES = {
None : "http://www.idpf.org/2007/opf",
'dc' : "http://purl.org/dc/elements/1.1/",
'opf' : "http://www.idpf.org/2007/opf",
None: "http://www.idpf.org/2007/opf",
'dc': "http://purl.org/dc/elements/1.1/",
'opf': "http://www.idpf.org/2007/opf",
}
META = '{%s}meta' % NAMESPACES['opf']
xpn = NAMESPACES.copy()
@ -501,9 +493,10 @@ class OPF(object): # {{{
CONTENT = XPath('self::*[re:match(name(), "meta$", "i")]/@content')
TEXT = XPath('string()')
metadata_path = XPath('descendant::*[re:match(name(), "metadata", "i")]')
metadata_elem_path = XPath('descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
metadata_elem_path = XPath(
'descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") '
'and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
title_path = XPath('descendant::*[re:match(name(), "title", "i")]')
authors_path = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut" or (not(@role) and not(@opf:role)))]')
bkp_path = XPath('descendant::*[re:match(name(), "contributor", "i") and (@role="bkp" or @opf:role="bkp")]')
@ -640,7 +633,8 @@ class OPF(object): # {{{
if 'toc' in item.href().lower():
toc = item.path
if toc is None: return
if toc is None:
return
self.toc = TOC(base_path=self.base_dir)
is_ncx = getattr(self, 'manifest', None) is not None and \
self.manifest.type_for_id(toc) is not None and \
@ -976,7 +970,6 @@ class OPF(object): # {{{
return property(fget=fget, fset=fset)
@dynamic_property
def language(self):
@ -990,7 +983,6 @@ class OPF(object): # {{{
return property(fget=fget, fset=fset)
@dynamic_property
def languages(self):
@ -1015,7 +1007,6 @@ class OPF(object): # {{{
return property(fget=fget, fset=fset)
@dynamic_property
def book_producer(self):
@ -1196,7 +1187,6 @@ class OPFCreator(Metadata):
if self.cover:
self.guide.set_cover(self.cover)
def create_manifest(self, entries):
'''
Create <manifest>
@ -1615,9 +1605,9 @@ def test_user_metadata():
from cStringIO import StringIO
mi = Metadata('Test title', ['test author1', 'test author2'])
um = {
'#myseries': { '#value#': u'test series\xe4', 'datatype':'text',
'#myseries': {'#value#': u'test series\xe4', 'datatype':'text',
'is_multiple': None, 'name': u'My Series'},
'#myseries_index': { '#value#': 2.45, 'datatype': 'float',
'#myseries_index': {'#value#': 2.45, 'datatype': 'float',
'is_multiple': None},
'#mytags': {'#value#':['t1','t2','t3'], 'datatype':'text',
'is_multiple': '|', 'name': u'My Tags'}

View File

@ -51,9 +51,11 @@ def reverse_tag_iter(block):
end = len(block)
while True:
pgt = block.rfind(b'>', 0, end)
if pgt == -1: break
if pgt == -1:
break
plt = block.rfind(b'<', 0, pgt)
if plt == -1: break
if plt == -1:
break
yield block[plt:pgt+1]
end = plt
@ -231,12 +233,12 @@ class Mobi8Reader(object):
flowpart = self.flows[j]
nstr = '%04d' % j
m = svg_tag_pattern.search(flowpart)
if m != None:
if m is not None:
# svg
typ = 'svg'
start = m.start()
m2 = image_tag_pattern.search(flowpart)
if m2 != None:
if m2 is not None:
format = 'inline'
dir = None
fname = None
@ -406,6 +408,10 @@ class Mobi8Reader(object):
else:
imgtype = what(None, data)
if imgtype is None:
from calibre.utils.magick.draw import identify_data
try:
imgtype = identify_data(data)[2]
except Exception:
imgtype = 'unknown'
href = 'images/%05d.%s'%(fname_idx, imgtype)
with open(href.replace('/', os.sep), 'wb') as f:

View File

@ -72,7 +72,8 @@ def explode(path, dest, question=lambda x:True):
dest), no_output=True)['result']
def set_cover(oeb):
if 'cover' not in oeb.guide or oeb.metadata['cover']: return
if 'cover' not in oeb.guide or oeb.metadata['cover']:
return
cover = oeb.guide['cover']
if cover.href in oeb.manifest.hrefs:
item = oeb.manifest.hrefs[cover.href]
@ -95,8 +96,9 @@ def rebuild(src_dir, dest_path):
if not opf:
raise ValueError('No OPF file found in %s'%src_dir)
opf = opf[0]
# For debugging, uncomment the following line
# def fork_job(a, b, args=None, no_output=True): do_rebuild(*args)
# For debugging, uncomment the following two lines
# def fork_job(a, b, args=None, no_output=True):
# do_rebuild(*args)
fork_job('calibre.ebooks.mobi.tweak', 'do_rebuild', args=(opf, dest_path),
no_output=True)

View File

@ -69,7 +69,8 @@ class Resources(object):
cover_href = item.href
for item in self.oeb.manifest.values():
if item.media_type not in OEB_RASTER_IMAGES: continue
if item.media_type not in OEB_RASTER_IMAGES:
continue
try:
data = self.process_image(item.data)
except:
@ -116,8 +117,8 @@ class Resources(object):
Add any images that were created after the call to add_resources()
'''
for item in self.oeb.manifest.values():
if (item.media_type not in OEB_RASTER_IMAGES or item.href in
self.item_map): continue
if (item.media_type not in OEB_RASTER_IMAGES or item.href in self.item_map):
continue
try:
data = self.process_image(item.data)
except:

View File

@ -270,7 +270,7 @@ BINARY_MIME = 'application/octet-stream'
XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css', 'xhtml/css'])
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME,
'text/x-oeb-document'])
OEB_RASTER_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME])

View File

@ -7,7 +7,6 @@ __license__ = 'GPL 3'
__copyright__ = '2013, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib
from base64 import b64encode
from contextlib import closing

View File

@ -24,7 +24,8 @@ def what(file, h=None):
if res:
return res
finally:
if f: f.close()
if f:
f.close()
return None
@ -38,7 +39,7 @@ def test_jpeg(h, f):
"""JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
the original code was failing with some jpegs that included ICC_PROFILE
data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
if (h[6:10] in (b'JFIF', b'Exif')) or (h[:2] == b'\xff\xd8' and b'JFIF' in h[:32]):
if (h[6:10] in (b'JFIF', b'Exif')) or (h[:2] == b'\xff\xd8' and (b'JFIF' in h[:32] or b'8BIM' in h[:32])):
return 'jpeg'
tests.append(test_jpeg)