mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
merge from trunk
This commit is contained in:
commit
c013ece5af
@ -586,6 +586,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
|
|||||||
- Focus the book list
|
- Focus the book list
|
||||||
* - :kbd:`Ctrl+Esc`
|
* - :kbd:`Ctrl+Esc`
|
||||||
- Clear the virtual library
|
- Clear the virtual library
|
||||||
|
* - :kbd:`Alt+Esc`
|
||||||
|
- Clear the additional restriction
|
||||||
* - :kbd:`N or F3`
|
* - :kbd:`N or F3`
|
||||||
- Find the next book that matches the current search (only works if the highlight checkbox next to the search bar is checked)
|
- Find the next book that matches the current search (only works if the highlight checkbox next to the search bar is checked)
|
||||||
* - :kbd:`Shift+N or Shift+F3`
|
* - :kbd:`Shift+N or Shift+F3`
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import re, random
|
import random
|
||||||
|
|
||||||
from calibre import browser
|
from calibre import browser
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
@ -8,7 +8,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
title = u'The Sun UK'
|
title = u'The Sun UK'
|
||||||
description = 'Articles from The Sun tabloid UK'
|
description = 'Articles from The Sun tabloid UK'
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
# last updated 19/10/12 better cover fetch
|
# last updated 5/5/13 better cover fetch
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 15
|
max_articles_per_feed = 15
|
||||||
@ -29,16 +29,12 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
dict(name='div',attrs={'class' : 'intro'}),
|
dict(name='div',attrs={'class' : 'intro'}),
|
||||||
dict(name='h3'),
|
dict(name='h3'),
|
||||||
dict(name='div',attrs={'id' : 'articlebody'}),
|
dict(name='div',attrs={'id' : 'articlebody'}),
|
||||||
#dict(attrs={'class' : ['right_col_branding','related-stories','mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
|
|
||||||
# dict(name='div',attrs={'class' : 'cf'}),
|
|
||||||
# dict(attrs={'title' : 'download flash'}),
|
|
||||||
# dict(attrs={'style' : 'padding: 5px'})
|
|
||||||
|
|
||||||
]
|
]
|
||||||
remove_tags_after = [dict(id='bodyText')]
|
remove_tags_after = [dict(id='bodyText')]
|
||||||
remove_tags=[
|
remove_tags=[
|
||||||
dict(name='li'),
|
dict(name='li'),
|
||||||
dict(attrs={'class' : 'grid-4 right-hand-column'}),
|
dict(attrs={'class' : 'grid-4 right-hand-column'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -47,40 +43,24 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
(u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
|
(u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
|
||||||
(u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
|
(u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
|
||||||
]
|
]
|
||||||
# starsons code
|
# starsons code
|
||||||
def parse_feeds(self):
    '''
    Parse the feeds with BasicNewsRecipe.parse_feeds and drop unwanted
    promotional articles from each feed.

    An article is dropped when its title (compared case-insensitively) or
    its URL contains one of the known phrases.

    :return: The feeds returned by BasicNewsRecipe.parse_feeds, with the
             unwanted articles removed from each feed's article list.
    '''
    feeds = BasicNewsRecipe.parse_feeds(self)
    for feed in feeds:
        # Iterate over a copy, the loop removes entries from the real list
        for article in feed.articles[:]:
            # The title is upper-cased, so the phrases it is compared with
            # must be upper case as well, otherwise they can never match
            title = article.title.upper()
            unwanted = (
                'TRY OUT THE SUN' in title or
                'Try-out-The-Suns' in article.url or
                'WEB PORN HARMS KIDS' in title or
                'Sun-says-Web-porn' in article.url
            )
            # A single removal, so an article matching more than one phrase
            # is not removed twice (which would raise ValueError)
            if unwanted:
                feed.articles.remove(article)
    return feeds
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
|
|
||||||
# look for the block containing the sun button and url
|
|
||||||
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_84.gif);'})
|
|
||||||
|
|
||||||
#cov = soup.find(attrs={'id' : 'large'})
|
|
||||||
cov2 = str(cov)
|
|
||||||
|
|
||||||
cov2='http://www.politicshome.com'+cov2[9:-133]
|
|
||||||
#cov2 now contains url of the page containing pic
|
|
||||||
#cov2 now contains url of the page containing pic
|
|
||||||
soup = self.index_to_soup(cov2)
|
|
||||||
cov = soup.find(attrs={'id' : 'large'})
|
|
||||||
cov=str(cov)
|
|
||||||
cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
|
|
||||||
cov2 = str(cov2)
|
|
||||||
cov2=cov2[2:len(cov2)-2]
|
|
||||||
br = browser()
|
br = browser()
|
||||||
br.set_handle_redirect(False)
|
br.set_handle_redirect(False)
|
||||||
|
cover_url = 'http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
br.open_novisit(cov2)
|
br.open_novisit('http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg')
|
||||||
cover_url = cov2
|
|
||||||
except:
|
except:
|
||||||
cover_url = random.choice([
|
cover_url = random.choice([
|
||||||
'http://img.thesun.co.uk/multimedia/archive/00905/errorpage6_677961a_905507a.jpg'
|
'http://img.thesun.co.uk/multimedia/archive/00905/errorpage6_677961a_905507a.jpg'
|
||||||
@ -88,6 +68,6 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage5_677960a_905512a.jpg'
|
,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage5_677960a_905512a.jpg'
|
||||||
,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage2_677957a_905502a.jpg'
|
,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage2_677957a_905502a.jpg'
|
||||||
,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage3_677958a_905503a.jpg'
|
,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage3_677958a_905503a.jpg'
|
||||||
])
|
])
|
||||||
|
|
||||||
return cover_url
|
return cover_url
|
||||||
|
11
src/calibre/ebooks/docx/__init__.py
Normal file
11
src/calibre/ebooks/docx/__init__.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
class InvalidDOCX(ValueError):
    ''' Error raised for DOCX files that are missing required parts. '''
|
||||||
|
|
203
src/calibre/ebooks/docx/container.py
Normal file
203
src/calibre/ebooks/docx/container.py
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import os, sys, shutil
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre import walk, guess_type
|
||||||
|
from calibre.ebooks.metadata import string_to_authors
|
||||||
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
|
from calibre.ebooks.docx import InvalidDOCX
|
||||||
|
from calibre.ebooks.docx.names import DOCUMENT, DOCPROPS, XPath, APPPROPS
|
||||||
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
from calibre.utils.logging import default_log
|
||||||
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
||||||
|
|
||||||
|
def fromstring(raw, parser=RECOVER_PARSER):
    ''' Parse raw into an lxml element tree root, using RECOVER_PARSER
    unless another parser is passed in. '''
    root = etree.fromstring(raw, parser=parser)
    return root
|
||||||
|
|
||||||
|
# Read metadata {{{
|
||||||
|
def read_doc_props(raw, mi):
    '''
    Read the core document properties from raw and store them on mi.

    Sets title, tags, authors, comments and languages. A field is only
    assigned when a usable value is found, otherwise it is left untouched.

    :param raw: The serialized XML of the core properties part.
    :param mi: The metadata object to update in place.
    '''
    root = fromstring(raw)

    titles = XPath('//dc:title')(root)
    if titles:
        title = titles[0].text
        if title and title.strip():
            mi.title = title.strip()

    tags = []
    for subject in XPath('//dc:subject')(root):
        if subject.text and subject.text.strip():
            tags.append(subject.text.strip().replace(',', '_'))
    for keywords in XPath('//cp:keywords')(root):
        if keywords.text and keywords.text.strip():
            for word in keywords.text.split():
                # "a, b" is split into "a," and "b"; splitting "a," on the
                # comma leaves an empty string behind, which is not a tag
                tags.extend(y.strip() for y in word.split(',') if y.strip())
    if tags:
        mi.tags = tags

    aut = []
    for author in XPath('//dc:creator')(root):
        if author.text and author.text.strip():
            aut.extend(string_to_authors(author.text))
    if aut:
        mi.authors = aut

    desc = XPath('//dc:description')(root)
    if desc:
        # Only the text content of the first description element is used
        mi.comments = etree.tostring(desc[0], method='text', encoding=unicode)

    langs = []
    for lang in XPath('//dc:language')(root):
        if lang.text and lang.text.strip():
            l = canonicalize_lang(lang.text)
            # Values that canonicalize_lang() maps to a false value are skipped
            if l:
                langs.append(l)
    if langs:
        mi.languages = langs
|
||||||
|
|
||||||
|
def read_app_props(raw, mi):
    ''' Set mi.publisher from the first Company element found in raw,
    provided that element has non-blank text. '''
    root = fromstring(raw)
    matches = root.xpath('//*[local-name()="Company"]')
    if not matches:
        return
    text = matches[0].text
    if text and text.strip():
        mi.publisher = text.strip()
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
class DOCX(object):

    '''
    Read access to the parts of a DOCX file.

    The file is either extracted into a temporary directory (extract=True)
    or read directly from the ZIP archive (extract=False). In both cases
    [Content_Types].xml and _rels/.rels are parsed on construction and
    InvalidDOCX is raised if either of them is missing.
    '''

    def __init__(self, path_or_stream, log=None, extract=True):
        '''
        :param path_or_stream: A path, or an object with a read() method.
        :param log: Log to use, default_log is used when this is not set.
        :param extract: If True, unpack the file into a temporary directory,
                        otherwise read parts straight from the ZIP archive.
        '''
        if hasattr(path_or_stream, 'read'):
            stream, self._owned_stream = path_or_stream, None
        else:
            # We opened this file, so we are responsible for closing it
            stream = self._owned_stream = open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        initialized = False
        try:
            if extract:
                self.extract(stream)
            else:
                self.init_zipfile(stream)
            self.read_content_types()
            self.read_package_relationships()
            initialized = True
        finally:
            # After extraction the stream is no longer needed. In ZIP mode
            # it must stay open until close(), unless construction failed.
            if extract or not initialized:
                self._close_owned_stream()

    def _close_owned_stream(self):
        ''' Close the file opened by __init__, if any. Streams passed in by
        the caller are never closed. '''
        stream = getattr(self, '_owned_stream', None)
        self._owned_stream = None
        if stream is not None:
            stream.close()

    def init_zipfile(self, stream):
        ''' Open stream as a ZIP archive and record its member names as a
        frozenset in self.names. '''
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        ''' Unpack stream into a new temporary directory (self.tdir) and
        build self.names, a dict mapping each extracted file's name relative
        to that directory (with / as separator) to its path on disk. '''
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must not
            # trigger the fallback parser
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def read(self, name):
        ''' Return the raw contents of the part called name, read from the
        ZIP archive when one is open, otherwise from the extracted files.
        In the extracted case an unknown name raises KeyError. '''
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        ''' Parse [Content_Types].xml into self.content_types (part name,
        without leading slashes, to content type) and
        self.default_content_types (lower case extension to content type).

        :raises InvalidDOCX: If [Content_Types].xml cannot be found.
        '''
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        ''' Return the content type of the part called name: the explicit
        override if there is one, else the default for its extension, else
        the first item of guess_type(name). '''
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        ''' Parse _rels/.rels into self.relationships (relationship type to
        target, without leading slashes) and self.relationships_rmap (the
        reverse mapping).

        :raises InvalidDOCX: If _rels/.rels cannot be found.
        '''
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document(self):
        ''' The parsed main document. The part is located via the DOCUMENT
        relationship, falling back to any part named document.xml.

        :raises InvalidDOCX: If no main document part can be found.
        '''
        name = self.relationships.get(DOCUMENT, None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return fromstring(self.read(name))

    def _read_props(self, rel_type, fallback, reader, mi):
        ''' Locate a properties part, via the relationship rel_type or else
        by the lower case name fallback, and pass its contents to reader
        together with mi. A part that cannot be read is silently ignored. '''
        name = self.relationships.get(rel_type, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == fallback)
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                reader(raw, mi)

    @property
    def metadata(self):
        ''' A new Metadata object filled in from the core properties and
        then the application properties, where those parts are present. '''
        mi = Metadata(_('Unknown'))
        self._read_props(DOCPROPS, 'docprops/core.xml', read_doc_props, mi)
        self._read_props(APPPROPS, 'docprops/app.xml', read_app_props, mi)
        return mi

    def close(self):
        ''' Release resources: close the ZIP archive, or delete the
        temporary directory (errors while deleting are ignored), and close
        the underlying file if it was opened by this object. '''
        try:
            if hasattr(self, 'zipf'):
                self.zipf.close()
            else:
                try:
                    shutil.rmtree(self.tdir)
                except EnvironmentError:
                    pass
        finally:
            self._close_owned_stream()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Print the metadata of the DOCX file named by the last command line
    # argument, reading it directly from the ZIP archive
    d = DOCX(sys.argv[-1], extract=False)
    try:
        print (d.metadata)
    finally:
        # Always release the archive, even if reading the metadata fails
        d.close()
|
47
src/calibre/ebooks/docx/names.py
Normal file
47
src/calibre/ebooks/docx/names.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from lxml.etree import XPath as X
|
||||||
|
|
||||||
|
DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
|
||||||
|
DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
|
||||||
|
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
|
||||||
|
|
||||||
|
namespaces = {
|
||||||
|
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||||
|
'o': 'urn:schemas-microsoft-com:office:office',
|
||||||
|
've': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
|
||||||
|
# Text Content
|
||||||
|
'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
|
||||||
|
'w10': 'urn:schemas-microsoft-com:office:word',
|
||||||
|
'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
|
||||||
|
# Drawing
|
||||||
|
'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
|
||||||
|
'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
|
||||||
|
'mv': 'urn:schemas-microsoft-com:mac:vml',
|
||||||
|
'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
|
||||||
|
'v': 'urn:schemas-microsoft-com:vml',
|
||||||
|
'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
|
||||||
|
# Properties (core and extended)
|
||||||
|
'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
|
||||||
|
'dc': 'http://purl.org/dc/elements/1.1/',
|
||||||
|
'ep': 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
|
||||||
|
'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
|
||||||
|
# Content Types
|
||||||
|
'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
|
||||||
|
# Package Relationships
|
||||||
|
'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
|
||||||
|
'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',
|
||||||
|
# Dublin Core document properties
|
||||||
|
'dcmitype': 'http://purl.org/dc/dcmitype/',
|
||||||
|
'dcterms': 'http://purl.org/dc/terms/'
|
||||||
|
}
|
||||||
|
|
||||||
|
def XPath(expr):
    ''' Compile expr into an lxml XPath object whose prefixes are resolved
    with this module's namespaces mapping. '''
    compiled = X(expr, namespaces=namespaces)
    return compiled
|
||||||
|
|
41
src/calibre/ebooks/docx/to_html.py
Normal file
41
src/calibre/ebooks/docx/to_html.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
from lxml.html.builder import (HTML, HEAD, TITLE, BODY, LINK, META)
|
||||||
|
|
||||||
|
# calibre.ebooks.docx.container defines DOCX; it has no class named Container
from calibre.ebooks.docx.container import DOCX

class Convert(object):

    '''
    Convert a DOCX file to HTML. Calling the instance writes index.html
    into the destination directory.
    '''

    def __init__(self, path_or_stream, dest_dir=None, log=None):
        '''
        :param path_or_stream: The DOCX file, passed through to DOCX().
        :param dest_dir: Directory to write the output into, the current
                         working directory is used when this is not set.
        :param log: Log passed through to DOCX().
        '''
        self.container = DOCX(path_or_stream, log=log)
        # Use the container's log, so a default is available when log is None
        self.log = self.container.log
        self.dest_dir = dest_dir or os.getcwdu()
        self.body = BODY()
        self.html = HTML(
            HEAD(
                META(charset='utf-8'),
                TITLE('TODO: read from metadata'),
                LINK(rel='stylesheet', type='text/css', href='docx.css'),
            ),
            self.body
        )

    def __call__(self):
        ''' Run the conversion, which currently only writes the output. '''
        self.write()

    def write(self):
        ''' Serialize self.html as UTF-8 with an HTML 5 doctype and write it
        to index.html in self.dest_dir. '''
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
Convert(sys.argv[-1])()
|
@ -178,6 +178,8 @@ class Metadata(object):
|
|||||||
return key in object.__getattribute__(self, '_data')
|
return key in object.__getattribute__(self, '_data')
|
||||||
|
|
||||||
def deepcopy(self):
|
def deepcopy(self):
|
||||||
|
''' Do not use this method unless you know what you are doing, if you want to create a simple clone of
|
||||||
|
this object, use :method:`deepcopy_metadata` instead. '''
|
||||||
m = Metadata(None)
|
m = Metadata(None)
|
||||||
m.__dict__ = copy.deepcopy(self.__dict__)
|
m.__dict__ = copy.deepcopy(self.__dict__)
|
||||||
object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))
|
object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))
|
||||||
|
@ -14,16 +14,15 @@ class SafeFormat(TemplateFormatter):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
TemplateFormatter.__init__(self)
|
TemplateFormatter.__init__(self)
|
||||||
from calibre.ebooks.metadata.book.base import field_metadata
|
|
||||||
self.field_metadata = field_metadata
|
|
||||||
|
|
||||||
def get_value(self, orig_key, args, kwargs):
|
def get_value(self, orig_key, args, kwargs):
|
||||||
if not orig_key:
|
if not orig_key:
|
||||||
return ''
|
return ''
|
||||||
key = orig_key = orig_key.lower()
|
key = orig_key = orig_key.lower()
|
||||||
if key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS and \
|
if (key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS and
|
||||||
key not in ALL_METADATA_FIELDS:
|
key not in ALL_METADATA_FIELDS):
|
||||||
key = self.field_metadata.search_term_to_field_key(key)
|
from calibre.ebooks.metadata.book.base import field_metadata
|
||||||
|
key = field_metadata.search_term_to_field_key(key)
|
||||||
if key is None or (self.book and
|
if key is None or (self.book and
|
||||||
key not in self.book.all_field_keys()):
|
key not in self.book.all_field_keys()):
|
||||||
if hasattr(self.book, orig_key):
|
if hasattr(self.book, orig_key):
|
||||||
|
@ -7,70 +7,21 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from lxml import etree
|
from calibre.ebooks.docx.container import DOCX
|
||||||
|
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
from calibre.utils.magick.draw import identify_data
|
from calibre.utils.magick.draw import identify_data
|
||||||
from calibre.ebooks.oeb.base import DC11_NS
|
|
||||||
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
|
||||||
|
|
||||||
NSMAP = {'dc':DC11_NS,
|
|
||||||
'cp':'http://schemas.openxmlformats.org/package/2006/metadata/core-properties'}
|
|
||||||
|
|
||||||
def XPath(expr):
|
|
||||||
return etree.XPath(expr, namespaces=NSMAP)
|
|
||||||
|
|
||||||
def _read_doc_props(raw, mi):
|
|
||||||
from calibre.ebooks.metadata import string_to_authors
|
|
||||||
root = etree.fromstring(raw, parser=RECOVER_PARSER)
|
|
||||||
titles = XPath('//dc:title')(root)
|
|
||||||
if titles:
|
|
||||||
title = titles[0].text
|
|
||||||
if title and title.strip():
|
|
||||||
mi.title = title.strip()
|
|
||||||
tags = []
|
|
||||||
for subject in XPath('//dc:subject')(root):
|
|
||||||
if subject.text and subject.text.strip():
|
|
||||||
tags.append(subject.text.strip().replace(',', '_'))
|
|
||||||
for keywords in XPath('//cp:keywords')(root):
|
|
||||||
if keywords.text and keywords.text.strip():
|
|
||||||
for x in keywords.text.split():
|
|
||||||
tags.extend(y.strip() for y in x.split(','))
|
|
||||||
if tags:
|
|
||||||
mi.tags = tags
|
|
||||||
authors = XPath('//dc:creator')(root)
|
|
||||||
aut = []
|
|
||||||
for author in authors:
|
|
||||||
if author.text and author.text.strip():
|
|
||||||
aut.extend(string_to_authors(author.text))
|
|
||||||
if aut:
|
|
||||||
mi.authors = aut
|
|
||||||
|
|
||||||
desc = XPath('//dc:description')(root)
|
|
||||||
if desc:
|
|
||||||
raw = etree.tostring(desc[0], method='text', encoding=unicode)
|
|
||||||
mi.comments = raw
|
|
||||||
|
|
||||||
def _read_app_props(raw, mi):
|
|
||||||
root = etree.fromstring(raw, parser=RECOVER_PARSER)
|
|
||||||
company = root.xpath('//*[local-name()="Company"]')
|
|
||||||
if company and company[0].text and company[0].text.strip():
|
|
||||||
mi.publisher = company[0].text.strip()
|
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
|
c = DOCX(stream, extract=False)
|
||||||
|
mi = c.metadata
|
||||||
|
c.close()
|
||||||
|
stream.seek(0)
|
||||||
|
cdata = None
|
||||||
with ZipFile(stream, 'r') as zf:
|
with ZipFile(stream, 'r') as zf:
|
||||||
|
|
||||||
mi = Metadata(_('Unknown'))
|
|
||||||
cdata = None
|
|
||||||
|
|
||||||
for zi in zf.infolist():
|
for zi in zf.infolist():
|
||||||
ext = zi.filename.rpartition('.')[-1].lower()
|
ext = zi.filename.rpartition('.')[-1].lower()
|
||||||
if zi.filename.lower() == 'docprops/core.xml':
|
if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
|
||||||
_read_doc_props(zf.read(zi), mi)
|
|
||||||
elif zi.filename.lower() == 'docprops/app.xml':
|
|
||||||
_read_app_props(zf.read(zi), mi)
|
|
||||||
elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
|
|
||||||
raw = zf.read(zi)
|
raw = zf.read(zi)
|
||||||
try:
|
try:
|
||||||
width, height, fmt = identify_data(raw)
|
width, height, fmt = identify_data(raw)
|
||||||
|
@ -24,6 +24,7 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
|
|||||||
urlnormalize, BINARY_MIME, \
|
urlnormalize, BINARY_MIME, \
|
||||||
OEBError, OEBBook, DirContainer
|
OEBError, OEBBook, DirContainer
|
||||||
from calibre.ebooks.oeb.writer import OEBWriter
|
from calibre.ebooks.oeb.writer import OEBWriter
|
||||||
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
from calibre.utils.localization import get_lang
|
from calibre.utils.localization import get_lang
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.constants import __appname__, __version__
|
from calibre.constants import __appname__, __version__
|
||||||
@ -106,7 +107,7 @@ class OEBReader(object):
|
|||||||
try:
|
try:
|
||||||
opf = etree.fromstring(data)
|
opf = etree.fromstring(data)
|
||||||
except etree.XMLSyntaxError:
|
except etree.XMLSyntaxError:
|
||||||
data = xml_replace_entities(data, encoding=None)
|
data = xml_replace_entities(clean_xml_chars(data), encoding=None)
|
||||||
try:
|
try:
|
||||||
opf = etree.fromstring(data)
|
opf = etree.fromstring(data)
|
||||||
self.logger.warn('OPF contains invalid HTML named entities')
|
self.logger.warn('OPF contains invalid HTML named entities')
|
||||||
|
@ -240,9 +240,10 @@ class EditMetadataAction(InterfaceAction):
|
|||||||
opf, cov = id_map[book_id]
|
opf, cov = id_map[book_id]
|
||||||
cfile = mi.cover
|
cfile = mi.cover
|
||||||
mi.cover, mi.cover_data = None, (None, None)
|
mi.cover, mi.cover_data = None, (None, None)
|
||||||
with open(opf, 'wb') as f:
|
if opf is not None:
|
||||||
f.write(metadata_to_opf(mi))
|
with open(opf, 'wb') as f:
|
||||||
if cfile:
|
f.write(metadata_to_opf(mi))
|
||||||
|
if cfile and cov:
|
||||||
shutil.copyfile(cfile, cov)
|
shutil.copyfile(cfile, cov)
|
||||||
os.remove(cfile)
|
os.remove(cfile)
|
||||||
nid_map[book_id] = id_map[book_id]
|
nid_map[book_id] = id_map[book_id]
|
||||||
|
@ -549,6 +549,9 @@ class SearchRestrictionMixin(object):
|
|||||||
restriction = ''
|
restriction = ''
|
||||||
self._apply_search_restriction(restriction, r)
|
self._apply_search_restriction(restriction, r)
|
||||||
|
|
||||||
|
def clear_additional_restriction(self):
    ''' Apply an empty search restriction with an empty name. '''
    no_restriction = no_name = ''
    self._apply_search_restriction(no_restriction, no_name)
|
||||||
|
|
||||||
def _apply_search_restriction(self, restriction, name):
|
def _apply_search_restriction(self, restriction, name):
|
||||||
self.saved_search.clear()
|
self.saved_search.clear()
|
||||||
# The order below is important. Set the restriction, force a '' search
|
# The order below is important. Set the restriction, force a '' search
|
||||||
@ -561,6 +564,10 @@ class SearchRestrictionMixin(object):
|
|||||||
self.set_number_of_books_shown()
|
self.set_number_of_books_shown()
|
||||||
self.current_view().setFocus(Qt.OtherFocusReason)
|
self.current_view().setFocus(Qt.OtherFocusReason)
|
||||||
self.set_window_title()
|
self.set_window_title()
|
||||||
|
v = self.current_view()
|
||||||
|
if not v.currentIndex().isValid():
|
||||||
|
v.set_current_row()
|
||||||
|
v.refresh_book_details()
|
||||||
|
|
||||||
def set_number_of_books_shown(self):
|
def set_number_of_books_shown(self):
|
||||||
db = self.library_view.model().db
|
db = self.library_view.model().db
|
||||||
|
@ -279,6 +279,13 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
|
|||||||
action=self.ctrl_esc_action)
|
action=self.ctrl_esc_action)
|
||||||
self.ctrl_esc_action.triggered.connect(self.ctrl_esc)
|
self.ctrl_esc_action.triggered.connect(self.ctrl_esc)
|
||||||
|
|
||||||
|
self.alt_esc_action = QAction(self)
|
||||||
|
self.addAction(self.alt_esc_action)
|
||||||
|
self.keyboard.register_shortcut('clear additional restriction',
|
||||||
|
_('Clear the additional restriction'), default_keys=('Alt+Esc',),
|
||||||
|
action=self.alt_esc_action)
|
||||||
|
self.alt_esc_action.triggered.connect(self.clear_additional_restriction)
|
||||||
|
|
||||||
####################### Start spare job server ########################
|
####################### Start spare job server ########################
|
||||||
QTimer.singleShot(1000, self.add_spare_server)
|
QTimer.singleShot(1000, self.add_spare_server)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user