Pull from trunk

This commit is contained in:
Kovid Goyal 2009-04-12 12:16:16 -07:00
commit 0b59ff9c09
14 changed files with 284 additions and 104 deletions

View File

@ -263,6 +263,17 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.mobi import set_metadata
set_metadata(stream, mi)
class PDFMetadataWriter(MetadataWriterPlugin):
name = 'Set PDF metadata'
file_types = set(['pdf'])
description = _('Set metadata in %s files') % 'PDF'
author = 'John Schember'
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.pdf import set_metadata
set_metadata(stream, mi)
from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.mobi.input import MOBIInput

View File

@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, shutil, traceback, functools, sys
import os, shutil, traceback, functools, sys, re
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin
@ -55,7 +55,14 @@ def load_plugin(path_to_zip_file):
for name in zf.namelist():
if name.lower().endswith('plugin.py'):
locals = {}
exec zf.read(name) in locals
raw = zf.read(name)
match = re.search(r'coding[:=]\s*([-\w.]+)', raw[:300])
encoding = 'utf-8'
if match is not None:
encoding = match.group(1)
raw = raw.decode(encoding)
raw = re.sub('\r\n', '\n', raw)
exec raw in locals
for x in locals.values():
if isinstance(x, type) and issubclass(x, Plugin):
if x.minimum_calibre_version > version or \

View File

@ -31,6 +31,11 @@ Run an embedded python interpreter.
parser.add_option('--migrate', action='store_true', default=False,
help='Migrate old database. Needs two arguments. Path '
'to library1.db and path to new library folder.')
parser.add_option('--add-simple-plugin', default=None,
help='Add a simple plugin (i.e. a plugin that consists of only a '
'.py file), by specifying the path to the py file containing the '
'plugin code.')
return parser
def update_zipfile(zipfile, mod, path):
@ -115,6 +120,22 @@ def debug_device_driver():
print 'Total space:', d.total_space()
break
def add_simple_plugin(path_to_plugin):
import tempfile, zipfile, shutil
tdir = tempfile.mkdtemp()
open(os.path.join(tdir, 'custom_plugin.py'),
'wb').write(open(path_to_plugin, 'rb').read())
odir = os.getcwd()
os.chdir(tdir)
zf = zipfile.ZipFile('plugin.zip', 'w')
zf.write('custom_plugin.py')
zf.close()
from calibre.customize.ui import main
main(['calibre-customize', '-a', 'plugin.zip'])
os.chdir(odir)
shutil.rmtree(tdir)
def main(args=sys.argv):
opts, args = option_parser().parse_args(args)
@ -137,6 +158,8 @@ def main(args=sys.argv):
print 'You must specify the path to library1.db and the path to the new library folder'
return 1
migrate(args[1], args[2])
elif opts.add_simple_plugin is not None:
add_simple_plugin(opts.add_simple_plugin)
else:
from IPython.Shell import IPShellEmbed
ipshell = IPShellEmbed()

View File

@ -57,13 +57,18 @@ class USBMS(Device):
prefix = self._card_prefix if oncard else self._main_prefix
ebook_dir = self.EBOOK_DIR_CARD if oncard else self.EBOOK_DIR_MAIN
# Get all books in all directories under the root ebook_dir directory
# Get all books in the ebook_dir directory
if self.SUPPORTS_SUB_DIRS:
for path, dirs, files in os.walk(os.path.join(prefix, ebook_dir)):
# Filter out anything that isn't in the list of supported ebook
# types
# Filter out anything that isn't in the list of supported ebook types
for book_type in self.FORMATS:
for filename in fnmatch.filter(files, '*.%s' % (book_type)):
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
else:
path = os.path.join(prefix, ebook_dir)
for filename in os.listdir(path):
if path_to_ext(filename) in self.FORMATS:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
return bl
def upload_books(self, files, names, on_card=False, end_session=True,

View File

@ -1,11 +1,10 @@
'''Read meta information from PDF files'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files'''
import sys, os, re, StringIO
import sys, os, StringIO
from calibre.ebooks.metadata import MetaInformation, authors_to_string
from calibre.ptempfile import TemporaryDirectory
@ -52,18 +51,27 @@ def get_metadata(stream, extract_cover=True):
def set_metadata(stream, mi):
stream.seek(0)
raw = stream.read()
if mi.title:
tit = mi.title.encode('utf-8') if isinstance(mi.title, unicode) else mi.title
raw = re.compile(r'<<.*?/Title\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), tit), raw)
if mi.authors:
au = authors_to_string(mi.authors)
if isinstance(au, unicode):
au = au.encode('utf-8')
raw = re.compile(r'<<.*?/Author\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), au), raw)
# Use a StringIO object for the pdf because we will want to over
# write it later and if we are working on the stream directly it
# could cause some issues.
raw = StringIO.StringIO(stream.read())
orig_pdf = PdfFileReader(raw)
title = mi.title if mi.title else orig_pdf.documentInfo.title
author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
out_pdf = PdfFileWriter(title=title, author=author)
for page in orig_pdf.pages:
out_pdf.addPage(page)
out_str = StringIO.StringIO()
out_pdf.write(out_str)
stream.seek(0)
stream.truncate()
stream.write(raw)
out_str.seek(0)
stream.write(out_str.read())
stream.seek(0)
def get_cover(stream):

View File

@ -15,7 +15,8 @@ except ImportError:
from lxml import html, etree
from calibre import entity_to_unicode
from calibre import entity_to_unicode, sanitize_file_name
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS
from calibre.ebooks.mobi import MobiError
@ -25,7 +26,6 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.ebooks.metadata.toc import TOC
from calibre import sanitize_file_name
class EXTHHeader(object):
@ -157,6 +157,62 @@ class BookHeader(object):
self.exth.mi.language = self.language
class MetadataHeader(BookHeader):
def __init__(self, stream):
self.stream = stream
self.ident = self.identity()
self.num_sections = self.section_count()
if self.num_sections >= 2:
header = self.header()
BookHeader.__init__(self, header, self.ident, None)
else:
self.exth = None
def identity(self):
self.stream.seek(60)
ident = self.stream.read(8).upper()
if ident not in ['BOOKMOBI', 'TEXTREAD']:
raise MobiError('Unknown book type: %s' % ident)
return ident
def section_count(self):
self.stream.seek(76)
return struct.unpack('>H', self.stream.read(2))[0]
def section_offset(self, number):
self.stream.seek(78+number*8)
return struct.unpack('>LBBBB', self.stream.read(8))[0]
def header(self):
section_headers = []
# First section with the metadata
section_headers.append(self.section_offset(0))
# Second section used to get the lengh of the first
section_headers.append(self.section_offset(1))
end_off = section_headers[1]
off = section_headers[0]
self.stream.seek(off)
return self.stream.read(end_off - off)
def section_data(self, number):
start = self.section_offset(number)
if number == self.num_sections -1:
end = os.stat(self.stream.name).st_size
else:
end = self.section_offset(number + 1)
self.stream.seek(start)
return self.stream.read(end - start)
class MobiReader(object):
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
@ -593,27 +649,34 @@ class MobiReader(object):
im.convert('RGB').save(open(path, 'wb'), format='JPEG')
def get_metadata(stream):
from calibre.utils.logging import Log
log = Log()
mr = MobiReader(stream, log)
if mr.book_header.exth is None:
mi = MetaInformation(mr.name, [_('Unknown')])
else:
mi = mr.create_opf('dummy.html')[0]
mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
try:
if hasattr(mr.book_header.exth, 'cover_offset'):
cover_index = mr.book_header.first_image_index + \
mr.book_header.exth.cover_offset
data = mr.sections[int(cover_index)][0]
mh = MetadataHeader(stream)
if mh.exth is not None:
if mh.exth.mi is not None:
mi = mh.exth.mi
else:
data = mr.sections[mr.book_header.first_image_index][0]
with TemporaryDirectory('_mobi_meta_reader') as tdir:
mr = MobiReader(stream)
mr.extract_content(tdir, {})
if mr.embedded_mi is not None:
mi = mr.embedded_mi
if hasattr(mh.exth, 'cover_offset'):
cover_index = mh.first_image_index + mh.exth.cover_offset
data = mh.section_data(int(cover_index))
else:
data = mh.section_data(mh.first_image_index)
buf = cStringIO.StringIO(data)
im = PILImage.open(buf)
obuf = cStringIO.StringIO()
im.convert('RGBA').save(obuf, format='JPEG')
mi.cover_data = ('jpg', obuf.getvalue())
except:
log.exception()
import traceback
traceback.print_exc()
return mi

View File

@ -67,6 +67,8 @@ def _config():
c.add_opt('default_send_to_device_action', default=None,
help=_('Default action to perform when send to device button is '
'clicked'))
c.add_opt('show_donate_button', default=True,
help='Show donation button')
return ConfigProxy(c)
config = _config()

View File

@ -658,7 +658,9 @@ class DeviceGUI(object):
bad = '\n'.join('<li>%s</li>'%(i,) for i in bad)
d = warning_dialog(self, _('No suitable formats'),
_('Could not upload the following books to the device, '
'as no suitable formats were found:<br><ul>%s</ul>')%(bad,))
'as no suitable formats were found. Try changing the output '
'format in the upper right corner next to the red heart and '
're-converting. <br><ul>%s</ul>')%(bad,))
d.exec_()
def upload_booklists(self):

View File

@ -176,19 +176,19 @@ class Config(ResizableDialog, Ui_Dialog):
def get_metadata(self):
title, authors = self.get_title_and_authors()
mi = MetaInformation(title, authors)
publisher = unicode(self.publisher.text())
publisher = unicode(self.publisher.text()).strip()
if publisher:
mi.publisher = publisher
author_sort = unicode(self.author_sort.text())
author_sort = unicode(self.author_sort.text()).strip()
if author_sort:
mi.author_sort = author_sort
comments = unicode(self.comment.toPlainText())
comments = unicode(self.comment.toPlainText()).strip()
if comments:
mi.comments = comments
mi.series_index = int(self.series_index.value())
if self.series.currentIndex() > -1:
mi.series = unicode(self.series.currentText())
tags = [t.strip() for t in unicode(self.tags.text()).split(',')]
mi.series = unicode(self.series.currentText()).strip()
tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')]
if tags:
mi.tags = tags
@ -267,6 +267,7 @@ class Config(ResizableDialog, Ui_Dialog):
).exec_()
return
mi = self.get_metadata()
self.user_mi = mi
self.read_settings()
self.cover_file = None
if self.row is not None:

View File

@ -108,6 +108,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.donate_action = self.system_tray_menu.addAction(
QIcon(':/images/donate.svg'), _('&Donate to support calibre'))
self.donate_button.setDefaultAction(self.donate_action)
if not config['show_donate_button']:
self.donate_button.setVisible(False)
self.addAction(self.quit_action)
self.action_restart = QAction(_('&Restart'), self)
self.addAction(self.action_restart)

View File

@ -225,10 +225,11 @@ def convert_bulk(fmt, parent, db, comics, others):
if others:
d = get_dialog(fmt)(parent, db)
if d.exec_() != QDialog.Accepted:
others = []
others, user_mi = [], None
else:
opts = d.opts
opts.verbose = 2
user_mi = d.user_mi
if comics:
comic_opts = ComicConf.get_bulk_conversion_options(parent)
if not comic_opts:
@ -256,6 +257,11 @@ def convert_bulk(fmt, parent, db, comics, others):
continue
options = opts.copy()
mi = db.get_metadata(row)
if user_mi is not None:
if user_mi.series_index == 1:
user_mi.series_index = None
mi.smart_update(user_mi)
db.set_metadata(db.id(row), mi)
opf = OPFCreator(os.getcwdu(), mi)
opf_file = PersistentTemporaryFile('.opf')
opf.render(opf_file)

View File

@ -39,6 +39,7 @@ recipe_modules = ['recipe_' + r for r in (
'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
'moneynews',
)]
import re, imp, inspect, time, os

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
moneynews.newsmax.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoneyNews(BasicNewsRecipe):
title = 'Moneynews.com'
__author__ = 'Darko Miletic'
description = 'Financial news worldwide'
publisher = 'moneynews.com'
category = 'news, finances, USA, business'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
feeds = [
(u'Street Talk' , u'http://moneynews.newsmax.com/xml/streettalk.xml' )
,(u'Finance News' , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' )
,(u'Economy' , u'http://moneynews.newsmax.com/xml/economy.xml' )
,(u'Companies' , u'http://moneynews.newsmax.com/xml/companies.xml' )
,(u'Markets' , u'http://moneynews.newsmax.com/xml/Markets.xml' )
,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml' )
]
keep_only_tags = [dict(name='table', attrs={'class':'copy'})]
remove_tags = [
dict(name='td' , attrs={'id':'article_fontsize'})
,dict(name='table', attrs={'id':'toolbox' })
,dict(name='tr' , attrs={'id':'noprint3' })
]