KG changes

This commit is contained in:
GRiker 2010-02-11 11:13:01 -07:00
commit 673ca5a2bd
16 changed files with 233 additions and 79 deletions

View File

@ -1,64 +1,63 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
danas.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Danas(BasicNewsRecipe):
title = 'Danas'
__author__ = 'Darko Miletic'
description = 'Vesti'
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
publisher = 'Danas d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = False
use_embedded_content = False
encoding = 'utf-8'
masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
language = 'sr'
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
remove_tags = [
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
,dict(name='div', attrs={'id':'comments'})
,dict(name=['object','link'])
,dict(name=['object','link','iframe'])
]
feeds = [
(u'Vesti' , u'http://www.danas.rs/rss/rss.asp' )
,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4')
feeds = [
(u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24')
,(u'Dijalog' , u'http://www.danas.rs/rss/rss.asp?column_id=1' )
,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' )
,(u'Svet' , u'http://www.danas.rs/rss/rss.asp?column_id=25')
,(u'Srbija' , u'http://www.danas.rs/rss/rss.asp?column_id=28')
,(u'Kultura' , u'http://www.danas.rs/rss/rss.asp?column_id=5' )
,(u'Sport' , u'http://www.danas.rs/rss/rss.asp?column_id=13')
,(u'Scena' , u'http://www.danas.rs/rss/rss.asp?column_id=42')
,(u'Feljton' , u'http://www.danas.rs/rss/rss.asp?column_id=19')
,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' )
]
def preprocess_html(self, soup):
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
for item in soup.findAll(style=True):
del item['style']
return soup
def print_version(self, url):
    '''
    Return the address of the printer-friendly page for an article.

    The result is ``url`` with the query parameter ``&action=print``
    appended; ``url`` itself is not inspected or validated.
    '''
    print_suffix = '&action=print'
    return url + print_suffix

View File

@ -3,6 +3,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
http://www.dilbert.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
@ -28,6 +29,12 @@ class DosisDiarias(BasicNewsRecipe):
feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
preprocess_regexps = [
(re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE),
lambda match: 'strip.zoom.gif')
]
def get_article_url(self, article):
    '''
    Return the value stored under the ``feedburner_origlink`` key of
    ``article``, or ``None`` when the entry has no such key.
    '''
    original_link = article.get('feedburner_origlink')
    return original_link

View File

@ -399,7 +399,7 @@ class BuildPDF2XML(Command):
objects.append(obj)
if self.newer(dest, objects):
cmd = ['g++', '-g', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
'-lpng', '-lpthread']
if iswindows:
cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split()

View File

@ -137,8 +137,20 @@ class Develop(Command):
self.setup_mount_helper()
self.install_files()
self.run_postinstall()
self.install_env_module()
self.success()
def install_env_module(self):
    '''
    Write ``init_calibre.py`` into the Python library directory that
    lives under the staging root.

    The module text is ``HEADER`` formatted with the values returned by
    :meth:`template_args`. If the library directory does not exist, only
    a warning is emitted and nothing is written.
    '''
    import distutils.sysconfig as sysconfig
    libdir = sysconfig.get_python_lib(prefix=self.opts.staging_root)
    if not os.path.exists(libdir):
        self.warn('Cannot install calibre environment module to: '+libdir)
        return
    target = os.path.join(libdir, 'init_calibre.py')
    self.info('Installing calibre environment module: '+target)
    with open(target, 'wb') as module_file:
        module_file.write(HEADER.format(**self.template_args()))
def setup_mount_helper(self):
def warn():
self.warn('Failed to compile mount helper. Auto mounting of',
@ -180,13 +192,20 @@ class Develop(Command):
functions[typ]):
self.write_template(name, mod, func)
def template_args(self):
    '''
    Build the substitution values shared by the generated launcher
    scripts and the environment module.

    Returns a new dict with the keys ``path``, ``resources``,
    ``executables`` and ``extensions``; the last one is the
    ``calibre/plugins`` directory joined onto ``self.libdir``.
    '''
    args = {}
    args['path'] = self.libdir
    args['resources'] = self.sharedir
    args['executables'] = self.bindir
    args['extensions'] = self.j(self.libdir, 'calibre', 'plugins')
    return args
def write_template(self, name, mod, func):
template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE
script = template.format(
module=mod, func=func,
path=self.libdir, resources=self.sharedir,
executables=self.bindir,
extensions=self.j(self.libdir, 'calibre', 'plugins'))
args = self.template_args()
args['module'] = mod
args['func'] = func
script = template.format(**args)
path = self.j(self.staging_bindir, name)
if not os.path.exists(self.staging_bindir):
os.makedirs(self.staging_bindir)

View File

@ -7,6 +7,7 @@ import os
import glob
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract
class HTML2ZIP(FileTypePlugin):
name = 'HTML to ZIP'
@ -423,7 +424,7 @@ from calibre.devices.hanvon.driver import N516
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.library.catalog import CSV_XML, EPUB_MOBI
plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins += [
ComicInput,
EPUBInput,

View File

@ -111,7 +111,7 @@ class HTMLFile(object):
raise IOError(msg)
raise IgnoreFile(msg, err.errno)
self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
if not self.is_binary:
if encoding is None:
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap, os
from contextlib import closing
from calibre.customize import FileTypePlugin
class ArchiveExtract(FileTypePlugin):
    '''
    File type plugin that unwraps a zip or rar archive when it contains
    exactly one candidate e-book file.

    Only member names containing a ``.`` are considered. If there is not
    exactly one such member, or its extension is not one of the
    recognised formats, the archive path is returned unchanged.
    '''

    name = 'Archive Extract'
    author = 'Kovid Goyal'
    description = textwrap.dedent(_('''\
Extract common e-book formats from archives (zip/rar) files.
'''))
    file_types = set(['zip', 'rar'])
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        '''
        Extract the single e-book contained in ``archive``, if any.

        :param archive: Path to a ``.zip`` or ``.rar`` file. Anything
            not ending in ``.rar`` (case-insensitively) is opened as zip.
        :return: Path of a temporary file holding the extracted member,
            or ``archive`` itself when nothing suitable was found.
        '''
        is_rar = archive.lower().endswith('.rar')
        zf = None
        if is_rar:
            from calibre.libunrar import extract_member, names
        else:
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(archive, 'r')

        try:
            if is_rar:
                fnames = names(archive)
            else:
                fnames = zf.namelist()

            # Names without a dot cannot carry a usable extension.
            fnames = [x for x in fnames if '.' in x]
            if len(fnames) != 1:
                return archive
            fname = fnames[0]
            ext = os.path.splitext(fname)[1][1:]
            if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
                    'mp3'):
                return archive

            of = self.temporary_file('_archive_extract.'+ext)
            with closing(of):
                if is_rar:
                    data = extract_member(archive, match=None, name=fname)[1]
                    of.write(data)
                else:
                    of.write(zf.read(fname))
            return of.name
        finally:
            # Release the handle on the zip archive on every exit path;
            # previously it stayed open until garbage collection.
            if zf is not None:
                zf.close()

View File

@ -851,8 +851,10 @@ class Manifest(object):
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
nroot = etree.fromstring('<html><body/></html>')
parent = nroot[0]
for child in list(data):
child.getparent().remove(child)
for child in list(data.iter()):
oparent = child.getparent()
if oparent is not None:
oparent.remove(child)
parent.append(child)
data = nroot

View File

@ -120,7 +120,10 @@ class EbookIterator(object):
bad_map = {}
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
for csspath in css_files:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
try:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
except:
continue
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
block = match.group(1)
family = font_family_pat.search(block)

View File

@ -169,6 +169,8 @@ int main(int argc, char **argv) {
char *memblock;
ifstream::pos_type size;
int ret = 0;
map<string,string> info;
Reflow *reflow = NULL;
if (argc != 2) {
@ -189,9 +191,13 @@ int main(int argc, char **argv) {
}
try {
Reflow reflow(memblock, size);
reflow.render();
vector<char> *data = reflow.render_first_page();
reflow = new Reflow(memblock, size);
info = reflow->get_info();
for (map<string,string>::const_iterator it = info.begin() ; it != info.end(); it++ ) {
cout << (*it).first << " : " << (*it).second << endl;
}
//reflow->render();
vector<char> *data = reflow->render_first_page();
ofstream file("cover.png", ios::binary);
file.write(&((*data)[0]), data->size());
delete data;
@ -200,7 +206,7 @@ int main(int argc, char **argv) {
cerr << e.what() << endl;
ret = 1;
}
delete reflow;
delete[] memblock;
return ret;
}

View File

@ -115,6 +115,9 @@
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
</widget>
</item>
<item row="1" column="0">
@ -135,6 +138,9 @@
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
</widget>
</item>
<item row="2" column="0">
@ -155,6 +161,9 @@
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
</widget>
</item>
<item row="3" column="0">
@ -175,6 +184,9 @@
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
</widget>
</item>
</layout>

View File

@ -12,7 +12,8 @@ class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):
QDialog.__init__(self, window)
Ui_ChooseFormatDialog.__init__(self)
self.setupUi(self)
self.connect(self.formats, SIGNAL('activated(QModelIndex)'), lambda i: self.accept())
self.connect(self.formats, SIGNAL('activated(QModelIndex)'),
self.activated_slot)
self.msg.setText(msg)
for format in formats:
@ -20,6 +21,15 @@ class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):
format.upper()))
self._formats = formats
self.formats.setCurrentRow(0)
self._format = None
def activated_slot(self, *args):
self.accept()
def format(self):
return self._formats[self.formats.currentRow()]
return self._format
def accept(self):
    '''
    Remember the format on the currently selected row in
    ``self._format``, then defer to ``QDialog.accept``.
    '''
    selected_row = self.formats.currentRow()
    self._format = self._formats[selected_row]
    return QDialog.accept(self)

View File

@ -481,9 +481,10 @@ class Line(QGraphicsItem):
painter.restore()
painter.save()
painter.setPen(QPen(Qt.NoPen))
for c in self.children():
painter.setBrush(c.brush)
painter.drawRect(c.boundingRect())
if hasattr(self, 'children'):
for c in self.children():
painter.setBrush(c.brush)
painter.drawRect(c.boundingRect())
painter.restore()
painter.save()
for tok in self.tokens:

View File

@ -337,7 +337,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
QObject.connect(self.view_menu.actions()[0],
SIGNAL("triggered(bool)"), self.view_book)
QObject.connect(self.view_menu.actions()[1],
SIGNAL("triggered(bool)"), self.view_specific_format)
SIGNAL("triggered(bool)"), self.view_specific_format,
Qt.QueuedConnection)
self.connect(self.action_open_containing_folder,
SIGNAL('triggered(bool)'), self.view_folder)
self.delete_menu.actions()[0].triggered.connect(self.delete_books)
@ -1642,12 +1643,9 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
row = rows[0].row()
formats = self.library_view.model().db.formats(row).upper().split(',')
d = ChooseFormatDialog(self, _('Choose the format to view'), formats)
d.exec_()
if d.result() == QDialog.Accepted:
if d.exec_() == QDialog.Accepted:
format = d.format()
self.view_format(row, format)
else:
return
def view_folder(self, *args):
rows = self.current_view().selectionModel().selectedRows()

View File

@ -219,3 +219,30 @@ is great for testing a little snippet of code on the command line. It works in t
can be used to execute your own python script. It works in the same way as passing the script to the python interpreter, except
that the calibre environment is fully initialized, so you can use all the calibre code in your script.
Using calibre in your projects
----------------------------------------
It is possible to use calibre functions/code directly in your Python project. There are two ways to do this:
Binary install of calibre
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If you have a binary install of calibre, you can use the python interpreter bundled with calibre, like this::
calibre-debug -e /path/to/your/python/script.py
Source install on linux
^^^^^^^^^^^^^^^^^^^^^^^^^^
In addition to using the above technique, if you do a source install on Linux,
you can also directly import calibre, as follows::
import init_calibre
import calibre
print calibre.__version__
It is essential that you import the init_calibre module before any other calibre modules/packages, as
it sets up the interpreter to run calibre code.

View File

@ -3,10 +3,14 @@ Read and write ZIP files. Modified by Kovid Goyal to support replacing files in
a zip archive.
"""
from __future__ import with_statement
from calibre.ptempfile import TemporaryDirectory
from calibre import sanitize_file_name
import struct, os, time, sys, shutil
import binascii, cStringIO
from contextlib import closing
from calibre.ptempfile import TemporaryDirectory
from calibre import sanitize_file_name
from calibre.constants import filesystem_encoding
from calibre.ebooks.chardet import detect
try:
import zlib # We may need its compression method
@ -132,6 +136,16 @@ _CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
def decode_arcname(name):
    '''
    Convert an archive member name to a byte string in the filesystem
    encoding.

    A byte-string ``name`` is first decoded using the encoding reported
    by ``detect``; if that decode fails for any reason, it is decoded as
    UTF-8 with undecodable bytes replaced. A ``unicode`` ``name`` skips
    the decoding step.

    :return: ``name`` encoded with ``filesystem_encoding``, using
        replacement for characters that encoding cannot represent.
    '''
    if not isinstance(name, unicode):
        encoding = detect(name)['encoding']
        try:
            name = name.decode(encoding)
        except Exception:
            # Catch Exception rather than everything, so that
            # KeyboardInterrupt and SystemExit still propagate.
            name = name.decode('utf-8', 'replace')
    return name.encode(filesystem_encoding, 'replace')
def is_zipfile(filename):
"""Quickly see if file is a ZIP file by checking the magic number."""
try:
@ -222,7 +236,8 @@ def _EndRecData(fpin):
endrec = list(struct.unpack(structEndArchive, recData))
comment = data[start+sizeEndCentDir:]
# check that comment length is correct
if endrec[_ECD_COMMENT_SIZE] == len(comment):
# Kovid: Added == 0 check as some zip files apparently don't set this
if endrec[_ECD_COMMENT_SIZE] == 0 or endrec[_ECD_COMMENT_SIZE] == len(comment):
# Append the archive comment and start offset
endrec.append(comment)
endrec.append(maxCommentStart + start)
@ -675,6 +690,7 @@ class ZipFile:
self.debug = 0 # Level of printing: 0 through 3
self.NameToInfo = {} # Find file info given name
self.filelist = [] # List of ZipInfo instances for archive
self.extract_mapping = {}
self.compression = compression # Method of compression
self.mode = key = mode.replace('b', '')[0]
self.pwd = None
@ -1023,10 +1039,10 @@ class ZipFile:
targetpath = targetpath[:-1]
# don't include leading "/" from file name if present
if os.path.isabs(member.filename):
targetpath = os.path.join(targetpath, member.filename[1:])
else:
targetpath = os.path.join(targetpath, member.filename)
fname = decode_arcname(member.filename)
if fname.startswith('/'):
fname = fname[1:]
targetpath = os.path.join(targetpath, fname)
targetpath = os.path.normpath(targetpath)
@ -1037,17 +1053,16 @@ class ZipFile:
if upperdirs and not os.path.exists(upperdirs):
os.makedirs(upperdirs)
source = self.open(member, pwd=pwd)
if not os.path.exists(targetpath): # Could be a previously automatically created directory
try:
target = open(targetpath, "wb")
except IOError:
targetpath = sanitize_file_name(targetpath)
target = open(targetpath, "wb")
shutil.copyfileobj(source, target)
source.close()
target.close()
with closing(self.open(member, pwd=pwd)) as source:
try:
with open(targetpath, 'wb') as target:
shutil.copyfileobj(source, target)
except:
targetpath = sanitize_file_name(targetpath)
with open(targetpath, 'wb') as target:
shutil.copyfileobj(source, target)
self.extract_mapping[member.filename] = targetpath
return targetpath
def _writecheck(self, zinfo):
@ -1328,18 +1343,18 @@ def safe_replace(zipstream, name, datastream):
names = z.infolist()
with TemporaryDirectory('_zipfile_replace') as tdir:
z.extractall(path=tdir)
zipstream.seek(0)
zipstream.truncate()
z = ZipFile(zipstream, 'w')
mapping = z.extract_mapping
path = os.path.join(tdir, *name.split('/'))
shutil.copyfileobj(datastream, open(path, 'wb'))
for info in names:
current = os.path.join(tdir, *info.filename.split('/'))
if os.path.isdir(current):
z.writestr(info.filename+'/', '', 0700)
else:
z.write(current, info.filename, compress_type=info.compress_type)
z.close()
zipstream.seek(0)
zipstream.truncate()
with closing(ZipFile(zipstream, 'w')) as z:
for info in names:
current = mapping[info.filename]
if os.path.isdir(current):
z.writestr(info.filename+'/', '', 0700)
else:
z.write(current, info.filename, compress_type=info.compress_type)
class PyZipFile(ZipFile):
"""Class to create ZIP archives with Python library files and packages."""